1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
//! Register pressure tracking.
//!
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
//! sufficiently". This module defines the data structures needed to measure register pressure
//! accurately enough to guarantee that the coloring phase will not run out of registers.
//!
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
//! any given program point. This simplistic method has two problems:
//!
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
//!    register banks, so we need to at least count the number of live registers in each register
//!    bank separately.
//!
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
//!    ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
//!    This makes it difficult to accurately measure register pressure.
//!
//! This module deals with the problems via *register banks* and *top-level register classes*.
//! Register classes in different register banks are completely independent, so we can count
//! registers in one bank without worrying about the other bank at all.
//!
//! All register classes have a unique top-level register class, and we will count registers for
//! each top-level register class individually. However, a register bank can have multiple
//! top-level register classes that interfere with each other, so all top-level counts need to
//! be considered when determining how many more registers can be allocated.
//!
//! Currently, the only register bank with multiple top-level registers is the `arm32`
//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
//!
//! # Base and transient counts
//!
//! We maintain two separate register counts per top-level register class: base counts and
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
//! transient counts are adjusted with `take_transient` and `free_transient`.

// Remove once we're using the pressure tracker.
#![allow(dead_code)]

use crate::isa::registers::{RegClass, RegClassMask, RegInfo};
use crate::regalloc::RegisterSet;
use core::cmp::min;
use core::fmt;
use core::iter::ExactSizeIterator;
use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS;

/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
    /// Number of registers currently used from this register class.
    base_count: u32,
    transient_count: u32,

    /// Max number of registers that can be allocated.
    limit: u32,

    /// Register units per register.
    width: u8,

    /// The first aliasing top-level RC.
    first_toprc: u8,

    /// The number of aliasing top-level RCs.
    num_toprcs: u8,
}

impl TopRC {
    fn total_count(&self) -> u32 {
        self.base_count + self.transient_count
    }
}

pub struct Pressure {
    /// Bit mask of top-level register classes that are aliased by other top-level register classes.
    /// Unaliased register classes can use a simpler interference algorithm.
    aliased: RegClassMask,

    /// Current register counts per top-level register class.
    toprc: [TopRC; MAX_TRACKED_TOP_RCS],
}

impl Pressure {
    /// Create a new register pressure tracker.
    pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
        let mut p = Self {
            aliased: 0,
            toprc: Default::default(),
        };

        // Get the layout of aliasing top-level register classes from the register banks.
        for bank in reginfo.banks {
            let first = bank.first_toprc;
            let num = bank.num_toprcs;

            if bank.pressure_tracking {
                for rc in &mut p.toprc[first..first + num] {
                    rc.first_toprc = first as u8;
                    rc.num_toprcs = num as u8;
                }

                // Flag the top-level register classes with aliases.
                if num > 1 {
                    p.aliased |= ((1 << num) - 1) << first;
                }
            } else {
                // This bank has no pressure tracking, so its top-level register classes may exceed
                // `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
                for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] {
                    // These aren't used if we don't set the `aliased` bit.
                    rc.first_toprc = !0;
                    rc.limit = !0;
                }
            }
        }

        // Compute per-class limits from `usable`.
        for (toprc, rc) in p
            .toprc
            .iter_mut()
            .take_while(|t| t.num_toprcs > 0)
            .zip(reginfo.classes)
        {
            toprc.limit = usable.iter(rc).len() as u32;
            toprc.width = rc.width;
        }

        p
    }

    /// Check for an available register in the register class `rc`.
    ///
    /// If it is possible to allocate one more register from `rc`'s top-level register class,
    /// returns 0.
    ///
    /// If not, returns a bit-mask of top-level register classes that are interfering. Register
    /// pressure should be eased in one of the returned top-level register classes before calling
    /// `can_take()` to check again.
    fn check_avail(&self, rc: RegClass) -> RegClassMask {
        let entry = match self.toprc.get(rc.toprc as usize) {
            None => return 0, // Not a pressure tracked bank.
            Some(e) => e,
        };
        let mask = 1 << rc.toprc;
        if (self.aliased & mask) == 0 {
            // This is a simple unaliased top-level register class.
            if entry.total_count() < entry.limit {
                0
            } else {
                mask
            }
        } else {
            // This is the more complicated case. The top-level register class has aliases.
            self.check_avail_aliased(entry)
        }
    }

    /// Check for an available register in a top-level register class that may have aliases.
    ///
    /// This is the out-of-line slow path for `check_avail()`.
    fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
        let first = usize::from(entry.first_toprc);
        let num = usize::from(entry.num_toprcs);
        let width = u32::from(entry.width);
        let ulimit = entry.limit * width;

        // Count up the number of available register units.
        let mut units = 0;
        for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
            let rcw = u32::from(rc.width);
            // If `rc.width` is smaller than `width`, each register in `rc` could potentially block
            // one of ours. This is assuming that none of the smaller registers are straddling the
            // bigger ones.
            //
            // If `rc.width` is larger than `width`, we are also assuming that the registers are
            // aligned and `rc.width` is a multiple of `width`.
            let u = if rcw < width {
                // We can't take more than the total number of register units in the class.
                // This matters for arm32 S-registers which can only ever lock out 16 D-registers.
                min(rc.total_count() * width, rc.limit * rcw)
            } else {
                rc.total_count() * rcw
            };

            // If this top-level RC on its own is responsible for exceeding our limit, return it
            // early to guarantee that registers here are spilled before spilling other registers
            // unnecessarily.
            if u >= ulimit {
                return 1 << rci;
            }

            units += u;
        }

        // We've counted up the worst-case number of register units claimed by all aliasing
        // classes. Compare to the unit limit in this class.
        if units < ulimit {
            0
        } else {
            // Registers need to be spilled from any one of the aliasing classes.
            ((1 << num) - 1) << first
        }
    }

    /// Take a register from `rc`.
    ///
    /// This does not check if there are enough registers available.
    pub fn take(&mut self, rc: RegClass) {
        if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
            t.base_count += 1;
        }
    }

    /// Free a register in `rc`.
    pub fn free(&mut self, rc: RegClass) {
        if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
            t.base_count -= 1;
        }
    }

    /// Reset all counts to 0, both base and transient.
    pub fn reset(&mut self) {
        for e in &mut self.toprc {
            e.base_count = 0;
            e.transient_count = 0;
        }
    }

    /// Try to increment a transient counter.
    ///
    /// This will fail if there are not enough registers available.
    pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
        let mask = self.check_avail(rc);
        if mask == 0 {
            if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
                t.transient_count += 1;
            }

            Ok(())
        } else {
            Err(mask)
        }
    }

    /// Reset all transient counts to 0.
    pub fn reset_transient(&mut self) {
        for e in &mut self.toprc {
            e.transient_count = 0;
        }
    }

    /// Preserve the transient counts by transferring them to the base counts.
    pub fn preserve_transient(&mut self) {
        for e in &mut self.toprc {
            e.base_count += e.transient_count;
            e.transient_count = 0;
        }
    }
}

impl fmt::Display for Pressure {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Pressure[")?;
        for rc in &self.toprc {
            if rc.limit > 0 && rc.limit < !0 {
                write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
            }
        }
        write!(f, " ]")
    }
}

#[cfg(test)]
#[cfg(feature = "arm32")]
mod tests {
    use super::Pressure;
    use crate::isa::registers::{RegBank, RegClassData};
    use crate::isa::{RegClass, RegInfo, RegUnit};
    use crate::regalloc::RegisterSet;
    use core::borrow::Borrow;

    // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info
    // about registers, so we directly access `INFO` from registers-arm32.rs.
    include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));

    // Get a register class by name.
    fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass {
        reginfo
            .classes
            .iter()
            .find(|rc| rc.name == name)
            .expect("Can't find named register class.")
    }

    #[test]
    fn basic_counting() {
        let reginfo = INFO.borrow();
        let gpr = rc_by_name(&reginfo, "GPR");
        let s = rc_by_name(&reginfo, "S");

        let regs = RegisterSet::new();

        let mut pressure = Pressure::new(&reginfo, &regs);
        let mut count = 0;
        while pressure.check_avail(gpr) == 0 {
            pressure.take(gpr);
            count += 1;
        }
        assert_eq!(count, 16);
        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
        assert_eq!(pressure.check_avail(s), 0);
        pressure.free(gpr);
        assert_eq!(pressure.check_avail(gpr), 0);
        pressure.take(gpr);
        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
        assert_eq!(pressure.check_avail(s), 0);
        pressure.reset();
        assert_eq!(pressure.check_avail(gpr), 0);
        assert_eq!(pressure.check_avail(s), 0);
    }

    #[test]
    fn arm_float_bank() {
        let reginfo = INFO.borrow();
        let s = rc_by_name(&reginfo, "S");
        let d = rc_by_name(&reginfo, "D");
        let q = rc_by_name(&reginfo, "Q");
        let regs = RegisterSet::new();

        let mut pressure = Pressure::new(&reginfo, &regs);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // Allocating a single S-register should not affect availability.
        pressure.take(s);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        pressure.take(d);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        pressure.take(q);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // Take a total of 16 S-regs.
        for _ in 1..16 {
            pressure.take(s);
        }
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
        for _ in 0..6 {
            assert_eq!(pressure.check_avail(d), 0);
            assert_eq!(pressure.check_avail(q), 0);
            pressure.take(q);
        }

        // We've taken 16 S, 1 D, and 7 Qs.
        assert!(pressure.check_avail(s) != 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert!(pressure.check_avail(q) != 0);
    }
}