1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
//! A control flow graph represented as mappings of basic blocks to their predecessors
//! and successors.
//!
//! Successors are represented as basic blocks while predecessors are represented by basic
//! blocks. Basic blocks are denoted by tuples of block and branch/jump instructions. Each
//! predecessor tuple corresponds to the end of a basic block.
//!
//! ```c
//!     Block0:
//!         ...          ; beginning of basic block
//!
//!         ...
//!
//!         brz vx, Block1 ; end of basic block
//!
//!         ...          ; beginning of basic block
//!
//!         ...
//!
//!         jmp Block2     ; end of basic block
//! ```
//!
//! Here `Block1` and `Block2` would each have a single predecessor denoted as `(Block0, brz)`
//! and `(Block0, jmp Block2)` respectively.

use crate::bforest;
use crate::entity::SecondaryMap;
use crate::ir::instructions::BranchInfo;
use crate::ir::{Block, Function, Inst};
use crate::timing;
use core::mem;

/// A basic block denoted by its enclosing Block and last instruction.
#[derive(Debug, PartialEq, Eq)]
pub struct BlockPredecessor {
    /// Enclosing Block key.
    pub block: Block,
    /// Last instruction in the basic block.
    pub inst: Inst,
}

impl BlockPredecessor {
    /// Convenient method to construct new BlockPredecessor.
    pub fn new(block: Block, inst: Inst) -> Self {
        Self { block, inst }
    }
}

/// A container for the successors and predecessors of some Block.
#[derive(Clone, Default)]
struct CFGNode {
    /// Instructions that can branch or jump to this block.
    ///
    /// This maps branch instruction -> predecessor block which is redundant since the block containing
    /// the branch instruction is available from the `layout.inst_block()` method. We store the
    /// redundant information because:
    ///
    /// 1. Many `pred_iter()` consumers want the block anyway, so it is handily available.
    /// 2. The `invalidate_block_successors()` may be called *after* branches have been removed from
    ///    their block, but we still need to remove them form the old block predecessor map.
    ///
    /// The redundant block stored here is always consistent with the CFG successor lists, even after
    /// the IR has been edited.
    pub predecessors: bforest::Map<Inst, Block>,

    /// Set of blocks that are the targets of branches and jumps in this block.
    /// The set is ordered by block number, indicated by the `()` comparator type.
    pub successors: bforest::Set<Block>,
}

/// The Control Flow Graph maintains a mapping of blocks to their predecessors
/// and successors where predecessors are basic blocks and successors are
/// basic blocks.
pub struct ControlFlowGraph {
    data: SecondaryMap<Block, CFGNode>,
    pred_forest: bforest::MapForest<Inst, Block>,
    succ_forest: bforest::SetForest<Block>,
    valid: bool,
}

impl ControlFlowGraph {
    /// Allocate a new blank control flow graph.
    pub fn new() -> Self {
        Self {
            data: SecondaryMap::new(),
            valid: false,
            pred_forest: bforest::MapForest::new(),
            succ_forest: bforest::SetForest::new(),
        }
    }

    /// Clear all data structures in this control flow graph.
    pub fn clear(&mut self) {
        self.data.clear();
        self.pred_forest.clear();
        self.succ_forest.clear();
        self.valid = false;
    }

    /// Allocate and compute the control flow graph for `func`.
    pub fn with_function(func: &Function) -> Self {
        let mut cfg = Self::new();
        cfg.compute(func);
        cfg
    }

    /// Compute the control flow graph of `func`.
    ///
    /// This will clear and overwrite any information already stored in this data structure.
    pub fn compute(&mut self, func: &Function) {
        let _tt = timing::flowgraph();
        self.clear();
        self.data.resize(func.dfg.num_blocks());

        for block in &func.layout {
            self.compute_block(func, block);
        }

        self.valid = true;
    }

    fn compute_block(&mut self, func: &Function, block: Block) {
        for inst in func.layout.block_likely_branches(block) {
            match func.dfg.analyze_branch(inst) {
                BranchInfo::SingleDest(dest, _) => {
                    self.add_edge(block, inst, dest);
                }
                BranchInfo::Table(jt, dest) => {
                    if let Some(dest) = dest {
                        self.add_edge(block, inst, dest);
                    }
                    for dest in func.jump_tables[jt].iter() {
                        self.add_edge(block, inst, *dest);
                    }
                }
                BranchInfo::NotABranch => {}
            }
        }
    }

    fn invalidate_block_successors(&mut self, block: Block) {
        // Temporarily take ownership because we need mutable access to self.data inside the loop.
        // Unfortunately borrowck cannot see that our mut accesses to predecessors don't alias
        // our iteration over successors.
        let mut successors = mem::replace(&mut self.data[block].successors, Default::default());
        for succ in successors.iter(&self.succ_forest) {
            self.data[succ]
                .predecessors
                .retain(&mut self.pred_forest, |_, &mut e| e != block);
        }
        successors.clear(&mut self.succ_forest);
    }

    /// Recompute the control flow graph of `block`.
    ///
    /// This is for use after modifying instructions within a specific block. It recomputes all edges
    /// from `block` while leaving edges to `block` intact. Its functionality a subset of that of the
    /// more expensive `compute`, and should be used when we know we don't need to recompute the CFG
    /// from scratch, but rather that our changes have been restricted to specific blocks.
    pub fn recompute_block(&mut self, func: &Function, block: Block) {
        debug_assert!(self.is_valid());
        self.invalidate_block_successors(block);
        self.compute_block(func, block);
    }

    fn add_edge(&mut self, from: Block, from_inst: Inst, to: Block) {
        self.data[from]
            .successors
            .insert(to, &mut self.succ_forest, &());
        self.data[to]
            .predecessors
            .insert(from_inst, from, &mut self.pred_forest, &());
    }

    /// Get an iterator over the CFG predecessors to `block`.
    pub fn pred_iter(&self, block: Block) -> PredIter {
        PredIter(self.data[block].predecessors.iter(&self.pred_forest))
    }

    /// Get an iterator over the CFG successors to `block`.
    pub fn succ_iter(&self, block: Block) -> SuccIter {
        debug_assert!(self.is_valid());
        self.data[block].successors.iter(&self.succ_forest)
    }

    /// Check if the CFG is in a valid state.
    ///
    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
    /// `compute()` method has been called since the last `clear()`. It does not check that the
    /// CFG is consistent with the function.
    pub fn is_valid(&self) -> bool {
        self.valid
    }
}

/// An iterator over block predecessors. The iterator type is `BlockPredecessor`.
///
/// Each predecessor is an instruction that branches to the block.
pub struct PredIter<'a>(bforest::MapIter<'a, Inst, Block>);

impl<'a> Iterator for PredIter<'a> {
    type Item = BlockPredecessor;

    fn next(&mut self) -> Option<BlockPredecessor> {
        self.0.next().map(|(i, e)| BlockPredecessor::new(e, i))
    }
}

/// An iterator over block successors. The iterator type is `Block`.
pub type SuccIter<'a> = bforest::SetIter<'a, Block>;

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::{types, Function, InstBuilder};
    use alloc::vec::Vec;

    #[test]
    fn empty() {
        let func = Function::new();
        ControlFlowGraph::with_function(&func);
    }

    #[test]
    fn no_predecessors() {
        let mut func = Function::new();
        let block0 = func.dfg.make_block();
        let block1 = func.dfg.make_block();
        let block2 = func.dfg.make_block();
        func.layout.append_block(block0);
        func.layout.append_block(block1);
        func.layout.append_block(block2);

        let cfg = ControlFlowGraph::with_function(&func);

        let mut fun_blocks = func.layout.blocks();
        for block in func.layout.blocks() {
            assert_eq!(block, fun_blocks.next().unwrap());
            assert_eq!(cfg.pred_iter(block).count(), 0);
            assert_eq!(cfg.succ_iter(block).count(), 0);
        }
    }

    #[test]
    fn branches_and_jumps() {
        let mut func = Function::new();
        let block0 = func.dfg.make_block();
        let cond = func.dfg.append_block_param(block0, types::I32);
        let block1 = func.dfg.make_block();
        let block2 = func.dfg.make_block();

        let br_block0_block2;
        let br_block1_block1;
        let jmp_block0_block1;
        let jmp_block1_block2;

        {
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_block(block0);
            br_block0_block2 = cur.ins().brnz(cond, block2, &[]);
            jmp_block0_block1 = cur.ins().jump(block1, &[]);

            cur.insert_block(block1);
            br_block1_block1 = cur.ins().brnz(cond, block1, &[]);
            jmp_block1_block2 = cur.ins().jump(block2, &[]);

            cur.insert_block(block2);
        }

        let mut cfg = ControlFlowGraph::with_function(&func);

        {
            let block0_predecessors = cfg.pred_iter(block0).collect::<Vec<_>>();
            let block1_predecessors = cfg.pred_iter(block1).collect::<Vec<_>>();
            let block2_predecessors = cfg.pred_iter(block2).collect::<Vec<_>>();

            let block0_successors = cfg.succ_iter(block0).collect::<Vec<_>>();
            let block1_successors = cfg.succ_iter(block1).collect::<Vec<_>>();
            let block2_successors = cfg.succ_iter(block2).collect::<Vec<_>>();

            assert_eq!(block0_predecessors.len(), 0);
            assert_eq!(block1_predecessors.len(), 2);
            assert_eq!(block2_predecessors.len(), 2);

            assert_eq!(
                block1_predecessors.contains(&BlockPredecessor::new(block0, jmp_block0_block1)),
                true
            );
            assert_eq!(
                block1_predecessors.contains(&BlockPredecessor::new(block1, br_block1_block1)),
                true
            );
            assert_eq!(
                block2_predecessors.contains(&BlockPredecessor::new(block0, br_block0_block2)),
                true
            );
            assert_eq!(
                block2_predecessors.contains(&BlockPredecessor::new(block1, jmp_block1_block2)),
                true
            );

            assert_eq!(block0_successors, [block1, block2]);
            assert_eq!(block1_successors, [block1, block2]);
            assert_eq!(block2_successors, []);
        }

        // Change some instructions and recompute block0
        func.dfg.replace(br_block0_block2).brnz(cond, block1, &[]);
        func.dfg.replace(jmp_block0_block1).return_(&[]);
        cfg.recompute_block(&mut func, block0);
        let br_block0_block1 = br_block0_block2;

        {
            let block0_predecessors = cfg.pred_iter(block0).collect::<Vec<_>>();
            let block1_predecessors = cfg.pred_iter(block1).collect::<Vec<_>>();
            let block2_predecessors = cfg.pred_iter(block2).collect::<Vec<_>>();

            let block0_successors = cfg.succ_iter(block0);
            let block1_successors = cfg.succ_iter(block1);
            let block2_successors = cfg.succ_iter(block2);

            assert_eq!(block0_predecessors.len(), 0);
            assert_eq!(block1_predecessors.len(), 2);
            assert_eq!(block2_predecessors.len(), 1);

            assert_eq!(
                block1_predecessors.contains(&BlockPredecessor::new(block0, br_block0_block1)),
                true
            );
            assert_eq!(
                block1_predecessors.contains(&BlockPredecessor::new(block1, br_block1_block1)),
                true
            );
            assert_eq!(
                block2_predecessors.contains(&BlockPredecessor::new(block0, br_block0_block2)),
                false
            );
            assert_eq!(
                block2_predecessors.contains(&BlockPredecessor::new(block1, jmp_block1_block2)),
                true
            );

            assert_eq!(block0_successors.collect::<Vec<_>>(), [block1]);
            assert_eq!(block1_successors.collect::<Vec<_>>(), [block1, block2]);
            assert_eq!(block2_successors.collect::<Vec<_>>(), []);
        }
    }
}