|
|
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.
// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)
// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.
import ( "fmt" "go/token" "go/types" "math/big" "os" )
// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false
// domFrontier maps each block to the set of blocks in its dominance
// frontier. The outer slice is conceptually a map keyed by
// Block.Index. The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
//
type domFrontier [][]*BasicBlock
func (df domFrontier) add(u, v *BasicBlock) { p := &df[u.Index] *p = append(*p, v) }
// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) { // Encounter each node u in postorder of dom tree.
for _, child := range u.dom.children { df.build(child) } for _, vb := range u.Succs { if v := vb.dom; v.idom != u { df.add(u, vb) } } for _, w := range u.dom.children { for _, vb := range df[w.Index] { // TODO(adonovan): opt: use word-parallel bitwise union.
if v := vb.dom; v.idom != u { df.add(u, vb) } } } }
func buildDomFrontier(fn *Function) domFrontier { df := make(domFrontier, len(fn.Blocks)) df.build(fn.Blocks[0]) if fn.Recover != nil { df.build(fn.Recover) } return df }
func removeInstr(refs []Instruction, instr Instruction) []Instruction { i := 0 for _, ref := range refs { if ref == instr { continue } refs[i] = ref i++ } for j := i; j != len(refs); j++ { refs[j] = nil // aid GC
} return refs[:i] }
// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
//
func lift(fn *Function) { // TODO(adonovan): opt: lots of little optimizations may be
// worthwhile here, especially if they cause us to avoid
// buildDomFrontier. For example:
//
// - Alloc never loaded? Eliminate.
// - Alloc never stored? Replace all loads with a zero constant.
// - Alloc stored once? Replace loads with dominating store;
// don't forget that an Alloc is itself an effective store
// of zero.
// - Alloc used only within a single block?
// Use degenerate algorithm avoiding φ-nodes.
// - Consider synergy with scalar replacement of aggregates (SRA).
// e.g. *(&x.f) where x is an Alloc.
// Perhaps we'd get better results if we generated this as x.f
// i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
// Unclear.
//
// But we will start with the simplest correct code.
df := buildDomFrontier(fn)
if debugLifting { title := false for i, blocks := range df { if blocks != nil { if !title { fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn) title = true } fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks) } } }
newPhis := make(newPhiMap)
// During this pass we will replace some BasicBlock.Instrs
// (allocs, loads and stores) with nil, keeping a count in
// BasicBlock.gaps. At the end we will reset Instrs to the
// concatenation of all non-dead newPhis and non-nil Instrs
// for the block, reusing the original array if space permits.
// While we're here, we also eliminate 'rundefers'
// instructions in functions that contain no 'defer'
// instructions.
usesDefer := false
// A counter used to generate ~unique ids for Phi nodes, as an
// aid to debugging. We use large numbers to make them highly
// visible. All nodes are renumbered later.
fresh := 1000
// Determine which allocs we can lift and number them densely.
// The renaming phase uses this numbering for compact maps.
numAllocs := 0 for _, b := range fn.Blocks { b.gaps = 0 b.rundefers = 0 for _, instr := range b.Instrs { switch instr := instr.(type) { case *Alloc: index := -1 if liftAlloc(df, instr, newPhis, &fresh) { index = numAllocs numAllocs++ } instr.index = index case *Defer: usesDefer = true case *RunDefers: b.rundefers++ } } }
// renaming maps an alloc (keyed by index) to its replacement
// value. Initially the renaming contains nil, signifying the
// zero constant of the appropriate type; we construct the
// Const lazily at most once on each path through the domtree.
// TODO(adonovan): opt: cache per-function not per subtree.
renaming := make([]Value, numAllocs)
// Renaming.
rename(fn.Blocks[0], renaming, newPhis)
// Eliminate dead φ-nodes.
removeDeadPhis(fn.Blocks, newPhis)
// Prepend remaining live φ-nodes to each block.
for _, b := range fn.Blocks { nps := newPhis[b] j := len(nps)
rundefersToKill := b.rundefers if usesDefer { rundefersToKill = 0 }
if j+b.gaps+rundefersToKill == 0 { continue // fast path: no new phis or gaps
}
// Compact nps + non-nil Instrs into a new slice.
// TODO(adonovan): opt: compact in situ (rightwards)
// if Instrs has sufficient space or slack.
dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill) for i, np := range nps { dst[i] = np.phi } for _, instr := range b.Instrs { if instr == nil { continue } if !usesDefer { if _, ok := instr.(*RunDefers); ok { continue } } dst[j] = instr j++ } b.Instrs = dst }
// Remove any fn.Locals that were lifted.
j := 0 for _, l := range fn.Locals { if l.index < 0 { fn.Locals[j] = l j++ } } // Nil out fn.Locals[j:] to aid GC.
for i := j; i < len(fn.Locals); i++ { fn.Locals[i] = nil } fn.Locals = fn.Locals[:j] }
// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) { // First pass: find the set of "live" φ-nodes: those reachable
// from some non-Phi instruction.
//
// We compute reachability in reverse, starting from each φ,
// rather than forwards, starting from each live non-Phi
// instruction, because this way visits much less of the
// Value graph.
livePhis := make(map[*Phi]bool) for _, npList := range newPhis { for _, np := range npList { phi := np.phi if !livePhis[phi] && phiHasDirectReferrer(phi) { markLivePhi(livePhis, phi) } } }
// Existing φ-nodes due to && and || operators
// are all considered live (see Go issue 19622).
for _, b := range blocks { for _, phi := range b.phis() { markLivePhi(livePhis, phi.(*Phi)) } }
// Second pass: eliminate unused phis from newPhis.
for block, npList := range newPhis { j := 0 for _, np := range npList { if livePhis[np.phi] { npList[j] = np j++ } else { // discard it, first removing it from referrers
for _, val := range np.phi.Edges { if refs := val.Referrers(); refs != nil { *refs = removeInstr(*refs, np.phi) } } np.phi.block = nil } } newPhis[block] = npList[:j] } }
// markLivePhi marks phi, and all φ-nodes transitively reachable via
// its Operands, live.
func markLivePhi(livePhis map[*Phi]bool, phi *Phi) { livePhis[phi] = true for _, rand := range phi.Operands(nil) { if q, ok := (*rand).(*Phi); ok { if !livePhis[q] { markLivePhi(livePhis, q) } } } }
// phiHasDirectReferrer reports whether phi is directly referred to by
// a non-Phi instruction. Such instructions are the
// roots of the liveness traversal.
func phiHasDirectReferrer(phi *Phi) bool { for _, instr := range *phi.Referrers() { if _, ok := instr.(*Phi); !ok { return true } } return false }
type blockSet struct{ big.Int } // (inherit methods from Int)
// add adds b to the set and returns true if the set changed.
func (s *blockSet) add(b *BasicBlock) bool { i := b.Index if s.Bit(i) != 0 { return false } s.SetBit(&s.Int, i, 1) return true }
// take removes an arbitrary element from a set s and
// returns its index, or returns -1 if empty.
func (s *blockSet) take() int { l := s.BitLen() for i := 0; i < l; i++ { if s.Bit(i) == 1 { s.SetBit(&s.Int, i, 0) return i } } return -1 }
// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct { phi *Phi alloc *Alloc }
// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi
// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
//
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool { // Don't lift aggregates into registers, because we don't have
// a way to express their zero-constants.
switch deref(alloc.Type()).Underlying().(type) { case *types.Array, *types.Struct: return false }
// Don't lift named return values in functions that defer
// calls that may recover from panic.
if fn := alloc.Parent(); fn.Recover != nil { for _, nr := range fn.namedResults { if nr == alloc { return false } } }
// Compute defblocks, the set of blocks containing a
// definition of the alloc cell.
var defblocks blockSet for _, instr := range *alloc.Referrers() { // Bail out if we discover the alloc is not liftable;
// the only operations permitted to use the alloc are
// loads/stores into the cell, and DebugRef.
switch instr := instr.(type) { case *Store: if instr.Val == alloc { return false // address used as value
} if instr.Addr != alloc { panic("Alloc.Referrers is inconsistent") } defblocks.add(instr.Block()) case *UnOp: if instr.Op != token.MUL { return false // not a load
} if instr.X != alloc { panic("Alloc.Referrers is inconsistent") } case *DebugRef: // ok
default: return false // some other instruction
} } // The Alloc itself counts as a (zero) definition of the cell.
defblocks.add(alloc.Block())
if debugLifting { fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name()) }
fn := alloc.Parent()
// Φ-insertion.
//
// What follows is the body of the main loop of the insert-φ
// function described by Cytron et al, but instead of using
// counter tricks, we just reset the 'hasAlready' and 'work'
// sets each iteration. These are bitmaps so it's pretty cheap.
//
// TODO(adonovan): opt: recycle slice storage for W,
// hasAlready, defBlocks across liftAlloc calls.
var hasAlready blockSet
// Initialize W and work to defblocks.
var work blockSet = defblocks // blocks seen
var W blockSet // blocks to do
W.Set(&defblocks.Int)
// Traverse iterated dominance frontier, inserting φ-nodes.
for i := W.take(); i != -1; i = W.take() { u := fn.Blocks[i] for _, v := range df[u.Index] { if hasAlready.add(v) { // Create φ-node.
// It will be prepended to v.Instrs later, if needed.
phi := &Phi{ Edges: make([]Value, len(v.Preds)), Comment: alloc.Comment, } // This is merely a debugging aid:
phi.setNum(*fresh) *fresh++
phi.pos = alloc.Pos() phi.setType(deref(alloc.Type())) phi.block = v if debugLifting { fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v) } newPhis[v] = append(newPhis[v], newPhi{phi, alloc})
if work.add(v) { W.add(v) } } } }
return true }
// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
//
func replaceAll(x, y Value) { var rands []*Value pxrefs := x.Referrers() pyrefs := y.Referrers() for _, instr := range *pxrefs { rands = instr.Operands(rands[:0]) // recycle storage
for _, rand := range rands { if *rand != nil { if *rand == x { *rand = y } } } if pyrefs != nil { *pyrefs = append(*pyrefs, instr) // dups ok
} } *pxrefs = nil // x is now unreferenced
}
// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
//
func renamed(renaming []Value, alloc *Alloc) Value { v := renaming[alloc.index] if v == nil { v = zeroConst(deref(alloc.Type())) renaming[alloc.index] = v } return v }
// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction. For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
//
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) { // Each φ-node becomes the new name for its associated Alloc.
for _, np := range newPhis[u] { phi := np.phi alloc := np.alloc renaming[alloc.index] = phi }
// Rename loads and stores of allocs.
for i, instr := range u.Instrs { switch instr := instr.(type) { case *Alloc: if instr.index >= 0 { // store of zero to Alloc cell
// Replace dominated loads by the zero value.
renaming[instr.index] = nil if debugLifting { fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr) } // Delete the Alloc.
u.Instrs[i] = nil u.gaps++ }
case *Store: if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
// Replace dominated loads by the stored value.
renaming[alloc.index] = instr.Val if debugLifting { fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n", instr, instr.Val.Name()) } // Remove the store from the referrer list of the stored value.
if refs := instr.Val.Referrers(); refs != nil { *refs = removeInstr(*refs, instr) } // Delete the Store.
u.Instrs[i] = nil u.gaps++ }
case *UnOp: if instr.Op == token.MUL { if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
newval := renamed(renaming, alloc) if debugLifting { fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n", instr.Name(), instr, newval.Name()) } // Replace all references to
// the loaded value by the
// dominating stored value.
replaceAll(instr, newval) // Delete the Load.
u.Instrs[i] = nil u.gaps++ } }
case *DebugRef: if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
if instr.IsAddr { instr.X = renamed(renaming, alloc) instr.IsAddr = false
// Add DebugRef to instr.X's referrers.
if refs := instr.X.Referrers(); refs != nil { *refs = append(*refs, instr) } } else { // A source expression denotes the address
// of an Alloc that was optimized away.
instr.X = nil
// Delete the DebugRef.
u.Instrs[i] = nil u.gaps++ } } } }
// For each φ-node in a CFG successor, rename the edge.
for _, v := range u.Succs { phis := newPhis[v] if len(phis) == 0 { continue } i := v.predIndex(u) for _, np := range phis { phi := np.phi alloc := np.alloc newval := renamed(renaming, alloc) if debugLifting { fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n", phi.Name(), u, v, i, alloc.Name(), newval.Name()) } phi.Edges[i] = newval if prefs := newval.Referrers(); prefs != nil { *prefs = append(*prefs, phi) } } }
// Continue depth-first recursion over domtree, pushing a
// fresh copy of the renaming map for each subtree.
for i, v := range u.dom.children { r := renaming if i < len(u.dom.children)-1 { // On all but the final iteration, we must make
// a copy to avoid destructive update.
r = make([]Value, len(renaming)) copy(r, renaming) } rename(v, r, newPhis) }
}
|