// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.

import (
	"fmt"
	"go/token"
	"go/types"
	"math/big"
	"os"
)

// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false

// domFrontier maps each block to the set of blocks in its dominance
// frontier. The outer slice is conceptually a map keyed by
// Block.Index. The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
//
type domFrontier [][]*BasicBlock

func (df domFrontier) add(u, v *BasicBlock) {
	p := &df[u.Index]
	*p = append(*p, v)
}

// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) {
	// Encounter each node u in postorder of dom tree.
	for _, child := range u.dom.children {
		df.build(child)
	}
	for _, vb := range u.Succs {
		if v := vb.dom; v.idom != u {
			df.add(u, vb)
		}
	}
	for _, w := range u.dom.children {
		for _, vb := range df[w.Index] {
			// TODO(adonovan): opt: use word-parallel bitwise union.
			if v := vb.dom; v.idom != u {
				df.add(u, vb)
			}
		}
	}
}
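
// Worked example (illustrative, not part of the original source).
// In the diamond CFG
//
//	     b0
//	    /  \
//	  b1    b2
//	    \  /
//	     b3
//
// b0 dominates every block, but neither b1 nor b2 dominates b3, so
// build computes DF(b1) = DF(b2) = {b3} and DF(b0) = DF(b3) = {}.
// Intuitively, b3 is where control flow from b1 and b2 merges, so a
// definition made in only one arm stops being the unique dominating
// definition there; that is exactly where a φ-node may be needed.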

func buildDomFrontier(fn *Function) domFrontier {
	df := make(domFrontier, len(fn.Blocks))
	df.build(fn.Blocks[0])
	if fn.Recover != nil {
		df.build(fn.Recover)
	}
	return df
}

func removeInstr(refs []Instruction, instr Instruction) []Instruction {
	i := 0
	for _, ref := range refs {
		if ref == instr {
			continue
		}
		refs[i] = ref
		i++
	}
	for j := i; j != len(refs); j++ {
		refs[j] = nil // aid GC
	}
	return refs[:i]
}
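
// For example (illustrative): with refs = [load, store, load] and
// instr = store, removeInstr compacts refs in place to [load, load],
// nils the vacated final slot so the stale reference does not pin the
// removed instruction in memory, and returns the shortened slice.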

// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
//
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier. For example:
	//
	// - Alloc never loaded? Eliminate.
	// - Alloc never stored? Replace all loads with a zero constant.
	// - Alloc stored once? Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps. At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers'
	// instructions in functions that contain no 'defer'
	// instructions.
	usesDefer := false

	// A counter used to generate ~unique ids for Phi nodes, as an
	// aid to debugging. We use large numbers to make them highly
	// visible. All nodes are renumbered later.
	fresh := 1000

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis, &fresh) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
			case *RunDefers:
				b.rundefers++
			}
		}
	}

	// renaming maps an alloc (keyed by index) to its replacement
	// value. Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead φ-nodes.
	removeDeadPhis(fn.Blocks, newPhis)

	// Prepend remaining live φ-nodes to each block.
	for _, b := range fn.Blocks {
		nps := newPhis[b]
		j := len(nps)

		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ (rightwards)
		// if Instrs has sufficient space or slack.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue
			}
			if !usesDefer {
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}
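
// Illustrative before/after sketch (not part of the original source;
// register names and φ-edge labels depend on block numbering).
// For source like
//
//	x := 0
//	if c {
//		x = 1
//	}
//	return x
//
// the unlifted IR stores to and loads from an Alloc cell for x:
//
//	t0 = local int (x)
//	*t0 = 0:int
//	...
//	*t0 = 1:int      // in the "then" block
//	...
//	t1 = *t0         // in the join block
//
// After lift, the Alloc and its loads and stores are gone, and the
// join block instead begins with a φ-node merging the two stored
// values, roughly:
//
//	t2 = phi [0: 0:int, 1: 1:int] #x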

// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
	// First pass: find the set of "live" φ-nodes: those reachable
	// from some non-Phi instruction.
	//
	// We compute reachability in reverse, starting from each φ,
	// rather than forwards, starting from each live non-Phi
	// instruction, because this way visits much less of the
	// Value graph.
	livePhis := make(map[*Phi]bool)
	for _, npList := range newPhis {
		for _, np := range npList {
			phi := np.phi
			if !livePhis[phi] && phiHasDirectReferrer(phi) {
				markLivePhi(livePhis, phi)
			}
		}
	}

	// Existing φ-nodes due to && and || operators
	// are all considered live (see Go issue 19622).
	for _, b := range blocks {
		for _, phi := range b.phis() {
			markLivePhi(livePhis, phi.(*Phi))
		}
	}

	// Second pass: eliminate unused phis from newPhis.
	for block, npList := range newPhis {
		j := 0
		for _, np := range npList {
			if livePhis[np.phi] {
				npList[j] = np
				j++
			} else {
				// discard it, first removing it from referrers
				for _, val := range np.phi.Edges {
					if refs := val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				np.phi.block = nil
			}
		}
		newPhis[block] = npList[:j]
	}
}

// markLivePhi marks phi, and all φ-nodes transitively reachable via
// its Operands, live.
func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
	livePhis[phi] = true
	for _, rand := range phi.Operands(nil) {
		if q, ok := (*rand).(*Phi); ok {
			if !livePhis[q] {
				markLivePhi(livePhis, q)
			}
		}
	}
}

// phiHasDirectReferrer reports whether phi is directly referred to by
// a non-Phi instruction. Such instructions are the
// roots of the liveness traversal.
func phiHasDirectReferrer(phi *Phi) bool {
	for _, instr := range *phi.Referrers() {
		if _, ok := instr.(*Phi); !ok {
			return true
		}
	}
	return false
}

type blockSet struct{ big.Int } // (inherit methods from Int)

// add adds b to the set and returns true if the set changed.
func (s *blockSet) add(b *BasicBlock) bool {
	i := b.Index
	if s.Bit(i) != 0 {
		return false
	}
	s.SetBit(&s.Int, i, 1)
	return true
}

// take removes an arbitrary element from a set s and
// returns its index, or returns -1 if empty.
func (s *blockSet) take() int {
	l := s.BitLen()
	for i := 0; i < l; i++ {
		if s.Bit(i) == 1 {
			s.SetBit(&s.Int, i, 0)
			return i
		}
	}
	return -1
}
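
// Illustrative usage (not part of the original source): liftAlloc
// below drains a blockSet worklist with
//
//	for i := W.take(); i != -1; i = W.take() { ... }
//
// and gates insertion on the boolean result of add, which reports
// whether the set actually changed, so each block is pushed onto the
// worklist at most once per alloc cell.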

// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	phi   *Phi
	alloc *Alloc
}

// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi

// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
//
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
	// Don't lift aggregates into registers, because we don't have
	// a way to express their zero-constants.
	switch deref(alloc.Type()).Underlying().(type) {
	case *types.Array, *types.Struct:
		return false
	}

	// Don't lift named return values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		for _, nr := range fn.namedResults {
			if nr == alloc {
				return false
			}
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks blockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
	defblocks.add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration. These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
	var hasAlready blockSet

	// Initialize W and work to defblocks.
	var work blockSet = defblocks // blocks seen
	var W blockSet                // blocks to do
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.take(); i != -1; i = W.take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(deref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				if work.add(v) {
					W.add(v)
				}
			}
		}
	}

	return true
}
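
// Illustrative trace (not part of the original source). Reusing the
// diamond CFG from the domFrontier example, and assuming the Alloc
// instruction itself appears in b0, suppose the cell is stored in b1
// and b2. Then defblocks = {b0, b1, b2}, b0 counting as the implicit
// zero definition. Draining W: taking b1 (or b2) places a φ at b3,
// its dominance frontier, and pushes b3 onto W; taking b3 and b0 adds
// nothing, since DF(b3) and DF(b0) are empty. The iterated dominance
// frontier, and hence the set of φ-nodes for this cell, is {b3}.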

// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
//
func replaceAll(x, y Value) {
	var rands []*Value
	pxrefs := x.Referrers()
	pyrefs := y.Referrers()
	for _, instr := range *pxrefs {
		rands = instr.Operands(rands[:0]) // recycle storage
		for _, rand := range rands {
			if *rand != nil {
				if *rand == x {
					*rand = y
				}
			}
		}
		if pyrefs != nil {
			*pyrefs = append(*pyrefs, instr) // dups ok
		}
	}
	*pxrefs = nil // x is now unreferenced
}
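
// For example (illustrative): when rename lifts a load t1 = *t0 whose
// dominating stored value is v, it calls replaceAll(t1, v); afterwards
// every operand that referred to t1 refers to v, v's referrer list
// includes those instructions, and t1's referrer list is empty.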

// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
//
func renamed(renaming []Value, alloc *Alloc) Value {
	v := renaming[alloc.index]
	if v == nil {
		v = zeroConst(deref(alloc.Type()))
		renaming[alloc.index] = v
	}
	return v
}

// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction. For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
//
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// Each φ-node becomes the new name for its associated Alloc.
	for _, np := range newPhis[u] {
		phi := np.phi
		alloc := np.alloc
		renaming[alloc.index] = phi
	}

	// Rename loads and stores of allocs.
	for i, instr := range u.Instrs {
		switch instr := instr.(type) {
		case *Alloc:
			if instr.index >= 0 { // store of zero to Alloc cell
				// Replace dominated loads by the zero value.
				renaming[instr.index] = nil
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
				}
				// Delete the Alloc.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *Store:
			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
				// Replace dominated loads by the stored value.
				renaming[alloc.index] = instr.Val
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
						instr, instr.Val.Name())
				}
				// Remove the store from the referrer list of the stored value.
				if refs := instr.Val.Referrers(); refs != nil {
					*refs = removeInstr(*refs, instr)
				}
				// Delete the Store.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *UnOp:
			if instr.Op == token.MUL {
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
					newval := renamed(renaming, alloc)
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
							instr.Name(), instr, newval.Name())
					}
					// Replace all references to
					// the loaded value by the
					// dominating stored value.
					replaceAll(instr, newval)
					// Delete the Load.
					u.Instrs[i] = nil
					u.gaps++
				}
			}

		case *DebugRef:
			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
				if instr.IsAddr {
					instr.X = renamed(renaming, alloc)
					instr.IsAddr = false

					// Add DebugRef to instr.X's referrers.
					if refs := instr.X.Referrers(); refs != nil {
						*refs = append(*refs, instr)
					}
				} else {
					// A source expression denotes the address
					// of an Alloc that was optimized away.
					instr.X = nil

					// Delete the DebugRef.
					u.Instrs[i] = nil
					u.gaps++
				}
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge.
	for _, v := range u.Succs {
		phis := newPhis[v]
		if len(phis) == 0 {
			continue
		}
		i := v.predIndex(u)
		for _, np := range phis {
			phi := np.phi
			alloc := np.alloc
			newval := renamed(renaming, alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					phi.Name(), u, v, i, alloc.Name(), newval.Name())
			}
			phi.Edges[i] = newval
			if prefs := newval.Referrers(); prefs != nil {
				*prefs = append(*prefs, phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	for i, v := range u.dom.children {
		r := renaming
		if i < len(u.dom.children)-1 {
			// On all but the final iteration, we must make
			// a copy to avoid destructive update.
			r = make([]Value, len(renaming))
			copy(r, renaming)
		}
		rename(v, r, newPhis)
	}
}
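
// Illustrative trace (not part of the original source), continuing
// the before/after sketch shown after lift: visiting the entry block
// kills the Alloc and the store *t0 = 0:int, leaving the cell's
// renaming entry as the constant 0:int, and fills the entry's edge of
// the join block's φ with 0:int; visiting the "then" block (on a copy
// of renaming) records 1:int, kills that store, and fills its φ edge
// with 1:int. Every load of the cell has by then been rewritten via
// replaceAll to the dominating value, here the φ-node itself.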