You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

452 lines
14 KiB

  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package pointer
  5. // This file defines the main datatypes and Analyze function of the pointer analysis.
  6. import (
  7. "fmt"
  8. "go/token"
  9. "go/types"
  10. "io"
  11. "os"
  12. "reflect"
  13. "runtime"
  14. "runtime/debug"
  15. "sort"
  16. "golang.org/x/tools/go/callgraph"
  17. "golang.org/x/tools/go/ssa"
  18. "golang.org/x/tools/go/types/typeutil"
  19. )
  20. const (
  21. // optimization options; enable all when committing
  22. optRenumber = true // enable renumbering optimization (makes logs hard to read)
  23. optHVN = true // enable pointer equivalence via Hash-Value Numbering
  24. // debugging options; disable all when committing
  25. debugHVN = false // enable assertions in HVN
  26. debugHVNVerbose = false // enable extra HVN logging
  27. debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
  28. debugTimers = false // show running time of each phase
  29. )
  30. // object.flags bitmask values.
  31. const (
  32. otTagged = 1 << iota // type-tagged object
  33. otIndirect // type-tagged object with indirect payload
  34. otFunction // function object
  35. )
  36. // An object represents a contiguous block of memory to which some
  37. // (generalized) pointer may point.
  38. //
  39. // (Note: most variables called 'obj' are not *objects but nodeids
  40. // such that a.nodes[obj].obj != nil.)
  41. //
  42. type object struct {
  43. // flags is a bitset of the node type (ot*) flags defined above.
  44. flags uint32
  45. // Number of following nodes belonging to the same "object"
  46. // allocation. Zero for all other nodes.
  47. size uint32
  48. // data describes this object; it has one of these types:
  49. //
  50. // ssa.Value for an object allocated by an SSA operation.
  51. // types.Type for an rtype instance object or *rtype-tagged object.
  52. // string for an instrinsic object, e.g. the array behind os.Args.
  53. // nil for an object allocated by an instrinsic.
  54. // (cgn provides the identity of the intrinsic.)
  55. data interface{}
  56. // The call-graph node (=context) in which this object was allocated.
  57. // May be nil for global objects: Global, Const, some Functions.
  58. cgn *cgnode
  59. }
  60. // nodeid denotes a node.
  61. // It is an index within analysis.nodes.
  62. // We use small integers, not *node pointers, for many reasons:
  63. // - they are smaller on 64-bit systems.
  64. // - sets of them can be represented compactly in bitvectors or BDDs.
  65. // - order matters; a field offset can be computed by simple addition.
  66. type nodeid uint32
  67. // A node is an equivalence class of memory locations.
  68. // Nodes may be pointers, pointed-to locations, neither, or both.
  69. //
  70. // Nodes that are pointed-to locations ("labels") have an enclosing
  71. // object (see analysis.enclosingObject).
  72. //
  73. type node struct {
  74. // If non-nil, this node is the start of an object
  75. // (addressable memory location).
  76. // The following obj.size nodes implicitly belong to the object;
  77. // they locate their object by scanning back.
  78. obj *object
  79. // The type of the field denoted by this node. Non-aggregate,
  80. // unless this is an tagged.T node (i.e. the thing
  81. // pointed to by an interface) in which case typ is that type.
  82. typ types.Type
  83. // subelement indicates which directly embedded subelement of
  84. // an object of aggregate type (struct, tuple, array) this is.
  85. subelement *fieldInfo // e.g. ".a.b[*].c"
  86. // Solver state for the canonical node of this pointer-
  87. // equivalence class. Each node is created with its own state
  88. // but they become shared after HVN.
  89. solve *solverState
  90. }
  91. // An analysis instance holds the state of a single pointer analysis problem.
  92. type analysis struct {
  93. config *Config // the client's control/observer interface
  94. prog *ssa.Program // the program being analyzed
  95. log io.Writer // log stream; nil to disable
  96. panicNode nodeid // sink for panic, source for recover
  97. nodes []*node // indexed by nodeid
  98. flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
  99. trackTypes map[types.Type]bool // memoization of shouldTrack()
  100. constraints []constraint // set of constraints
  101. cgnodes []*cgnode // all cgnodes
  102. genq []*cgnode // queue of functions to generate constraints for
  103. intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
  104. globalval map[ssa.Value]nodeid // node for each global ssa.Value
  105. globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
  106. localval map[ssa.Value]nodeid // node for each local ssa.Value
  107. localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
  108. atFuncs map[*ssa.Function]bool // address-taken functions (for presolver)
  109. mapValues []nodeid // values of makemap objects (indirect in HVN)
  110. work nodeset // solver's worklist
  111. result *Result // results of the analysis
  112. track track // pointerlike types whose aliasing we track
  113. deltaSpace []int // working space for iterating over PTS deltas
  114. // Reflection & intrinsics:
  115. hasher typeutil.Hasher // cache of type hashes
  116. reflectValueObj types.Object // type symbol for reflect.Value (if present)
  117. reflectValueCall *ssa.Function // (reflect.Value).Call
  118. reflectRtypeObj types.Object // *types.TypeName for reflect.rtype (if present)
  119. reflectRtypePtr *types.Pointer // *reflect.rtype
  120. reflectType *types.Named // reflect.Type
  121. rtypes typeutil.Map // nodeid of canonical *rtype-tagged object for type T
  122. reflectZeros typeutil.Map // nodeid of canonical T-tagged object for zero value
  123. runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer
  124. }
  125. // enclosingObj returns the first node of the addressable memory
  126. // object that encloses node id. Panic ensues if that node does not
  127. // belong to any object.
  128. func (a *analysis) enclosingObj(id nodeid) nodeid {
  129. // Find previous node with obj != nil.
  130. for i := id; i >= 0; i-- {
  131. n := a.nodes[i]
  132. if obj := n.obj; obj != nil {
  133. if i+nodeid(obj.size) <= id {
  134. break // out of bounds
  135. }
  136. return i
  137. }
  138. }
  139. panic("node has no enclosing object")
  140. }
  141. // labelFor returns the Label for node id.
  142. // Panic ensues if that node is not addressable.
  143. func (a *analysis) labelFor(id nodeid) *Label {
  144. return &Label{
  145. obj: a.nodes[a.enclosingObj(id)].obj,
  146. subelement: a.nodes[id].subelement,
  147. }
  148. }
  149. func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) {
  150. msg := fmt.Sprintf(format, args...)
  151. if a.log != nil {
  152. fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg)
  153. }
  154. a.result.Warnings = append(a.result.Warnings, Warning{pos, msg})
  155. }
  156. // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries.
  157. func (a *analysis) computeTrackBits() {
  158. if len(a.config.extendedQueries) != 0 {
  159. // TODO(dh): only track the types necessary for the query.
  160. a.track = trackAll
  161. return
  162. }
  163. var queryTypes []types.Type
  164. for v := range a.config.Queries {
  165. queryTypes = append(queryTypes, v.Type())
  166. }
  167. for v := range a.config.IndirectQueries {
  168. queryTypes = append(queryTypes, mustDeref(v.Type()))
  169. }
  170. for _, t := range queryTypes {
  171. switch t.Underlying().(type) {
  172. case *types.Chan:
  173. a.track |= trackChan
  174. case *types.Map:
  175. a.track |= trackMap
  176. case *types.Pointer:
  177. a.track |= trackPtr
  178. case *types.Slice:
  179. a.track |= trackSlice
  180. case *types.Interface:
  181. a.track = trackAll
  182. return
  183. }
  184. if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) {
  185. a.track = trackAll
  186. return
  187. }
  188. }
  189. }
  190. // Analyze runs the pointer analysis with the scope and options
  191. // specified by config, and returns the (synthetic) root of the callgraph.
  192. //
  193. // Pointer analysis of a transitively closed well-typed program should
  194. // always succeed. An error can occur only due to an internal bug.
  195. //
  196. func Analyze(config *Config) (result *Result, err error) {
  197. if config.Mains == nil {
  198. return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
  199. }
  200. defer func() {
  201. if p := recover(); p != nil {
  202. err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
  203. fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
  204. debug.PrintStack()
  205. }
  206. }()
  207. a := &analysis{
  208. config: config,
  209. log: config.Log,
  210. prog: config.prog(),
  211. globalval: make(map[ssa.Value]nodeid),
  212. globalobj: make(map[ssa.Value]nodeid),
  213. flattenMemo: make(map[types.Type][]*fieldInfo),
  214. trackTypes: make(map[types.Type]bool),
  215. atFuncs: make(map[*ssa.Function]bool),
  216. hasher: typeutil.MakeHasher(),
  217. intrinsics: make(map[*ssa.Function]intrinsic),
  218. result: &Result{
  219. Queries: make(map[ssa.Value]Pointer),
  220. IndirectQueries: make(map[ssa.Value]Pointer),
  221. },
  222. deltaSpace: make([]int, 0, 100),
  223. }
  224. if false {
  225. a.log = os.Stderr // for debugging crashes; extremely verbose
  226. }
  227. if a.log != nil {
  228. fmt.Fprintln(a.log, "==== Starting analysis")
  229. }
  230. // Pointer analysis requires a complete program for soundness.
  231. // Check to prevent accidental misconfiguration.
  232. for _, pkg := range a.prog.AllPackages() {
  233. // (This only checks that the package scope is complete,
  234. // not that func bodies exist, but it's a good signal.)
  235. if !pkg.Pkg.Complete() {
  236. return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path())
  237. }
  238. }
  239. if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
  240. rV := reflect.Pkg.Scope().Lookup("Value")
  241. a.reflectValueObj = rV
  242. a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
  243. a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named)
  244. a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype")
  245. a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())
  246. // Override flattening of reflect.Value, treating it like a basic type.
  247. tReflectValue := a.reflectValueObj.Type()
  248. a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}
  249. // Override shouldTrack of reflect.Value and *reflect.rtype.
  250. // Always track pointers of these types.
  251. a.trackTypes[tReflectValue] = true
  252. a.trackTypes[a.reflectRtypePtr] = true
  253. a.rtypes.SetHasher(a.hasher)
  254. a.reflectZeros.SetHasher(a.hasher)
  255. }
  256. if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
  257. a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
  258. }
  259. a.computeTrackBits()
  260. a.generate()
  261. a.showCounts()
  262. if optRenumber {
  263. a.renumber()
  264. }
  265. N := len(a.nodes) // excludes solver-created nodes
  266. if optHVN {
  267. if debugHVNCrossCheck {
  268. // Cross-check: run the solver once without
  269. // optimization, once with, and compare the
  270. // solutions.
  271. savedConstraints := a.constraints
  272. a.solve()
  273. a.dumpSolution("A.pts", N)
  274. // Restore.
  275. a.constraints = savedConstraints
  276. for _, n := range a.nodes {
  277. n.solve = new(solverState)
  278. }
  279. a.nodes = a.nodes[:N]
  280. // rtypes is effectively part of the solver state.
  281. a.rtypes = typeutil.Map{}
  282. a.rtypes.SetHasher(a.hasher)
  283. }
  284. a.hvn()
  285. }
  286. if debugHVNCrossCheck {
  287. runtime.GC()
  288. runtime.GC()
  289. }
  290. a.solve()
  291. // Compare solutions.
  292. if optHVN && debugHVNCrossCheck {
  293. a.dumpSolution("B.pts", N)
  294. if !diff("A.pts", "B.pts") {
  295. return nil, fmt.Errorf("internal error: optimization changed solution")
  296. }
  297. }
  298. // Create callgraph.Nodes in deterministic order.
  299. if cg := a.result.CallGraph; cg != nil {
  300. for _, caller := range a.cgnodes {
  301. cg.CreateNode(caller.fn)
  302. }
  303. }
  304. // Add dynamic edges to call graph.
  305. var space [100]int
  306. for _, caller := range a.cgnodes {
  307. for _, site := range caller.sites {
  308. for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
  309. a.callEdge(caller, site, nodeid(callee))
  310. }
  311. }
  312. }
  313. return a.result, nil
  314. }
  315. // callEdge is called for each edge in the callgraph.
  316. // calleeid is the callee's object node (has otFunction flag).
  317. //
  318. func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
  319. obj := a.nodes[calleeid].obj
  320. if obj.flags&otFunction == 0 {
  321. panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid))
  322. }
  323. callee := obj.cgn
  324. if cg := a.result.CallGraph; cg != nil {
  325. // TODO(adonovan): opt: I would expect duplicate edges
  326. // (to wrappers) to arise due to the elimination of
  327. // context information, but I haven't observed any.
  328. // Understand this better.
  329. callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn))
  330. }
  331. if a.log != nil {
  332. fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee)
  333. }
  334. // Warn about calls to non-intrinsic external functions.
  335. // TODO(adonovan): de-dup these messages.
  336. if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil {
  337. a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn)
  338. a.warnf(fn.Pos(), " (declared here)")
  339. }
  340. }
  341. // dumpSolution writes the PTS solution to the specified file.
  342. //
  343. // It only dumps the nodes that existed before solving. The order in
  344. // which solver-created nodes are created depends on pre-solver
  345. // optimization, so we can't include them in the cross-check.
  346. //
  347. func (a *analysis) dumpSolution(filename string, N int) {
  348. f, err := os.Create(filename)
  349. if err != nil {
  350. panic(err)
  351. }
  352. for id, n := range a.nodes[:N] {
  353. if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
  354. panic(err)
  355. }
  356. var sep string
  357. for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
  358. if l >= N {
  359. break
  360. }
  361. fmt.Fprintf(f, "%s%d", sep, l)
  362. sep = " "
  363. }
  364. fmt.Fprintf(f, "} : %s\n", n.typ)
  365. }
  366. if err := f.Close(); err != nil {
  367. panic(err)
  368. }
  369. }
  370. // showCounts logs the size of the constraint system. A typical
  371. // optimized distribution is 65% copy, 13% load, 11% addr, 5%
  372. // offsetAddr, 4% store, 2% others.
  373. //
  374. func (a *analysis) showCounts() {
  375. if a.log != nil {
  376. counts := make(map[reflect.Type]int)
  377. for _, c := range a.constraints {
  378. counts[reflect.TypeOf(c)]++
  379. }
  380. fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
  381. var lines []string
  382. for t, n := range counts {
  383. line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
  384. lines = append(lines, line)
  385. }
  386. sort.Sort(sort.Reverse(sort.StringSlice(lines)))
  387. for _, line := range lines {
  388. fmt.Fprintf(a.log, "\t%s\n", line)
  389. }
  390. fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
  391. // Show number of pointer equivalence classes.
  392. m := make(map[*solverState]bool)
  393. for _, n := range a.nodes {
  394. m[n.solve] = true
  395. }
  396. fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
  397. }
  398. }