You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

540 lines
12 KiB

  1. // The digraph command performs queries over unlabelled directed graphs
  2. // represented in text form. It is intended to integrate nicely with
  3. // typical UNIX command pipelines.
  4. //
  5. // Since directed graphs (import graphs, reference graphs, call graphs,
  6. // etc) often arise during software tool development and debugging, this
  7. // command is included in the go.tools repository.
  8. //
  9. // TODO(adonovan):
  10. // - support input files other than stdin
// - support alternative formats (AT&T GraphViz, CSV, etc),
  12. // a comment syntax, etc.
  13. // - allow queries to nest, like Blaze query language.
  14. //
  15. package main // import "golang.org/x/tools/cmd/digraph"
  16. import (
  17. "bufio"
  18. "bytes"
  19. "errors"
  20. "flag"
  21. "fmt"
  22. "io"
  23. "os"
  24. "sort"
  25. "strconv"
  26. "unicode"
  27. "unicode/utf8"
  28. )
  29. const Usage = `digraph: queries over directed graphs in text form.
  30. Graph format:
  31. Each line contains zero or more words. Words are separated by
  32. unquoted whitespace; words may contain Go-style double-quoted portions,
  33. allowing spaces and other characters to be expressed.
  34. Each field declares a node, and if there are more than one,
  35. an edge from the first to each subsequent one.
  36. The graph is provided on the standard input.
  37. For instance, the following (acyclic) graph specifies a partial order
  38. among the subtasks of getting dressed:
  39. % cat clothes.txt
  40. socks shoes
  41. "boxer shorts" pants
  42. pants belt shoes
  43. shirt tie sweater
  44. sweater jacket
  45. hat
  46. The line "shirt tie sweater" indicates the two edges shirt -> tie and
  47. shirt -> sweater, not shirt -> tie -> sweater.
  48. Supported queries:
  49. nodes
  50. the set of all nodes
  51. degree
  52. the in-degree and out-degree of each node.
  53. preds <label> ...
  54. the set of immediate predecessors of the specified nodes
  55. succs <label> ...
  56. the set of immediate successors of the specified nodes
  57. forward <label> ...
  58. the set of nodes transitively reachable from the specified nodes
  59. reverse <label> ...
  60. the set of nodes that transitively reach the specified nodes
  61. somepath <label> <label>
  62. the list of nodes on some arbitrary path from the first node to the second
  63. allpaths <label> <label>
  64. the set of nodes on all paths from the first node to the second
  65. sccs
  66. all strongly connected components (one per line)
  67. scc <label>
  68. the set of nodes nodes strongly connected to the specified one
  69. Example usage:
  70. Show the transitive closure of imports of the digraph tool itself:
  71. % go list -f '{{.ImportPath}}{{.Imports}}' ... | tr '[]' ' ' |
  72. digraph forward golang.org/x/tools/cmd/digraph
  73. Show which clothes (see above) must be donned before a jacket:
  74. % digraph reverse jacket <clothes.txt
  75. `
  76. func main() {
  77. flag.Parse()
  78. args := flag.Args()
  79. if len(args) == 0 {
  80. fmt.Println(Usage)
  81. return
  82. }
  83. if err := digraph(args[0], args[1:]); err != nil {
  84. fmt.Fprintf(os.Stderr, "digraph: %s\n", err)
  85. os.Exit(1)
  86. }
  87. }
  88. type nodelist []string
  89. func (l nodelist) println(sep string) {
  90. for i, label := range l {
  91. if i > 0 {
  92. fmt.Fprint(stdout, sep)
  93. }
  94. fmt.Fprint(stdout, label)
  95. }
  96. fmt.Fprintln(stdout)
  97. }
  98. type nodeset map[string]bool
  99. func (s nodeset) sort() nodelist {
  100. labels := make(nodelist, len(s))
  101. var i int
  102. for label := range s {
  103. labels[i] = label
  104. i++
  105. }
  106. sort.Strings(labels)
  107. return labels
  108. }
  109. func (s nodeset) addAll(x nodeset) {
  110. for label := range x {
  111. s[label] = true
  112. }
  113. }
  114. // A graph maps nodes to the non-nil set of their immediate successors.
  115. type graph map[string]nodeset
  116. func (g graph) addNode(label string) nodeset {
  117. edges := g[label]
  118. if edges == nil {
  119. edges = make(nodeset)
  120. g[label] = edges
  121. }
  122. return edges
  123. }
  124. func (g graph) addEdges(from string, to ...string) {
  125. edges := g.addNode(from)
  126. for _, to := range to {
  127. g.addNode(to)
  128. edges[to] = true
  129. }
  130. }
  131. func (g graph) reachableFrom(roots nodeset) nodeset {
  132. seen := make(nodeset)
  133. var visit func(label string)
  134. visit = func(label string) {
  135. if !seen[label] {
  136. seen[label] = true
  137. for e := range g[label] {
  138. visit(e)
  139. }
  140. }
  141. }
  142. for root := range roots {
  143. visit(root)
  144. }
  145. return seen
  146. }
  147. func (g graph) transpose() graph {
  148. rev := make(graph)
  149. for label, edges := range g {
  150. rev.addNode(label)
  151. for succ := range edges {
  152. rev.addEdges(succ, label)
  153. }
  154. }
  155. return rev
  156. }
  157. func (g graph) sccs() []nodeset {
  158. // Kosaraju's algorithm---Tarjan is overkill here.
  159. // Forward pass.
  160. S := make(nodelist, 0, len(g)) // postorder stack
  161. seen := make(nodeset)
  162. var visit func(label string)
  163. visit = func(label string) {
  164. if !seen[label] {
  165. seen[label] = true
  166. for e := range g[label] {
  167. visit(e)
  168. }
  169. S = append(S, label)
  170. }
  171. }
  172. for label := range g {
  173. visit(label)
  174. }
  175. // Reverse pass.
  176. rev := g.transpose()
  177. var scc nodeset
  178. seen = make(nodeset)
  179. var rvisit func(label string)
  180. rvisit = func(label string) {
  181. if !seen[label] {
  182. seen[label] = true
  183. scc[label] = true
  184. for e := range rev[label] {
  185. rvisit(e)
  186. }
  187. }
  188. }
  189. var sccs []nodeset
  190. for len(S) > 0 {
  191. top := S[len(S)-1]
  192. S = S[:len(S)-1] // pop
  193. if !seen[top] {
  194. scc = make(nodeset)
  195. rvisit(top)
  196. sccs = append(sccs, scc)
  197. }
  198. }
  199. return sccs
  200. }
  201. func parse(rd io.Reader) (graph, error) {
  202. g := make(graph)
  203. var linenum int
  204. in := bufio.NewScanner(rd)
  205. for in.Scan() {
  206. linenum++
  207. // Split into words, honoring double-quotes per Go spec.
  208. words, err := split(in.Text())
  209. if err != nil {
  210. return nil, fmt.Errorf("at line %d: %v", linenum, err)
  211. }
  212. if len(words) > 0 {
  213. g.addEdges(words[0], words[1:]...)
  214. }
  215. }
  216. if err := in.Err(); err != nil {
  217. return nil, err
  218. }
  219. return g, nil
  220. }
  221. var stdin io.Reader = os.Stdin
  222. var stdout io.Writer = os.Stdout
  223. func digraph(cmd string, args []string) error {
  224. // Parse the input graph.
  225. g, err := parse(stdin)
  226. if err != nil {
  227. return err
  228. }
  229. // Parse the command line.
  230. switch cmd {
  231. case "nodes":
  232. if len(args) != 0 {
  233. return fmt.Errorf("usage: digraph nodes")
  234. }
  235. nodes := make(nodeset)
  236. for label := range g {
  237. nodes[label] = true
  238. }
  239. nodes.sort().println("\n")
  240. case "degree":
  241. if len(args) != 0 {
  242. return fmt.Errorf("usage: digraph degree")
  243. }
  244. nodes := make(nodeset)
  245. for label := range g {
  246. nodes[label] = true
  247. }
  248. rev := g.transpose()
  249. for _, label := range nodes.sort() {
  250. fmt.Fprintf(stdout, "%d\t%d\t%s\n", len(rev[label]), len(g[label]), label)
  251. }
  252. case "succs", "preds":
  253. if len(args) == 0 {
  254. return fmt.Errorf("usage: digraph %s <label> ...", cmd)
  255. }
  256. g := g
  257. if cmd == "preds" {
  258. g = g.transpose()
  259. }
  260. result := make(nodeset)
  261. for _, root := range args {
  262. edges := g[root]
  263. if edges == nil {
  264. return fmt.Errorf("no such node %q", root)
  265. }
  266. result.addAll(edges)
  267. }
  268. result.sort().println("\n")
  269. case "forward", "reverse":
  270. if len(args) == 0 {
  271. return fmt.Errorf("usage: digraph %s <label> ...", cmd)
  272. }
  273. roots := make(nodeset)
  274. for _, root := range args {
  275. if g[root] == nil {
  276. return fmt.Errorf("no such node %q", root)
  277. }
  278. roots[root] = true
  279. }
  280. g := g
  281. if cmd == "reverse" {
  282. g = g.transpose()
  283. }
  284. g.reachableFrom(roots).sort().println("\n")
  285. case "somepath":
  286. if len(args) != 2 {
  287. return fmt.Errorf("usage: digraph somepath <from> <to>")
  288. }
  289. from, to := args[0], args[1]
  290. if g[from] == nil {
  291. return fmt.Errorf("no such 'from' node %q", from)
  292. }
  293. if g[to] == nil {
  294. return fmt.Errorf("no such 'to' node %q", to)
  295. }
  296. seen := make(nodeset)
  297. var visit func(path nodelist, label string) bool
  298. visit = func(path nodelist, label string) bool {
  299. if !seen[label] {
  300. seen[label] = true
  301. if label == to {
  302. append(path, label).println("\n")
  303. return true // unwind
  304. }
  305. for e := range g[label] {
  306. if visit(append(path, label), e) {
  307. return true
  308. }
  309. }
  310. }
  311. return false
  312. }
  313. if !visit(make(nodelist, 0, 100), from) {
  314. return fmt.Errorf("no path from %q to %q", args[0], args[1])
  315. }
  316. case "allpaths":
  317. if len(args) != 2 {
  318. return fmt.Errorf("usage: digraph allpaths <from> <to>")
  319. }
  320. from, to := args[0], args[1]
  321. if g[from] == nil {
  322. return fmt.Errorf("no such 'from' node %q", from)
  323. }
  324. if g[to] == nil {
  325. return fmt.Errorf("no such 'to' node %q", to)
  326. }
  327. seen := make(nodeset) // value of seen[x] indicates whether x is on some path to 'to'
  328. var visit func(label string) bool
  329. visit = func(label string) bool {
  330. reachesTo, ok := seen[label]
  331. if !ok {
  332. reachesTo = label == to
  333. seen[label] = reachesTo
  334. for e := range g[label] {
  335. if visit(e) {
  336. reachesTo = true
  337. }
  338. }
  339. seen[label] = reachesTo
  340. }
  341. return reachesTo
  342. }
  343. if !visit(from) {
  344. return fmt.Errorf("no path from %q to %q", from, to)
  345. }
  346. for label, reachesTo := range seen {
  347. if !reachesTo {
  348. delete(seen, label)
  349. }
  350. }
  351. seen.sort().println("\n")
  352. case "sccs":
  353. if len(args) != 0 {
  354. return fmt.Errorf("usage: digraph sccs")
  355. }
  356. for _, scc := range g.sccs() {
  357. scc.sort().println(" ")
  358. }
  359. case "scc":
  360. if len(args) != 1 {
  361. return fmt.Errorf("usage: digraph scc <label>")
  362. }
  363. label := args[0]
  364. if g[label] == nil {
  365. return fmt.Errorf("no such node %q", label)
  366. }
  367. for _, scc := range g.sccs() {
  368. if scc[label] {
  369. scc.sort().println("\n")
  370. break
  371. }
  372. }
  373. default:
  374. return fmt.Errorf("no such command %q", cmd)
  375. }
  376. return nil
  377. }
  378. // -- Utilities --------------------------------------------------------
  379. // split splits a line into words, which are generally separated by
  380. // spaces, but Go-style double-quoted string literals are also supported.
  381. // (This approximates the behaviour of the Bourne shell.)
  382. //
  383. // `one "two three"` -> ["one" "two three"]
  384. // `a"\n"b` -> ["a\nb"]
  385. //
  386. func split(line string) ([]string, error) {
  387. var (
  388. words []string
  389. inWord bool
  390. current bytes.Buffer
  391. )
  392. for len(line) > 0 {
  393. r, size := utf8.DecodeRuneInString(line)
  394. if unicode.IsSpace(r) {
  395. if inWord {
  396. words = append(words, current.String())
  397. current.Reset()
  398. inWord = false
  399. }
  400. } else if r == '"' {
  401. var ok bool
  402. size, ok = quotedLength(line)
  403. if !ok {
  404. return nil, errors.New("invalid quotation")
  405. }
  406. s, err := strconv.Unquote(line[:size])
  407. if err != nil {
  408. return nil, err
  409. }
  410. current.WriteString(s)
  411. inWord = true
  412. } else {
  413. current.WriteRune(r)
  414. inWord = true
  415. }
  416. line = line[size:]
  417. }
  418. if inWord {
  419. words = append(words, current.String())
  420. }
  421. return words, nil
  422. }
  423. // quotedLength returns the length in bytes of the prefix of input that
  424. // contain a possibly-valid double-quoted Go string literal.
  425. //
  426. // On success, n is at least two (""); input[:n] may be passed to
  427. // strconv.Unquote to interpret its value, and input[n:] contains the
  428. // rest of the input.
  429. //
  430. // On failure, quotedLength returns false, and the entire input can be
  431. // passed to strconv.Unquote if an informative error message is desired.
  432. //
  433. // quotedLength does not and need not detect all errors, such as
  434. // invalid hex or octal escape sequences, since it assumes
  435. // strconv.Unquote will be applied to the prefix. It guarantees only
  436. // that if there is a prefix of input containing a valid string literal,
  437. // its length is returned.
  438. //
  439. // TODO(adonovan): move this into a strconv-like utility package.
  440. //
  441. func quotedLength(input string) (n int, ok bool) {
  442. var offset int
  443. // next returns the rune at offset, or -1 on EOF.
  444. // offset advances to just after that rune.
  445. next := func() rune {
  446. if offset < len(input) {
  447. r, size := utf8.DecodeRuneInString(input[offset:])
  448. offset += size
  449. return r
  450. }
  451. return -1
  452. }
  453. if next() != '"' {
  454. return // error: not a quotation
  455. }
  456. for {
  457. r := next()
  458. if r == '\n' || r < 0 {
  459. return // error: string literal not terminated
  460. }
  461. if r == '"' {
  462. return offset, true // success
  463. }
  464. if r == '\\' {
  465. var skip int
  466. switch next() {
  467. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
  468. skip = 0
  469. case '0', '1', '2', '3', '4', '5', '6', '7':
  470. skip = 2
  471. case 'x':
  472. skip = 2
  473. case 'u':
  474. skip = 4
  475. case 'U':
  476. skip = 8
  477. default:
  478. return // error: invalid escape
  479. }
  480. for i := 0; i < skip; i++ {
  481. next()
  482. }
  483. }
  484. }
  485. }