You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

709 lines
12 KiB

  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. //go:generate go run gen.go
  6. //go:generate go run gen.go -test
  7. package main
  8. import (
  9. "bytes"
  10. "flag"
  11. "fmt"
  12. "go/format"
  13. "io/ioutil"
  14. "math/rand"
  15. "os"
  16. "sort"
  17. "strings"
  18. )
  19. // identifier converts s to a Go exported identifier.
  20. // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
  21. func identifier(s string) string {
  22. b := make([]byte, 0, len(s))
  23. cap := true
  24. for _, c := range s {
  25. if c == '-' {
  26. cap = true
  27. continue
  28. }
  29. if cap && 'a' <= c && c <= 'z' {
  30. c -= 'a' - 'A'
  31. }
  32. cap = false
  33. b = append(b, byte(c))
  34. }
  35. return string(b)
  36. }
// test is the -test command-line flag. When it is set, main writes the
// sorted atom list to table_test.go instead of generating table.go.
var test = flag.Bool("test", false, "generate table_test.go")
  38. func genFile(name string, buf *bytes.Buffer) {
  39. b, err := format.Source(buf.Bytes())
  40. if err != nil {
  41. fmt.Fprintln(os.Stderr, err)
  42. os.Exit(1)
  43. }
  44. if err := ioutil.WriteFile(name, b, 0644); err != nil {
  45. fmt.Fprintln(os.Stderr, err)
  46. os.Exit(1)
  47. }
  48. }
  49. func main() {
  50. flag.Parse()
  51. var all []string
  52. all = append(all, elements...)
  53. all = append(all, attributes...)
  54. all = append(all, eventHandlers...)
  55. all = append(all, extra...)
  56. sort.Strings(all)
  57. // uniq - lists have dups
  58. w := 0
  59. for _, s := range all {
  60. if w == 0 || all[w-1] != s {
  61. all[w] = s
  62. w++
  63. }
  64. }
  65. all = all[:w]
  66. if *test {
  67. var buf bytes.Buffer
  68. fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
  69. fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
  70. fmt.Fprintln(&buf, "package atom\n")
  71. fmt.Fprintln(&buf, "var testAtomList = []string{")
  72. for _, s := range all {
  73. fmt.Fprintf(&buf, "\t%q,\n", s)
  74. }
  75. fmt.Fprintln(&buf, "}")
  76. genFile("table_test.go", &buf)
  77. return
  78. }
  79. // Find hash that minimizes table size.
  80. var best *table
  81. for i := 0; i < 1000000; i++ {
  82. if best != nil && 1<<(best.k-1) < len(all) {
  83. break
  84. }
  85. h := rand.Uint32()
  86. for k := uint(0); k <= 16; k++ {
  87. if best != nil && k >= best.k {
  88. break
  89. }
  90. var t table
  91. if t.init(h, k, all) {
  92. best = &t
  93. break
  94. }
  95. }
  96. }
  97. if best == nil {
  98. fmt.Fprintf(os.Stderr, "failed to construct string table\n")
  99. os.Exit(1)
  100. }
  101. // Lay out strings, using overlaps when possible.
  102. layout := append([]string{}, all...)
  103. // Remove strings that are substrings of other strings
  104. for changed := true; changed; {
  105. changed = false
  106. for i, s := range layout {
  107. if s == "" {
  108. continue
  109. }
  110. for j, t := range layout {
  111. if i != j && t != "" && strings.Contains(s, t) {
  112. changed = true
  113. layout[j] = ""
  114. }
  115. }
  116. }
  117. }
  118. // Join strings where one suffix matches another prefix.
  119. for {
  120. // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
  121. // maximizing overlap length k.
  122. besti := -1
  123. bestj := -1
  124. bestk := 0
  125. for i, s := range layout {
  126. if s == "" {
  127. continue
  128. }
  129. for j, t := range layout {
  130. if i == j {
  131. continue
  132. }
  133. for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
  134. if s[len(s)-k:] == t[:k] {
  135. besti = i
  136. bestj = j
  137. bestk = k
  138. }
  139. }
  140. }
  141. }
  142. if bestk > 0 {
  143. layout[besti] += layout[bestj][bestk:]
  144. layout[bestj] = ""
  145. continue
  146. }
  147. break
  148. }
  149. text := strings.Join(layout, "")
  150. atom := map[string]uint32{}
  151. for _, s := range all {
  152. off := strings.Index(text, s)
  153. if off < 0 {
  154. panic("lost string " + s)
  155. }
  156. atom[s] = uint32(off<<8 | len(s))
  157. }
  158. var buf bytes.Buffer
  159. // Generate the Go code.
  160. fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
  161. fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
  162. fmt.Fprintln(&buf, "package atom\n\nconst (")
  163. // compute max len
  164. maxLen := 0
  165. for _, s := range all {
  166. if maxLen < len(s) {
  167. maxLen = len(s)
  168. }
  169. fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
  170. }
  171. fmt.Fprintln(&buf, ")\n")
  172. fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
  173. fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
  174. fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
  175. for i, s := range best.tab {
  176. if s == "" {
  177. continue
  178. }
  179. fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
  180. }
  181. fmt.Fprintf(&buf, "}\n")
  182. datasize := (1 << best.k) * 4
  183. fmt.Fprintln(&buf, "const atomText =")
  184. textsize := len(text)
  185. for len(text) > 60 {
  186. fmt.Fprintf(&buf, "\t%q +\n", text[:60])
  187. text = text[60:]
  188. }
  189. fmt.Fprintf(&buf, "\t%q\n\n", text)
  190. genFile("table.go", &buf)
  191. fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
  192. }
  193. type byLen []string
  194. func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
  195. func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  196. func (x byLen) Len() int { return len(x) }
  197. // fnv computes the FNV hash with an arbitrary starting value h.
  198. func fnv(h uint32, s string) uint32 {
  199. for i := 0; i < len(s); i++ {
  200. h ^= uint32(s[i])
  201. h *= 16777619
  202. }
  203. return h
  204. }
  205. // A table represents an attempt at constructing the lookup table.
  206. // The lookup table uses cuckoo hashing, meaning that each string
  207. // can be found in one of two positions.
  208. type table struct {
  209. h0 uint32
  210. k uint
  211. mask uint32
  212. tab []string
  213. }
  214. // hash returns the two hashes for s.
  215. func (t *table) hash(s string) (h1, h2 uint32) {
  216. h := fnv(t.h0, s)
  217. h1 = h & t.mask
  218. h2 = (h >> 16) & t.mask
  219. return
  220. }
  221. // init initializes the table with the given parameters.
  222. // h0 is the initial hash value,
  223. // k is the number of bits of hash value to use, and
  224. // x is the list of strings to store in the table.
  225. // init returns false if the table cannot be constructed.
  226. func (t *table) init(h0 uint32, k uint, x []string) bool {
  227. t.h0 = h0
  228. t.k = k
  229. t.tab = make([]string, 1<<k)
  230. t.mask = 1<<k - 1
  231. for _, s := range x {
  232. if !t.insert(s) {
  233. return false
  234. }
  235. }
  236. return true
  237. }
  238. // insert inserts s in the table.
  239. func (t *table) insert(s string) bool {
  240. h1, h2 := t.hash(s)
  241. if t.tab[h1] == "" {
  242. t.tab[h1] = s
  243. return true
  244. }
  245. if t.tab[h2] == "" {
  246. t.tab[h2] = s
  247. return true
  248. }
  249. if t.push(h1, 0) {
  250. t.tab[h1] = s
  251. return true
  252. }
  253. if t.push(h2, 0) {
  254. t.tab[h2] = s
  255. return true
  256. }
  257. return false
  258. }
  259. // push attempts to push aside the entry in slot i.
  260. func (t *table) push(i uint32, depth int) bool {
  261. if depth > len(t.tab) {
  262. return false
  263. }
  264. s := t.tab[i]
  265. h1, h2 := t.hash(s)
  266. j := h1 + h2 - i
  267. if t.tab[j] != "" && !t.push(j, depth+1) {
  268. return false
  269. }
  270. t.tab[j] = s
  271. return true
  272. }
  273. // The lists of element names and attribute keys were taken from
  274. // https://html.spec.whatwg.org/multipage/indices.html#index
  275. // as of the "HTML Living Standard - Last Updated 18 September 2017" version.
  276. // "command", "keygen" and "menuitem" have been removed from the spec,
  277. // but are kept here for backwards compatibility.
  278. var elements = []string{
  279. "a",
  280. "abbr",
  281. "address",
  282. "area",
  283. "article",
  284. "aside",
  285. "audio",
  286. "b",
  287. "base",
  288. "bdi",
  289. "bdo",
  290. "blockquote",
  291. "body",
  292. "br",
  293. "button",
  294. "canvas",
  295. "caption",
  296. "cite",
  297. "code",
  298. "col",
  299. "colgroup",
  300. "command",
  301. "data",
  302. "datalist",
  303. "dd",
  304. "del",
  305. "details",
  306. "dfn",
  307. "dialog",
  308. "div",
  309. "dl",
  310. "dt",
  311. "em",
  312. "embed",
  313. "fieldset",
  314. "figcaption",
  315. "figure",
  316. "footer",
  317. "form",
  318. "h1",
  319. "h2",
  320. "h3",
  321. "h4",
  322. "h5",
  323. "h6",
  324. "head",
  325. "header",
  326. "hgroup",
  327. "hr",
  328. "html",
  329. "i",
  330. "iframe",
  331. "img",
  332. "input",
  333. "ins",
  334. "kbd",
  335. "keygen",
  336. "label",
  337. "legend",
  338. "li",
  339. "link",
  340. "main",
  341. "map",
  342. "mark",
  343. "menu",
  344. "menuitem",
  345. "meta",
  346. "meter",
  347. "nav",
  348. "noscript",
  349. "object",
  350. "ol",
  351. "optgroup",
  352. "option",
  353. "output",
  354. "p",
  355. "param",
  356. "picture",
  357. "pre",
  358. "progress",
  359. "q",
  360. "rp",
  361. "rt",
  362. "ruby",
  363. "s",
  364. "samp",
  365. "script",
  366. "section",
  367. "select",
  368. "slot",
  369. "small",
  370. "source",
  371. "span",
  372. "strong",
  373. "style",
  374. "sub",
  375. "summary",
  376. "sup",
  377. "table",
  378. "tbody",
  379. "td",
  380. "template",
  381. "textarea",
  382. "tfoot",
  383. "th",
  384. "thead",
  385. "time",
  386. "title",
  387. "tr",
  388. "track",
  389. "u",
  390. "ul",
  391. "var",
  392. "video",
  393. "wbr",
  394. }
  395. // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
  396. //
  397. // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
  398. // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
  399. // but are kept here for backwards compatibility.
  400. var attributes = []string{
  401. "abbr",
  402. "accept",
  403. "accept-charset",
  404. "accesskey",
  405. "action",
  406. "allowfullscreen",
  407. "allowpaymentrequest",
  408. "allowusermedia",
  409. "alt",
  410. "as",
  411. "async",
  412. "autocomplete",
  413. "autofocus",
  414. "autoplay",
  415. "challenge",
  416. "charset",
  417. "checked",
  418. "cite",
  419. "class",
  420. "color",
  421. "cols",
  422. "colspan",
  423. "command",
  424. "content",
  425. "contenteditable",
  426. "contextmenu",
  427. "controls",
  428. "coords",
  429. "crossorigin",
  430. "data",
  431. "datetime",
  432. "default",
  433. "defer",
  434. "dir",
  435. "dirname",
  436. "disabled",
  437. "download",
  438. "draggable",
  439. "dropzone",
  440. "enctype",
  441. "for",
  442. "form",
  443. "formaction",
  444. "formenctype",
  445. "formmethod",
  446. "formnovalidate",
  447. "formtarget",
  448. "headers",
  449. "height",
  450. "hidden",
  451. "high",
  452. "href",
  453. "hreflang",
  454. "http-equiv",
  455. "icon",
  456. "id",
  457. "inputmode",
  458. "integrity",
  459. "is",
  460. "ismap",
  461. "itemid",
  462. "itemprop",
  463. "itemref",
  464. "itemscope",
  465. "itemtype",
  466. "keytype",
  467. "kind",
  468. "label",
  469. "lang",
  470. "list",
  471. "loop",
  472. "low",
  473. "manifest",
  474. "max",
  475. "maxlength",
  476. "media",
  477. "mediagroup",
  478. "method",
  479. "min",
  480. "minlength",
  481. "multiple",
  482. "muted",
  483. "name",
  484. "nomodule",
  485. "nonce",
  486. "novalidate",
  487. "open",
  488. "optimum",
  489. "pattern",
  490. "ping",
  491. "placeholder",
  492. "playsinline",
  493. "poster",
  494. "preload",
  495. "radiogroup",
  496. "readonly",
  497. "referrerpolicy",
  498. "rel",
  499. "required",
  500. "reversed",
  501. "rows",
  502. "rowspan",
  503. "sandbox",
  504. "spellcheck",
  505. "scope",
  506. "scoped",
  507. "seamless",
  508. "selected",
  509. "shape",
  510. "size",
  511. "sizes",
  512. "sortable",
  513. "sorted",
  514. "slot",
  515. "span",
  516. "spellcheck",
  517. "src",
  518. "srcdoc",
  519. "srclang",
  520. "srcset",
  521. "start",
  522. "step",
  523. "style",
  524. "tabindex",
  525. "target",
  526. "title",
  527. "translate",
  528. "type",
  529. "typemustmatch",
  530. "updateviacache",
  531. "usemap",
  532. "value",
  533. "width",
  534. "workertype",
  535. "wrap",
  536. }
  537. // "onautocomplete", "onautocompleteerror", "onmousewheel",
  538. // "onshow" and "onsort" have been removed from the spec,
  539. // but are kept here for backwards compatibility.
  540. var eventHandlers = []string{
  541. "onabort",
  542. "onautocomplete",
  543. "onautocompleteerror",
  544. "onauxclick",
  545. "onafterprint",
  546. "onbeforeprint",
  547. "onbeforeunload",
  548. "onblur",
  549. "oncancel",
  550. "oncanplay",
  551. "oncanplaythrough",
  552. "onchange",
  553. "onclick",
  554. "onclose",
  555. "oncontextmenu",
  556. "oncopy",
  557. "oncuechange",
  558. "oncut",
  559. "ondblclick",
  560. "ondrag",
  561. "ondragend",
  562. "ondragenter",
  563. "ondragexit",
  564. "ondragleave",
  565. "ondragover",
  566. "ondragstart",
  567. "ondrop",
  568. "ondurationchange",
  569. "onemptied",
  570. "onended",
  571. "onerror",
  572. "onfocus",
  573. "onhashchange",
  574. "oninput",
  575. "oninvalid",
  576. "onkeydown",
  577. "onkeypress",
  578. "onkeyup",
  579. "onlanguagechange",
  580. "onload",
  581. "onloadeddata",
  582. "onloadedmetadata",
  583. "onloadend",
  584. "onloadstart",
  585. "onmessage",
  586. "onmessageerror",
  587. "onmousedown",
  588. "onmouseenter",
  589. "onmouseleave",
  590. "onmousemove",
  591. "onmouseout",
  592. "onmouseover",
  593. "onmouseup",
  594. "onmousewheel",
  595. "onwheel",
  596. "onoffline",
  597. "ononline",
  598. "onpagehide",
  599. "onpageshow",
  600. "onpaste",
  601. "onpause",
  602. "onplay",
  603. "onplaying",
  604. "onpopstate",
  605. "onprogress",
  606. "onratechange",
  607. "onreset",
  608. "onresize",
  609. "onrejectionhandled",
  610. "onscroll",
  611. "onsecuritypolicyviolation",
  612. "onseeked",
  613. "onseeking",
  614. "onselect",
  615. "onshow",
  616. "onsort",
  617. "onstalled",
  618. "onstorage",
  619. "onsubmit",
  620. "onsuspend",
  621. "ontimeupdate",
  622. "ontoggle",
  623. "onunhandledrejection",
  624. "onunload",
  625. "onvolumechange",
  626. "onwaiting",
  627. }
  628. // extra are ad-hoc values not covered by any of the lists above.
  629. var extra = []string{
  630. "align",
  631. "annotation",
  632. "annotation-xml",
  633. "applet",
  634. "basefont",
  635. "bgsound",
  636. "big",
  637. "blink",
  638. "center",
  639. "color",
  640. "desc",
  641. "face",
  642. "font",
  643. "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
  644. "foreignobject",
  645. "frame",
  646. "frameset",
  647. "image",
  648. "isindex",
  649. "listing",
  650. "malignmark",
  651. "marquee",
  652. "math",
  653. "mglyph",
  654. "mi",
  655. "mn",
  656. "mo",
  657. "ms",
  658. "mtext",
  659. "nobr",
  660. "noembed",
  661. "noframes",
  662. "plaintext",
  663. "prompt",
  664. "public",
  665. "spacer",
  666. "strike",
  667. "svg",
  668. "system",
  669. "tt",
  670. "xmp",
  671. }