You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1353 lines
39 KiB

  1. // Copyright 2011 The Snappy-Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package snappy
  5. import (
  6. "bytes"
  7. "encoding/binary"
  8. "flag"
  9. "fmt"
  10. "io"
  11. "io/ioutil"
  12. "math/rand"
  13. "net/http"
  14. "os"
  15. "os/exec"
  16. "path/filepath"
  17. "runtime"
  18. "strings"
  19. "testing"
  20. )
  // download reports whether missing input files should be fetched before
  // the benchmarks run.
  var download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")

  // testdataDir names the directory that holds the test data.
  var testdataDir = flag.String("testdataDir", "testdata", "Directory containing the test data")

  // benchdataDir names the directory that holds the benchmark data.
  var benchdataDir = flag.String("benchdataDir", "testdata/bench", "Directory containing the benchmark data")
  // goEncoderShouldMatchCppEncoder reports whether, on this GOARCH, the tests
  // should require Go's encoder to produce output that is byte-for-byte
  // identical to the C++ snappy encoder's output.
  //
  // Any given input has more than one valid encoding, and more than one
  // reasonable algorithm exists along the throughput-versus-size frontier.
  // Still, the C++ encoder's algorithm is presumed to be a good one that has
  // been exercised on a wide range of inputs, so matching it exactly is taken
  // as evidence that the Go encoder is good too, without having to collect a
  // separate corpus of test data.
  //
  // The C++ algorithm is potentially endian dependent: it puns a byte pointer
  // to a uint32 pointer in order to load, hash and compare 4 bytes at a time.
  // The Go implementation is endian agnostic; its output is the same as that
  // of little-endian C++ code no matter what the CPU's endianness is.
  //
  // Consequently, comparisons against C++ output generated ahead of time (for
  // example the "testdata/pi.txt.rawsnappy" file, produced by C++ code on a
  // little-endian system) can run for any runtime.GOARCH value.
  //
  // Comparisons against C++ output generated on the fly, i.e. by fork/exec'ing
  // a C++ program, can only run on little-endian systems, since the C++ output
  // might differ on big-endian ones. The runtime package does not export
  // endianness as such, so the match-C++ test is limited to a few common
  // little-endian architectures.
  const goEncoderShouldMatchCppEncoder = runtime.GOARCH == "386" ||
  	runtime.GOARCH == "amd64" ||
  	runtime.GOARCH == "arm"
  50. func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
  51. got := maxEncodedLenOfMaxBlockSize
  52. want := MaxEncodedLen(maxBlockSize)
  53. if got != want {
  54. t.Fatalf("got %d, want %d", got, want)
  55. }
  56. }
  // cmp compares a (the "got" value) with b (the "want" value). It returns nil
  // when they hold the same bytes, and otherwise an error describing either
  // the length mismatch or the first differing byte.
  func cmp(a, b []byte) error {
  	switch {
  	case bytes.Equal(a, b):
  		return nil
  	case len(a) != len(b):
  		return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
  	}
  	// Same length but different contents: report the first mismatch.
  	for i, x := range a {
  		if y := b[i]; x != y {
  			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, x, y)
  		}
  	}
  	return nil
  }
  71. func roundtrip(b, ebuf, dbuf []byte) error {
  72. d, err := Decode(dbuf, Encode(ebuf, b))
  73. if err != nil {
  74. return fmt.Errorf("decoding error: %v", err)
  75. }
  76. if err := cmp(d, b); err != nil {
  77. return fmt.Errorf("roundtrip mismatch: %v", err)
  78. }
  79. return nil
  80. }
  81. func TestEmpty(t *testing.T) {
  82. if err := roundtrip(nil, nil, nil); err != nil {
  83. t.Fatal(err)
  84. }
  85. }
  86. func TestSmallCopy(t *testing.T) {
  87. for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  88. for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
  89. for i := 0; i < 32; i++ {
  90. s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
  91. if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
  92. t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
  93. }
  94. }
  95. }
  96. }
  97. }
  98. func TestSmallRand(t *testing.T) {
  99. rng := rand.New(rand.NewSource(1))
  100. for n := 1; n < 20000; n += 23 {
  101. b := make([]byte, n)
  102. for i := range b {
  103. b[i] = uint8(rng.Intn(256))
  104. }
  105. if err := roundtrip(b, nil, nil); err != nil {
  106. t.Fatal(err)
  107. }
  108. }
  109. }
  110. func TestSmallRegular(t *testing.T) {
  111. for n := 1; n < 20000; n += 23 {
  112. b := make([]byte, n)
  113. for i := range b {
  114. b[i] = uint8(i%10 + 'a')
  115. }
  116. if err := roundtrip(b, nil, nil); err != nil {
  117. t.Fatal(err)
  118. }
  119. }
  120. }
  121. func TestInvalidVarint(t *testing.T) {
  122. testCases := []struct {
  123. desc string
  124. input string
  125. }{{
  126. "invalid varint, final byte has continuation bit set",
  127. "\xff",
  128. }, {
  129. "invalid varint, value overflows uint64",
  130. "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
  131. }, {
  132. // https://github.com/google/snappy/blob/master/format_description.txt
  133. // says that "the stream starts with the uncompressed length [as a
  134. // varint] (up to a maximum of 2^32 - 1)".
  135. "valid varint (as uint64), but value overflows uint32",
  136. "\x80\x80\x80\x80\x10",
  137. }}
  138. for _, tc := range testCases {
  139. input := []byte(tc.input)
  140. if _, err := DecodedLen(input); err != ErrCorrupt {
  141. t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
  142. }
  143. if _, err := Decode(nil, input); err != ErrCorrupt {
  144. t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
  145. }
  146. }
  147. }
  148. func TestDecode(t *testing.T) {
  149. lit40Bytes := make([]byte, 40)
  150. for i := range lit40Bytes {
  151. lit40Bytes[i] = byte(i)
  152. }
  153. lit40 := string(lit40Bytes)
  154. testCases := []struct {
  155. desc string
  156. input string
  157. want string
  158. wantErr error
  159. }{{
  160. `decodedLen=0; valid input`,
  161. "\x00",
  162. "",
  163. nil,
  164. }, {
  165. `decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
  166. "\x03" + "\x08\xff\xff\xff",
  167. "\xff\xff\xff",
  168. nil,
  169. }, {
  170. `decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
  171. "\x02" + "\x08\xff\xff\xff",
  172. "",
  173. ErrCorrupt,
  174. }, {
  175. `decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
  176. "\x03" + "\x08\xff\xff",
  177. "",
  178. ErrCorrupt,
  179. }, {
  180. `decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
  181. "\x28" + "\x9c" + lit40,
  182. lit40,
  183. nil,
  184. }, {
  185. `decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
  186. "\x01" + "\xf0",
  187. "",
  188. ErrCorrupt,
  189. }, {
  190. `decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
  191. "\x03" + "\xf0\x02\xff\xff\xff",
  192. "\xff\xff\xff",
  193. nil,
  194. }, {
  195. `decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
  196. "\x01" + "\xf4\x00",
  197. "",
  198. ErrCorrupt,
  199. }, {
  200. `decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
  201. "\x03" + "\xf4\x02\x00\xff\xff\xff",
  202. "\xff\xff\xff",
  203. nil,
  204. }, {
  205. `decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
  206. "\x01" + "\xf8\x00\x00",
  207. "",
  208. ErrCorrupt,
  209. }, {
  210. `decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
  211. "\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
  212. "\xff\xff\xff",
  213. nil,
  214. }, {
  215. `decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
  216. "\x01" + "\xfc\x00\x00\x00",
  217. "",
  218. ErrCorrupt,
  219. }, {
  220. `decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
  221. "\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  222. "",
  223. ErrCorrupt,
  224. }, {
  225. `decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
  226. "\x04" + "\xfc\x02\x00\x00\x00\xff",
  227. "",
  228. ErrCorrupt,
  229. }, {
  230. `decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
  231. "\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
  232. "\xff\xff\xff",
  233. nil,
  234. }, {
  235. `decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
  236. "\x04" + "\x01",
  237. "",
  238. ErrCorrupt,
  239. }, {
  240. `decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
  241. "\x04" + "\x02\x00",
  242. "",
  243. ErrCorrupt,
  244. }, {
  245. `decodedLen=4; tagCopy4, 4 extra length|offset bytes; not enough extra bytes`,
  246. "\x04" + "\x03\x00\x00\x00",
  247. "",
  248. ErrCorrupt,
  249. }, {
  250. `decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
  251. "\x04" + "\x0cabcd",
  252. "abcd",
  253. nil,
  254. }, {
  255. `decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
  256. "\x0d" + "\x0cabcd" + "\x15\x04",
  257. "abcdabcdabcda",
  258. nil,
  259. }, {
  260. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
  261. "\x08" + "\x0cabcd" + "\x01\x04",
  262. "abcdabcd",
  263. nil,
  264. }, {
  265. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
  266. "\x08" + "\x0cabcd" + "\x01\x02",
  267. "abcdcdcd",
  268. nil,
  269. }, {
  270. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
  271. "\x08" + "\x0cabcd" + "\x01\x01",
  272. "abcddddd",
  273. nil,
  274. }, {
  275. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
  276. "\x08" + "\x0cabcd" + "\x01\x00",
  277. "",
  278. ErrCorrupt,
  279. }, {
  280. `decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
  281. "\x09" + "\x0cabcd" + "\x01\x04",
  282. "",
  283. ErrCorrupt,
  284. }, {
  285. `decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
  286. "\x08" + "\x0cabcd" + "\x01\x05",
  287. "",
  288. ErrCorrupt,
  289. }, {
  290. `decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
  291. "\x07" + "\x0cabcd" + "\x01\x04",
  292. "",
  293. ErrCorrupt,
  294. }, {
  295. `decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
  296. "\x06" + "\x0cabcd" + "\x06\x03\x00",
  297. "abcdbc",
  298. nil,
  299. }, {
  300. `decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy4; length=2 offset=3; valid input`,
  301. "\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
  302. "abcdbc",
  303. nil,
  304. }}
  305. const (
  306. // notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
  307. // not present in either the input or the output. It is written to dBuf
  308. // to check that Decode does not write bytes past the end of
  309. // dBuf[:dLen].
  310. //
  311. // The magic number 37 was chosen because it is prime. A more 'natural'
  312. // number like 32 might lead to a false negative if, for example, a
  313. // byte was incorrectly copied 4*8 bytes later.
  314. notPresentBase = 0xa0
  315. notPresentLen = 37
  316. )
  317. var dBuf [100]byte
  318. loop:
  319. for i, tc := range testCases {
  320. input := []byte(tc.input)
  321. for _, x := range input {
  322. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  323. t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
  324. continue loop
  325. }
  326. }
  327. dLen, n := binary.Uvarint(input)
  328. if n <= 0 {
  329. t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
  330. continue
  331. }
  332. if dLen > uint64(len(dBuf)) {
  333. t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
  334. continue
  335. }
  336. for j := range dBuf {
  337. dBuf[j] = byte(notPresentBase + j%notPresentLen)
  338. }
  339. g, gotErr := Decode(dBuf[:], input)
  340. if got := string(g); got != tc.want || gotErr != tc.wantErr {
  341. t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
  342. i, tc.desc, got, gotErr, tc.want, tc.wantErr)
  343. continue
  344. }
  345. for j, x := range dBuf {
  346. if uint64(j) < dLen {
  347. continue
  348. }
  349. if w := byte(notPresentBase + j%notPresentLen); x != w {
  350. t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
  351. i, tc.desc, j, x, w, dBuf)
  352. continue loop
  353. }
  354. }
  355. }
  356. }
  357. func TestDecodeCopy4(t *testing.T) {
  358. dots := strings.Repeat(".", 65536)
  359. input := strings.Join([]string{
  360. "\x89\x80\x04", // decodedLen = 65545.
  361. "\x0cpqrs", // 4-byte literal "pqrs".
  362. "\xf4\xff\xff" + dots, // 65536-byte literal dots.
  363. "\x13\x04\x00\x01\x00", // tagCopy4; length=5 offset=65540.
  364. }, "")
  365. gotBytes, err := Decode(nil, []byte(input))
  366. if err != nil {
  367. t.Fatal(err)
  368. }
  369. got := string(gotBytes)
  370. want := "pqrs" + dots + "pqrs."
  371. if len(got) != len(want) {
  372. t.Fatalf("got %d bytes, want %d", len(got), len(want))
  373. }
  374. if got != want {
  375. for i := 0; i < len(got); i++ {
  376. if g, w := got[i], want[i]; g != w {
  377. t.Fatalf("byte #%d: got %#02x, want %#02x", i, g, w)
  378. }
  379. }
  380. }
  381. }
  382. // TestDecodeLengthOffset tests decoding an encoding of the form literal +
  383. // copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
  384. func TestDecodeLengthOffset(t *testing.T) {
  385. const (
  386. prefix = "abcdefghijklmnopqr"
  387. suffix = "ABCDEFGHIJKLMNOPQR"
  388. // notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
  389. // not present in either the input or the output. It is written to
  390. // gotBuf to check that Decode does not write bytes past the end of
  391. // gotBuf[:totalLen].
  392. //
  393. // The magic number 37 was chosen because it is prime. A more 'natural'
  394. // number like 32 might lead to a false negative if, for example, a
  395. // byte was incorrectly copied 4*8 bytes later.
  396. notPresentBase = 0xa0
  397. notPresentLen = 37
  398. )
  399. var gotBuf, wantBuf, inputBuf [128]byte
  400. for length := 1; length <= 18; length++ {
  401. for offset := 1; offset <= 18; offset++ {
  402. loop:
  403. for suffixLen := 0; suffixLen <= 18; suffixLen++ {
  404. totalLen := len(prefix) + length + suffixLen
  405. inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
  406. inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
  407. inputLen++
  408. inputLen += copy(inputBuf[inputLen:], prefix)
  409. inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
  410. inputBuf[inputLen+1] = byte(offset)
  411. inputBuf[inputLen+2] = 0x00
  412. inputLen += 3
  413. if suffixLen > 0 {
  414. inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
  415. inputLen++
  416. inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
  417. }
  418. input := inputBuf[:inputLen]
  419. for i := range gotBuf {
  420. gotBuf[i] = byte(notPresentBase + i%notPresentLen)
  421. }
  422. got, err := Decode(gotBuf[:], input)
  423. if err != nil {
  424. t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
  425. continue
  426. }
  427. wantLen := 0
  428. wantLen += copy(wantBuf[wantLen:], prefix)
  429. for i := 0; i < length; i++ {
  430. wantBuf[wantLen] = wantBuf[wantLen-offset]
  431. wantLen++
  432. }
  433. wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
  434. want := wantBuf[:wantLen]
  435. for _, x := range input {
  436. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  437. t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
  438. length, offset, suffixLen, x, input)
  439. continue loop
  440. }
  441. }
  442. for i, x := range gotBuf {
  443. if i < totalLen {
  444. continue
  445. }
  446. if w := byte(notPresentBase + i%notPresentLen); x != w {
  447. t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
  448. "Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
  449. length, offset, suffixLen, totalLen, i, x, w, gotBuf)
  450. continue loop
  451. }
  452. }
  453. for _, x := range want {
  454. if notPresentBase <= x && x < notPresentBase+notPresentLen {
  455. t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
  456. length, offset, suffixLen, x, want)
  457. continue loop
  458. }
  459. }
  460. if !bytes.Equal(got, want) {
  461. t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
  462. length, offset, suffixLen, input, got, want)
  463. continue
  464. }
  465. }
  466. }
  467. }
  468. }
  // goldenText is the file name of the uncompressed golden test input.
  const goldenText = "Mark.Twain-Tom.Sawyer.txt"

  // goldenCompressed is the file name of the raw-snappy-compressed form of
  // goldenText.
  const goldenCompressed = goldenText + ".rawsnappy"
  473. func TestDecodeGoldenInput(t *testing.T) {
  474. tDir := filepath.FromSlash(*testdataDir)
  475. src, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
  476. if err != nil {
  477. t.Fatalf("ReadFile: %v", err)
  478. }
  479. got, err := Decode(nil, src)
  480. if err != nil {
  481. t.Fatalf("Decode: %v", err)
  482. }
  483. want, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  484. if err != nil {
  485. t.Fatalf("ReadFile: %v", err)
  486. }
  487. if err := cmp(got, want); err != nil {
  488. t.Fatal(err)
  489. }
  490. }
  491. func TestEncodeGoldenInput(t *testing.T) {
  492. tDir := filepath.FromSlash(*testdataDir)
  493. src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  494. if err != nil {
  495. t.Fatalf("ReadFile: %v", err)
  496. }
  497. got := Encode(nil, src)
  498. want, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
  499. if err != nil {
  500. t.Fatalf("ReadFile: %v", err)
  501. }
  502. if err := cmp(got, want); err != nil {
  503. t.Fatal(err)
  504. }
  505. }
  506. func TestExtendMatchGoldenInput(t *testing.T) {
  507. tDir := filepath.FromSlash(*testdataDir)
  508. src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  509. if err != nil {
  510. t.Fatalf("ReadFile: %v", err)
  511. }
  512. for i, tc := range extendMatchGoldenTestCases {
  513. got := extendMatch(src, tc.i, tc.j)
  514. if got != tc.want {
  515. t.Errorf("test #%d: i, j = %5d, %5d: got %5d (= j + %6d), want %5d (= j + %6d)",
  516. i, tc.i, tc.j, got, got-tc.j, tc.want, tc.want-tc.j)
  517. }
  518. }
  519. }
  520. func TestExtendMatch(t *testing.T) {
  521. // ref is a simple, reference implementation of extendMatch.
  522. ref := func(src []byte, i, j int) int {
  523. for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
  524. }
  525. return j
  526. }
  527. nums := []int{0, 1, 2, 7, 8, 9, 29, 30, 31, 32, 33, 34, 38, 39, 40}
  528. for yIndex := 40; yIndex > 30; yIndex-- {
  529. xxx := bytes.Repeat([]byte("x"), 40)
  530. if yIndex < len(xxx) {
  531. xxx[yIndex] = 'y'
  532. }
  533. for _, i := range nums {
  534. for _, j := range nums {
  535. if i >= j {
  536. continue
  537. }
  538. got := extendMatch(xxx, i, j)
  539. want := ref(xxx, i, j)
  540. if got != want {
  541. t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
  542. }
  543. }
  544. }
  545. }
  546. }
  // snappytoolCmdName is the path of the snappytool executable that the
  // same-encoding-as-C++ tests stat and then run with the "-e" flag.
  const snappytoolCmdName = "cmd/snappytool/snappytool"
  548. func skipTestSameEncodingAsCpp() (msg string) {
  549. if !goEncoderShouldMatchCppEncoder {
  550. return fmt.Sprintf("skipping testing that the encoding is byte-for-byte identical to C++: GOARCH=%s", runtime.GOARCH)
  551. }
  552. if _, err := os.Stat(snappytoolCmdName); err != nil {
  553. return fmt.Sprintf("could not find snappytool: %v", err)
  554. }
  555. return ""
  556. }
  557. func runTestSameEncodingAsCpp(src []byte) error {
  558. got := Encode(nil, src)
  559. cmd := exec.Command(snappytoolCmdName, "-e")
  560. cmd.Stdin = bytes.NewReader(src)
  561. want, err := cmd.Output()
  562. if err != nil {
  563. return fmt.Errorf("could not run snappytool: %v", err)
  564. }
  565. return cmp(got, want)
  566. }
  567. func TestSameEncodingAsCppShortCopies(t *testing.T) {
  568. if msg := skipTestSameEncodingAsCpp(); msg != "" {
  569. t.Skip(msg)
  570. }
  571. src := bytes.Repeat([]byte{'a'}, 20)
  572. for i := 0; i <= len(src); i++ {
  573. if err := runTestSameEncodingAsCpp(src[:i]); err != nil {
  574. t.Errorf("i=%d: %v", i, err)
  575. }
  576. }
  577. }
  578. func TestSameEncodingAsCppLongFiles(t *testing.T) {
  579. if msg := skipTestSameEncodingAsCpp(); msg != "" {
  580. t.Skip(msg)
  581. }
  582. bDir := filepath.FromSlash(*benchdataDir)
  583. failed := false
  584. for i, tf := range testFiles {
  585. if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
  586. t.Fatalf("failed to download testdata: %s", err)
  587. }
  588. data := readFile(t, filepath.Join(bDir, tf.filename))
  589. if n := tf.sizeLimit; 0 < n && n < len(data) {
  590. data = data[:n]
  591. }
  592. if err := runTestSameEncodingAsCpp(data); err != nil {
  593. t.Errorf("i=%d: %v", i, err)
  594. failed = true
  595. }
  596. }
  597. if failed {
  598. t.Errorf("was the snappytool program built against the C++ snappy library version " +
  599. "d53de187 or later, commited on 2016-04-05? See " +
  600. "https://github.com/google/snappy/commit/d53de18799418e113e44444252a39b12a0e4e0cc")
  601. }
  602. }
  // TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
  // described in decode_amd64.s and its claim of a 10 byte overrun worst case.
  //
  // The test does not touch any memory: it replays the position arithmetic
  // for every length and offset in [1, 18), treating each step as reaching 8
  // bytes past its starting position, and checks that the final position is
  // base+length and that the furthest position reached exceeds it by at most
  // 10.
  func TestSlowForwardCopyOverrun(t *testing.T) {
  	const base = 100

  	// simulate returns the final destination position d and the highest
  	// position reached while copying length bytes at the given offset.
  	simulate := func(length, offset int) (d, highWaterMark int) {
  		d, highWaterMark = base, base
  		touch := func(from int) {
  			if end := from + 8; end > highWaterMark {
  				highWaterMark = end
  			}
  		}

  		// makeOffsetAtLeast8
  		for ; offset < 8; offset *= 2 {
  			touch(d)
  			length -= offset
  			d += offset
  		}

  		// fixUpSlowForwardCopy
  		a := d
  		d += length

  		// finishSlowForwardCopy
  		for ; length > 0; length -= 8 {
  			touch(a)
  			a += 8
  		}
  		return d, highWaterMark
  	}

  	for length := 1; length < 18; length++ {
  		for offset := 1; offset < 18; offset++ {
  			d, highWaterMark := simulate(length, offset)
  			dWant := base + length
  			overrun := highWaterMark - dWant
  			if d == dWant && 0 <= overrun && overrun <= 10 {
  				continue
  			}
  			t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
  				length, offset, d, overrun, dWant)
  		}
  	}
  }
  642. // TestEncodeNoiseThenRepeats encodes input for which the first half is very
  643. // incompressible and the second half is very compressible. The encoded form's
  644. // length should be closer to 50% of the original length than 100%.
  645. func TestEncodeNoiseThenRepeats(t *testing.T) {
  646. for _, origLen := range []int{256 * 1024, 2048 * 1024} {
  647. src := make([]byte, origLen)
  648. rng := rand.New(rand.NewSource(1))
  649. firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
  650. for i := range firstHalf {
  651. firstHalf[i] = uint8(rng.Intn(256))
  652. }
  653. for i := range secondHalf {
  654. secondHalf[i] = uint8(i >> 8)
  655. }
  656. dst := Encode(nil, src)
  657. if got, want := len(dst), origLen*3/4; got >= want {
  658. t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
  659. }
  660. }
  661. }
  662. func TestFramingFormat(t *testing.T) {
  663. // src is comprised of alternating 1e5-sized sequences of random
  664. // (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
  665. // because it is larger than maxBlockSize (64k).
  666. src := make([]byte, 1e6)
  667. rng := rand.New(rand.NewSource(1))
  668. for i := 0; i < 10; i++ {
  669. if i%2 == 0 {
  670. for j := 0; j < 1e5; j++ {
  671. src[1e5*i+j] = uint8(rng.Intn(256))
  672. }
  673. } else {
  674. for j := 0; j < 1e5; j++ {
  675. src[1e5*i+j] = uint8(i)
  676. }
  677. }
  678. }
  679. buf := new(bytes.Buffer)
  680. if _, err := NewWriter(buf).Write(src); err != nil {
  681. t.Fatalf("Write: encoding: %v", err)
  682. }
  683. dst, err := ioutil.ReadAll(NewReader(buf))
  684. if err != nil {
  685. t.Fatalf("ReadAll: decoding: %v", err)
  686. }
  687. if err := cmp(dst, src); err != nil {
  688. t.Fatal(err)
  689. }
  690. }
  691. func TestWriterGoldenOutput(t *testing.T) {
  692. buf := new(bytes.Buffer)
  693. w := NewBufferedWriter(buf)
  694. defer w.Close()
  695. w.Write([]byte("abcd")) // Not compressible.
  696. w.Flush()
  697. w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
  698. w.Flush()
  699. // The next chunk is also compressible, but a naive, greedy encoding of the
  700. // overall length 67 copy as a length 64 copy (the longest expressible as a
  701. // tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
  702. // tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
  703. // bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
  704. // (of length 60) and a 2-byte tagCopy1 (of length 7).
  705. w.Write(bytes.Repeat([]byte{'B'}, 68))
  706. w.Write([]byte("efC")) // Not compressible.
  707. w.Write(bytes.Repeat([]byte{'C'}, 20)) // Compressible.
  708. w.Write(bytes.Repeat([]byte{'B'}, 20)) // Compressible.
  709. w.Write([]byte("g")) // Not compressible.
  710. w.Flush()
  711. got := buf.String()
  712. want := strings.Join([]string{
  713. magicChunk,
  714. "\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
  715. "\x68\x10\xe6\xb6", // Checksum.
  716. "\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  717. "\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
  718. "\x5f\xeb\xf2\x10", // Checksum.
  719. "\x96\x01", // Compressed payload: Uncompressed length (varint encoded): 150.
  720. "\x00\x41", // Compressed payload: tagLiteral, length=1, "A".
  721. "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
  722. "\xfe\x01\x00", // Compressed payload: tagCopy2, length=64, offset=1.
  723. "\x52\x01\x00", // Compressed payload: tagCopy2, length=21, offset=1.
  724. "\x00\x18\x00\x00", // Compressed chunk, 24 bytes long (including 4 byte checksum).
  725. "\x30\x85\x69\xeb", // Checksum.
  726. "\x70", // Compressed payload: Uncompressed length (varint encoded): 112.
  727. "\x00\x42", // Compressed payload: tagLiteral, length=1, "B".
  728. "\xee\x01\x00", // Compressed payload: tagCopy2, length=60, offset=1.
  729. "\x0d\x01", // Compressed payload: tagCopy1, length=7, offset=1.
  730. "\x08\x65\x66\x43", // Compressed payload: tagLiteral, length=3, "efC".
  731. "\x4e\x01\x00", // Compressed payload: tagCopy2, length=20, offset=1.
  732. "\x4e\x5a\x00", // Compressed payload: tagCopy2, length=20, offset=90.
  733. "\x00\x67", // Compressed payload: tagLiteral, length=1, "g".
  734. }, "")
  735. if got != want {
  736. t.Fatalf("\ngot: % x\nwant: % x", got, want)
  737. }
  738. }
  739. func TestEmitLiteral(t *testing.T) {
  740. testCases := []struct {
  741. length int
  742. want string
  743. }{
  744. {1, "\x00"},
  745. {2, "\x04"},
  746. {59, "\xe8"},
  747. {60, "\xec"},
  748. {61, "\xf0\x3c"},
  749. {62, "\xf0\x3d"},
  750. {254, "\xf0\xfd"},
  751. {255, "\xf0\xfe"},
  752. {256, "\xf0\xff"},
  753. {257, "\xf4\x00\x01"},
  754. {65534, "\xf4\xfd\xff"},
  755. {65535, "\xf4\xfe\xff"},
  756. {65536, "\xf4\xff\xff"},
  757. }
  758. dst := make([]byte, 70000)
  759. nines := bytes.Repeat([]byte{0x99}, 65536)
  760. for _, tc := range testCases {
  761. lit := nines[:tc.length]
  762. n := emitLiteral(dst, lit)
  763. if !bytes.HasSuffix(dst[:n], lit) {
  764. t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
  765. continue
  766. }
  767. got := string(dst[:n-tc.length])
  768. if got != tc.want {
  769. t.Errorf("length=%d:\ngot % x\nwant % x", tc.length, got, tc.want)
  770. continue
  771. }
  772. }
  773. }
  774. func TestEmitCopy(t *testing.T) {
  775. testCases := []struct {
  776. offset int
  777. length int
  778. want string
  779. }{
  780. {8, 04, "\x01\x08"},
  781. {8, 11, "\x1d\x08"},
  782. {8, 12, "\x2e\x08\x00"},
  783. {8, 13, "\x32\x08\x00"},
  784. {8, 59, "\xea\x08\x00"},
  785. {8, 60, "\xee\x08\x00"},
  786. {8, 61, "\xf2\x08\x00"},
  787. {8, 62, "\xf6\x08\x00"},
  788. {8, 63, "\xfa\x08\x00"},
  789. {8, 64, "\xfe\x08\x00"},
  790. {8, 65, "\xee\x08\x00\x05\x08"},
  791. {8, 66, "\xee\x08\x00\x09\x08"},
  792. {8, 67, "\xee\x08\x00\x0d\x08"},
  793. {8, 68, "\xfe\x08\x00\x01\x08"},
  794. {8, 69, "\xfe\x08\x00\x05\x08"},
  795. {8, 80, "\xfe\x08\x00\x3e\x08\x00"},
  796. {256, 04, "\x21\x00"},
  797. {256, 11, "\x3d\x00"},
  798. {256, 12, "\x2e\x00\x01"},
  799. {256, 13, "\x32\x00\x01"},
  800. {256, 59, "\xea\x00\x01"},
  801. {256, 60, "\xee\x00\x01"},
  802. {256, 61, "\xf2\x00\x01"},
  803. {256, 62, "\xf6\x00\x01"},
  804. {256, 63, "\xfa\x00\x01"},
  805. {256, 64, "\xfe\x00\x01"},
  806. {256, 65, "\xee\x00\x01\x25\x00"},
  807. {256, 66, "\xee\x00\x01\x29\x00"},
  808. {256, 67, "\xee\x00\x01\x2d\x00"},
  809. {256, 68, "\xfe\x00\x01\x21\x00"},
  810. {256, 69, "\xfe\x00\x01\x25\x00"},
  811. {256, 80, "\xfe\x00\x01\x3e\x00\x01"},
  812. {2048, 04, "\x0e\x00\x08"},
  813. {2048, 11, "\x2a\x00\x08"},
  814. {2048, 12, "\x2e\x00\x08"},
  815. {2048, 13, "\x32\x00\x08"},
  816. {2048, 59, "\xea\x00\x08"},
  817. {2048, 60, "\xee\x00\x08"},
  818. {2048, 61, "\xf2\x00\x08"},
  819. {2048, 62, "\xf6\x00\x08"},
  820. {2048, 63, "\xfa\x00\x08"},
  821. {2048, 64, "\xfe\x00\x08"},
  822. {2048, 65, "\xee\x00\x08\x12\x00\x08"},
  823. {2048, 66, "\xee\x00\x08\x16\x00\x08"},
  824. {2048, 67, "\xee\x00\x08\x1a\x00\x08"},
  825. {2048, 68, "\xfe\x00\x08\x0e\x00\x08"},
  826. {2048, 69, "\xfe\x00\x08\x12\x00\x08"},
  827. {2048, 80, "\xfe\x00\x08\x3e\x00\x08"},
  828. }
  829. dst := make([]byte, 1024)
  830. for _, tc := range testCases {
  831. n := emitCopy(dst, tc.offset, tc.length)
  832. got := string(dst[:n])
  833. if got != tc.want {
  834. t.Errorf("offset=%d, length=%d:\ngot % x\nwant % x", tc.offset, tc.length, got, tc.want)
  835. }
  836. }
  837. }
  838. func TestNewBufferedWriter(t *testing.T) {
  839. // Test all 32 possible sub-sequences of these 5 input slices.
  840. //
  841. // Their lengths sum to 400,000, which is over 6 times the Writer ibuf
  842. // capacity: 6 * maxBlockSize is 393,216.
  843. inputs := [][]byte{
  844. bytes.Repeat([]byte{'a'}, 40000),
  845. bytes.Repeat([]byte{'b'}, 150000),
  846. bytes.Repeat([]byte{'c'}, 60000),
  847. bytes.Repeat([]byte{'d'}, 120000),
  848. bytes.Repeat([]byte{'e'}, 30000),
  849. }
  850. loop:
  851. for i := 0; i < 1<<uint(len(inputs)); i++ {
  852. var want []byte
  853. buf := new(bytes.Buffer)
  854. w := NewBufferedWriter(buf)
  855. for j, input := range inputs {
  856. if i&(1<<uint(j)) == 0 {
  857. continue
  858. }
  859. if _, err := w.Write(input); err != nil {
  860. t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
  861. continue loop
  862. }
  863. want = append(want, input...)
  864. }
  865. if err := w.Close(); err != nil {
  866. t.Errorf("i=%#02x: Close: %v", i, err)
  867. continue
  868. }
  869. got, err := ioutil.ReadAll(NewReader(buf))
  870. if err != nil {
  871. t.Errorf("i=%#02x: ReadAll: %v", i, err)
  872. continue
  873. }
  874. if err := cmp(got, want); err != nil {
  875. t.Errorf("i=%#02x: %v", i, err)
  876. continue
  877. }
  878. }
  879. }
  880. func TestFlush(t *testing.T) {
  881. buf := new(bytes.Buffer)
  882. w := NewBufferedWriter(buf)
  883. defer w.Close()
  884. if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
  885. t.Fatalf("Write: %v", err)
  886. }
  887. if n := buf.Len(); n != 0 {
  888. t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
  889. }
  890. if err := w.Flush(); err != nil {
  891. t.Fatalf("Flush: %v", err)
  892. }
  893. if n := buf.Len(); n == 0 {
  894. t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
  895. }
  896. }
  897. func TestReaderUncompressedDataOK(t *testing.T) {
  898. r := NewReader(strings.NewReader(magicChunk +
  899. "\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
  900. "\x68\x10\xe6\xb6" + // Checksum.
  901. "\x61\x62\x63\x64", // Uncompressed payload: "abcd".
  902. ))
  903. g, err := ioutil.ReadAll(r)
  904. if err != nil {
  905. t.Fatal(err)
  906. }
  907. if got, want := string(g), "abcd"; got != want {
  908. t.Fatalf("got %q, want %q", got, want)
  909. }
  910. }
  911. func TestReaderUncompressedDataNoPayload(t *testing.T) {
  912. r := NewReader(strings.NewReader(magicChunk +
  913. "\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
  914. "", // No payload; corrupt input.
  915. ))
  916. if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
  917. t.Fatalf("got %v, want %v", err, ErrCorrupt)
  918. }
  919. }
  920. func TestReaderUncompressedDataTooLong(t *testing.T) {
  921. // https://github.com/google/snappy/blob/master/framing_format.txt section
  922. // 4.3 says that "the maximum legal chunk length... is 65540", or 0x10004.
  923. const n = 0x10005
  924. r := NewReader(strings.NewReader(magicChunk +
  925. "\x01\x05\x00\x01" + // Uncompressed chunk, n bytes long.
  926. strings.Repeat("\x00", n),
  927. ))
  928. if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
  929. t.Fatalf("got %v, want %v", err, ErrCorrupt)
  930. }
  931. }
  932. func TestReaderReset(t *testing.T) {
  933. gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
  934. buf := new(bytes.Buffer)
  935. if _, err := NewWriter(buf).Write(gold); err != nil {
  936. t.Fatalf("Write: %v", err)
  937. }
  938. encoded, invalid, partial := buf.String(), "invalid", "partial"
  939. r := NewReader(nil)
  940. for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
  941. if s == partial {
  942. r.Reset(strings.NewReader(encoded))
  943. if _, err := r.Read(make([]byte, 101)); err != nil {
  944. t.Errorf("#%d: %v", i, err)
  945. continue
  946. }
  947. continue
  948. }
  949. r.Reset(strings.NewReader(s))
  950. got, err := ioutil.ReadAll(r)
  951. switch s {
  952. case encoded:
  953. if err != nil {
  954. t.Errorf("#%d: %v", i, err)
  955. continue
  956. }
  957. if err := cmp(got, gold); err != nil {
  958. t.Errorf("#%d: %v", i, err)
  959. continue
  960. }
  961. case invalid:
  962. if err == nil {
  963. t.Errorf("#%d: got nil error, want non-nil", i)
  964. continue
  965. }
  966. }
  967. }
  968. }
  969. func TestWriterReset(t *testing.T) {
  970. gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
  971. const n = 20
  972. for _, buffered := range []bool{false, true} {
  973. var w *Writer
  974. if buffered {
  975. w = NewBufferedWriter(nil)
  976. defer w.Close()
  977. } else {
  978. w = NewWriter(nil)
  979. }
  980. var gots, wants [][]byte
  981. failed := false
  982. for i := 0; i <= n; i++ {
  983. buf := new(bytes.Buffer)
  984. w.Reset(buf)
  985. want := gold[:len(gold)*i/n]
  986. if _, err := w.Write(want); err != nil {
  987. t.Errorf("#%d: Write: %v", i, err)
  988. failed = true
  989. continue
  990. }
  991. if buffered {
  992. if err := w.Flush(); err != nil {
  993. t.Errorf("#%d: Flush: %v", i, err)
  994. failed = true
  995. continue
  996. }
  997. }
  998. got, err := ioutil.ReadAll(NewReader(buf))
  999. if err != nil {
  1000. t.Errorf("#%d: ReadAll: %v", i, err)
  1001. failed = true
  1002. continue
  1003. }
  1004. gots = append(gots, got)
  1005. wants = append(wants, want)
  1006. }
  1007. if failed {
  1008. continue
  1009. }
  1010. for i := range gots {
  1011. if err := cmp(gots[i], wants[i]); err != nil {
  1012. t.Errorf("#%d: %v", i, err)
  1013. }
  1014. }
  1015. }
  1016. }
  1017. func TestWriterResetWithoutFlush(t *testing.T) {
  1018. buf0 := new(bytes.Buffer)
  1019. buf1 := new(bytes.Buffer)
  1020. w := NewBufferedWriter(buf0)
  1021. if _, err := w.Write([]byte("xxx")); err != nil {
  1022. t.Fatalf("Write #0: %v", err)
  1023. }
  1024. // Note that we don't Flush the Writer before calling Reset.
  1025. w.Reset(buf1)
  1026. if _, err := w.Write([]byte("yyy")); err != nil {
  1027. t.Fatalf("Write #1: %v", err)
  1028. }
  1029. if err := w.Flush(); err != nil {
  1030. t.Fatalf("Flush: %v", err)
  1031. }
  1032. got, err := ioutil.ReadAll(NewReader(buf1))
  1033. if err != nil {
  1034. t.Fatalf("ReadAll: %v", err)
  1035. }
  1036. if err := cmp(got, []byte("yyy")); err != nil {
  1037. t.Fatal(err)
  1038. }
  1039. }
  1040. type writeCounter int
  1041. func (c *writeCounter) Write(p []byte) (int, error) {
  1042. *c++
  1043. return len(p), nil
  1044. }
  1045. // TestNumUnderlyingWrites tests that each Writer flush only makes one or two
  1046. // Write calls on its underlying io.Writer, depending on whether or not the
  1047. // flushed buffer was compressible.
  1048. func TestNumUnderlyingWrites(t *testing.T) {
  1049. testCases := []struct {
  1050. input []byte
  1051. want int
  1052. }{
  1053. {bytes.Repeat([]byte{'x'}, 100), 1},
  1054. {bytes.Repeat([]byte{'y'}, 100), 1},
  1055. {[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
  1056. }
  1057. var c writeCounter
  1058. w := NewBufferedWriter(&c)
  1059. defer w.Close()
  1060. for i, tc := range testCases {
  1061. c = 0
  1062. if _, err := w.Write(tc.input); err != nil {
  1063. t.Errorf("#%d: Write: %v", i, err)
  1064. continue
  1065. }
  1066. if err := w.Flush(); err != nil {
  1067. t.Errorf("#%d: Flush: %v", i, err)
  1068. continue
  1069. }
  1070. if int(c) != tc.want {
  1071. t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
  1072. continue
  1073. }
  1074. }
  1075. }
  1076. func benchDecode(b *testing.B, src []byte) {
  1077. encoded := Encode(nil, src)
  1078. // Bandwidth is in amount of uncompressed data.
  1079. b.SetBytes(int64(len(src)))
  1080. b.ResetTimer()
  1081. for i := 0; i < b.N; i++ {
  1082. Decode(src, encoded)
  1083. }
  1084. }
  1085. func benchEncode(b *testing.B, src []byte) {
  1086. // Bandwidth is in amount of uncompressed data.
  1087. b.SetBytes(int64(len(src)))
  1088. dst := make([]byte, MaxEncodedLen(len(src)))
  1089. b.ResetTimer()
  1090. for i := 0; i < b.N; i++ {
  1091. Encode(dst, src)
  1092. }
  1093. }
  1094. func testOrBenchmark(b testing.TB) string {
  1095. if _, ok := b.(*testing.B); ok {
  1096. return "benchmark"
  1097. }
  1098. return "test"
  1099. }
  1100. func readFile(b testing.TB, filename string) []byte {
  1101. src, err := ioutil.ReadFile(filename)
  1102. if err != nil {
  1103. b.Skipf("skipping %s: %v", testOrBenchmark(b), err)
  1104. }
  1105. if len(src) == 0 {
  1106. b.Fatalf("%s has zero length", filename)
  1107. }
  1108. return src
  1109. }
  1110. // expand returns a slice of length n containing repeated copies of src.
  1111. func expand(src []byte, n int) []byte {
  1112. dst := make([]byte, n)
  1113. for x := dst; len(x) > 0; {
  1114. i := copy(x, src)
  1115. x = x[i:]
  1116. }
  1117. return dst
  1118. }
  1119. func benchWords(b *testing.B, n int, decode bool) {
  1120. // Note: the file is OS-language dependent so the resulting values are not
  1121. // directly comparable for non-US-English OS installations.
  1122. data := expand(readFile(b, "/usr/share/dict/words"), n)
  1123. if decode {
  1124. benchDecode(b, data)
  1125. } else {
  1126. benchEncode(b, data)
  1127. }
  1128. }
  1129. func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
  1130. func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
  1131. func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
  1132. func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
  1133. func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
  1134. func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
  1135. func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
  1136. func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
  1137. func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
  1138. func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
  1139. func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
  1140. func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
  1141. func BenchmarkRandomEncode(b *testing.B) {
  1142. rng := rand.New(rand.NewSource(1))
  1143. data := make([]byte, 1<<20)
  1144. for i := range data {
  1145. data[i] = uint8(rng.Intn(256))
  1146. }
  1147. benchEncode(b, data)
  1148. }
  1149. // testFiles' values are copied directly from
  1150. // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
  1151. // The label field is unused in snappy-go.
  1152. var testFiles = []struct {
  1153. label string
  1154. filename string
  1155. sizeLimit int
  1156. }{
  1157. {"html", "html", 0},
  1158. {"urls", "urls.10K", 0},
  1159. {"jpg", "fireworks.jpeg", 0},
  1160. {"jpg_200", "fireworks.jpeg", 200},
  1161. {"pdf", "paper-100k.pdf", 0},
  1162. {"html4", "html_x_4", 0},
  1163. {"txt1", "alice29.txt", 0},
  1164. {"txt2", "asyoulik.txt", 0},
  1165. {"txt3", "lcet10.txt", 0},
  1166. {"txt4", "plrabn12.txt", 0},
  1167. {"pb", "geo.protodata", 0},
  1168. {"gaviota", "kppkn.gtb", 0},
  1169. }
  1170. const (
  1171. // The benchmark data files are at this canonical URL.
  1172. benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
  1173. )
  1174. func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) {
  1175. bDir := filepath.FromSlash(*benchdataDir)
  1176. filename := filepath.Join(bDir, basename)
  1177. if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
  1178. return nil
  1179. }
  1180. if !*download {
  1181. b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b))
  1182. }
  1183. // Download the official snappy C++ implementation reference test data
  1184. // files for benchmarking.
  1185. if err := os.MkdirAll(bDir, 0777); err != nil && !os.IsExist(err) {
  1186. return fmt.Errorf("failed to create %s: %s", bDir, err)
  1187. }
  1188. f, err := os.Create(filename)
  1189. if err != nil {
  1190. return fmt.Errorf("failed to create %s: %s", filename, err)
  1191. }
  1192. defer f.Close()
  1193. defer func() {
  1194. if errRet != nil {
  1195. os.Remove(filename)
  1196. }
  1197. }()
  1198. url := benchURL + basename
  1199. resp, err := http.Get(url)
  1200. if err != nil {
  1201. return fmt.Errorf("failed to download %s: %s", url, err)
  1202. }
  1203. defer resp.Body.Close()
  1204. if s := resp.StatusCode; s != http.StatusOK {
  1205. return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
  1206. }
  1207. _, err = io.Copy(f, resp.Body)
  1208. if err != nil {
  1209. return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
  1210. }
  1211. return nil
  1212. }
  1213. func benchFile(b *testing.B, i int, decode bool) {
  1214. if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
  1215. b.Fatalf("failed to download testdata: %s", err)
  1216. }
  1217. bDir := filepath.FromSlash(*benchdataDir)
  1218. data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
  1219. if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
  1220. data = data[:n]
  1221. }
  1222. if decode {
  1223. benchDecode(b, data)
  1224. } else {
  1225. benchEncode(b, data)
  1226. }
  1227. }
  1228. // Naming convention is kept similar to what snappy's C++ implementation uses.
  1229. func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) }
  1230. func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }
  1231. func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) }
  1232. func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) }
  1233. func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) }
  1234. func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) }
  1235. func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) }
  1236. func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) }
  1237. func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) }
  1238. func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) }
  1239. func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
  1240. func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
  1241. func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) }
  1242. func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) }
  1243. func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) }
  1244. func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) }
  1245. func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) }
  1246. func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) }
  1247. func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) }
  1248. func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) }
  1249. func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) }
  1250. func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) }
  1251. func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
  1252. func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }
  1253. func BenchmarkExtendMatch(b *testing.B) {
  1254. tDir := filepath.FromSlash(*testdataDir)
  1255. src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
  1256. if err != nil {
  1257. b.Fatalf("ReadFile: %v", err)
  1258. }
  1259. b.ResetTimer()
  1260. for i := 0; i < b.N; i++ {
  1261. for _, tc := range extendMatchGoldenTestCases {
  1262. extendMatch(src, tc.i, tc.j)
  1263. }
  1264. }
  1265. }