You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

489 lines
12 KiB

  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package fastjson
  5. import (
  6. "bytes"
  7. "errors"
  8. "io"
  9. )
  // A Decoder reads and decodes JSON objects from an input stream.
  type Decoder struct {
  	r          io.Reader   // source of input; set by NewDecoder
  	buf        []byte      // data read from r; refill appends to it
  	d          decodeState // decode state that unmarshals each value found by readValue
  	scanp      int         // start of unread data in buf
  	scan       scanner     // scanner that readValue feeds byte by byte
  	err        error       // sticky error; once set, Decode returns it immediately
  	tokenState int         // current token-stream state (one of the token* constants)
  	tokenStack []int       // token states saved when Token enters an array or object
  	lastEnd    int         // index in stateRecord, where previous object in stream ends
  }
  22. // NewDecoder returns a new decoder that reads from r.
  23. //
  24. // The decoder introduces its own buffering and may
  25. // read data from r beyond the JSON values requested.
  26. func NewDecoder(r io.Reader) *Decoder {
  27. return &Decoder{r: r}
  28. }
  29. // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
  30. // Number instead of as a float64.
  31. func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
  32. // Decode reads the next JSON-encoded value from its
  33. // input and stores it in the value pointed to by v.
  34. //
  35. // See the documentation for Unmarshal for details about
  36. // the conversion of JSON into a Go value.
  37. func (dec *Decoder) Decode(v interface{}) error {
  38. if dec.err != nil {
  39. return dec.err
  40. }
  41. if err := dec.tokenPrepareForDecode(); err != nil {
  42. return err
  43. }
  44. if !dec.tokenValueAllowed() {
  45. return &SyntaxError{msg: "not at beginning of value"}
  46. }
  47. // Read whole value into buffer.
  48. n, err := dec.readValue()
  49. //dec.scan.printArrayofRecords()
  50. if err != nil {
  51. return err
  52. }
  53. dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
  54. dec.d.scan.stateRecord = dec.scan.stateRecord[dec.lastEnd:]
  55. dec.lastEnd = len(dec.scan.stateRecord) //saves the begin of stateRecord of next object
  56. dec.scanp += n
  57. // Don't save err from unmarshal into dec.err:
  58. // the connection is still usable since we read a complete JSON
  59. // object from it before the error happened.
  60. err = dec.d.unmarshal(v)
  61. // fixup token streaming state
  62. dec.tokenValueEnd()
  63. return err
  64. }
  65. // Buffered returns a reader of the data remaining in the Decoder's
  66. // buffer. The reader is valid until the next call to Decode.
  67. func (dec *Decoder) Buffered() io.Reader {
  68. return bytes.NewReader(dec.buf[dec.scanp:])
  69. }
  70. // readValue reads a JSON value into dec.buf.
  71. // It returns the length of the encoding.
  72. func (dec *Decoder) readValue() (int, error) {
  73. dec.scan.reset()
  74. scanp := dec.scanp
  75. var err error
  76. scanedBytes := 0
  77. Input:
  78. for {
  79. dec.scan.length_data = len(dec.buf) - scanp
  80. // Look in the buffer for a new value.
  81. for i, c := range dec.buf[scanp:] {
  82. dec.scan.bytes++
  83. v := dec.scan.step(&dec.scan, c)
  84. dec.scan.fillRecord(scanedBytes, v)
  85. scanedBytes++
  86. if v == scanEnd {
  87. scanp += i
  88. break Input
  89. }
  90. // scanEnd is delayed one byte.
  91. // We might block trying to get that byte from src,
  92. // so instead invent a space byte.
  93. if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
  94. scanp += i + 1
  95. break Input
  96. }
  97. if v == scanError {
  98. dec.err = dec.scan.err
  99. return 0, dec.scan.err
  100. }
  101. }
  102. scanp = len(dec.buf)
  103. n := scanp - dec.scanp
  104. // Did the last read have an error?
  105. // Delayed until now to allow buffer scan.
  106. if err != nil {
  107. if err == io.EOF {
  108. if dec.scan.step(&dec.scan, ' ') == scanEnd {
  109. dec.scan.fillRecord(n, scanEnd) //passes length of read json value
  110. break Input
  111. }
  112. if nonSpace(dec.buf) {
  113. err = io.ErrUnexpectedEOF
  114. }
  115. }
  116. dec.err = err
  117. return 0, err
  118. }
  119. err = dec.refill()
  120. scanp = dec.scanp + n
  121. }
  122. return scanp - dec.scanp, nil
  123. }
  124. func (dec *Decoder) refill() error {
  125. // Make room to read more into the buffer.
  126. // First slide down data already consumed.
  127. if dec.scanp > 0 {
  128. n := copy(dec.buf, dec.buf[dec.scanp:])
  129. dec.buf = dec.buf[:n]
  130. dec.scanp = 0
  131. }
  132. // Grow buffer if not large enough.
  133. const minRead = 512
  134. if cap(dec.buf)-len(dec.buf) < minRead {
  135. newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
  136. copy(newBuf, dec.buf)
  137. dec.buf = newBuf
  138. }
  139. // Read. Delay error for next iteration (after scan).
  140. n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
  141. dec.buf = dec.buf[0 : len(dec.buf)+n]
  142. return err
  143. }
  144. func nonSpace(b []byte) bool {
  145. for _, c := range b {
  146. if !isSpace(c) {
  147. return true
  148. }
  149. }
  150. return false
  151. }
  // An Encoder writes JSON objects to an output stream.
  type Encoder struct {
  	w   io.Writer // destination for encoded values; set by NewEncoder
  	err error     // sticky write error; once set, Encode returns it immediately
  }
  157. // NewEncoder returns a new encoder that writes to w.
  158. func NewEncoder(w io.Writer) *Encoder {
  159. return &Encoder{w: w}
  160. }
  161. // Encode writes the JSON encoding of v to the stream,
  162. // followed by a newline character.
  163. //
  164. // See the documentation for Marshal for details about the
  165. // conversion of Go values to JSON.
  166. func (enc *Encoder) Encode(v interface{}) error {
  167. if enc.err != nil {
  168. return enc.err
  169. }
  170. e := newEncodeState()
  171. err := e.marshal(v)
  172. if err != nil {
  173. return err
  174. }
  175. // Terminate each value with a newline.
  176. // This makes the output look a little nicer
  177. // when debugging, and some kind of space
  178. // is required if the encoded value was a number,
  179. // so that the reader knows there aren't more
  180. // digits coming.
  181. e.WriteByte('\n')
  182. if _, err = enc.w.Write(e.Bytes()); err != nil {
  183. enc.err = err
  184. }
  185. encodeStatePool.Put(e)
  186. return err
  187. }
  // RawMessage is a raw encoded JSON object.
  // It implements Marshaler and Unmarshaler and can
  // be used to delay JSON decoding or precompute a JSON encoding.
  type RawMessage []byte

  // MarshalJSON returns *m as the JSON encoding of m.
  // A nil receiver is encoded as the JSON literal null rather than
  // causing a nil-pointer panic.
  func (m *RawMessage) MarshalJSON() ([]byte, error) {
  	if m == nil {
  		return []byte("null"), nil
  	}
  	return *m, nil
  }

  // UnmarshalJSON sets *m to a copy of data.
  // It returns an error when called on a nil receiver.
  func (m *RawMessage) UnmarshalJSON(data []byte) error {
  	if m == nil {
  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
  	}
  	// Reuse the existing backing array where possible, but never alias data.
  	*m = append((*m)[0:0], data...)
  	return nil
  }
  204. var _ Marshaler = (*RawMessage)(nil)
  205. var _ Unmarshaler = (*RawMessage)(nil)
  // A Token holds a value of one of these types:
  //
  //	Delim, for the four JSON delimiters [ ] { }
  //	bool, for JSON booleans
  //	float64, for JSON numbers
  //	Number, for JSON numbers (when UseNumber has been called)
  //	string, for JSON string literals
  //	nil, for JSON null
  type Token interface{}
  // Values of Decoder.tokenState. Each one records where the decoder stands
  // relative to the array or object it is currently inside.
  const (
  	tokenTopValue    = iota // initial state; a value may start here
  	tokenArrayStart         // just after '['; a value or ']' may follow
  	tokenArrayValue         // just after ',' in an array; a value may follow
  	tokenArrayComma         // just after an array element; ',' or ']' may follow
  	tokenObjectStart        // just after '{'; a key string or '}' may follow
  	tokenObjectKey          // just after ',' in an object; a key string may follow
  	tokenObjectColon        // just after an object key; ':' must follow
  	tokenObjectValue        // just after ':'; a value may follow
  	tokenObjectComma        // just after an object value; ',' or '}' may follow
  )
  227. // advance tokenstate from a separator state to a value state
  228. func (dec *Decoder) tokenPrepareForDecode() error {
  229. // Note: Not calling peek before switch, to avoid
  230. // putting peek into the standard Decode path.
  231. // peek is only called when using the Token API.
  232. switch dec.tokenState {
  233. case tokenArrayComma:
  234. c, err := dec.peek()
  235. if err != nil {
  236. return err
  237. }
  238. if c != ',' {
  239. return &SyntaxError{"expected comma after array element", 0}
  240. }
  241. dec.scanp++
  242. dec.tokenState = tokenArrayValue
  243. case tokenObjectColon:
  244. c, err := dec.peek()
  245. if err != nil {
  246. return err
  247. }
  248. if c != ':' {
  249. return &SyntaxError{"expected colon after object key", 0}
  250. }
  251. dec.scanp++
  252. dec.tokenState = tokenObjectValue
  253. }
  254. return nil
  255. }
  256. func (dec *Decoder) tokenValueAllowed() bool {
  257. switch dec.tokenState {
  258. case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
  259. return true
  260. }
  261. return false
  262. }
  263. func (dec *Decoder) tokenValueEnd() {
  264. switch dec.tokenState {
  265. case tokenArrayStart, tokenArrayValue:
  266. dec.tokenState = tokenArrayComma
  267. case tokenObjectValue:
  268. dec.tokenState = tokenObjectComma
  269. }
  270. }
  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
  type Delim rune

  // String returns the delimiter as a one-character string.
  func (d Delim) String() string {
  	return string(rune(d))
  }
  276. // Token returns the next JSON token in the input stream.
  277. // At the end of the input stream, Token returns nil, io.EOF.
  278. //
  279. // Token guarantees that the delimiters [ ] { } it returns are
  280. // properly nested and matched: if Token encounters an unexpected
  281. // delimiter in the input, it will return an error.
  282. //
  283. // The input stream consists of basic JSON values—bool, string,
  284. // number, and null—along with delimiters [ ] { } of type Delim
  285. // to mark the start and end of arrays and objects.
  286. // Commas and colons are elided.
  287. func (dec *Decoder) Token() (Token, error) {
  288. for {
  289. c, err := dec.peek()
  290. if err != nil {
  291. return nil, err
  292. }
  293. switch c {
  294. case '[':
  295. if !dec.tokenValueAllowed() {
  296. return dec.tokenError(c)
  297. }
  298. dec.scanp++
  299. dec.tokenStack = append(dec.tokenStack, dec.tokenState)
  300. dec.tokenState = tokenArrayStart
  301. return Delim('['), nil
  302. case ']':
  303. if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
  304. return dec.tokenError(c)
  305. }
  306. dec.scanp++
  307. dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
  308. dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
  309. dec.tokenValueEnd()
  310. return Delim(']'), nil
  311. case '{':
  312. if !dec.tokenValueAllowed() {
  313. return dec.tokenError(c)
  314. }
  315. dec.scanp++
  316. dec.tokenStack = append(dec.tokenStack, dec.tokenState)
  317. dec.tokenState = tokenObjectStart
  318. return Delim('{'), nil
  319. case '}':
  320. if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
  321. return dec.tokenError(c)
  322. }
  323. dec.scanp++
  324. dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
  325. dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
  326. dec.tokenValueEnd()
  327. return Delim('}'), nil
  328. case ':':
  329. if dec.tokenState != tokenObjectColon {
  330. return dec.tokenError(c)
  331. }
  332. dec.scanp++
  333. dec.tokenState = tokenObjectValue
  334. continue
  335. case ',':
  336. if dec.tokenState == tokenArrayComma {
  337. dec.scanp++
  338. dec.tokenState = tokenArrayValue
  339. continue
  340. }
  341. if dec.tokenState == tokenObjectComma {
  342. dec.scanp++
  343. dec.tokenState = tokenObjectKey
  344. continue
  345. }
  346. return dec.tokenError(c)
  347. case '"':
  348. if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
  349. var x string
  350. old := dec.tokenState
  351. dec.tokenState = tokenTopValue
  352. err := dec.Decode(&x)
  353. dec.tokenState = old
  354. if err != nil {
  355. clearOffset(err)
  356. return nil, err
  357. }
  358. dec.tokenState = tokenObjectColon
  359. return x, nil
  360. }
  361. fallthrough
  362. default:
  363. if !dec.tokenValueAllowed() {
  364. return dec.tokenError(c)
  365. }
  366. var x interface{}
  367. if err := dec.Decode(&x); err != nil {
  368. clearOffset(err)
  369. return nil, err
  370. }
  371. return x, nil
  372. }
  373. }
  374. }
  375. func clearOffset(err error) {
  376. if s, ok := err.(*SyntaxError); ok {
  377. s.Offset = 0
  378. }
  379. }
  380. func (dec *Decoder) tokenError(c byte) (Token, error) {
  381. var context string
  382. switch dec.tokenState {
  383. case tokenTopValue:
  384. context = " looking for beginning of value"
  385. case tokenArrayStart, tokenArrayValue, tokenObjectValue:
  386. context = " looking for beginning of value"
  387. case tokenArrayComma:
  388. context = " after array element"
  389. case tokenObjectKey:
  390. context = " looking for beginning of object key string"
  391. case tokenObjectColon:
  392. context = " after object key"
  393. case tokenObjectComma:
  394. context = " after object key:value pair"
  395. }
  396. return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
  397. }
  398. // More reports whether there is another element in the
  399. // current array or object being parsed.
  400. func (dec *Decoder) More() bool {
  401. c, err := dec.peek()
  402. return err == nil && c != ']' && c != '}'
  403. }
  404. func (dec *Decoder) peek() (byte, error) {
  405. var err error
  406. for {
  407. for i := dec.scanp; i < len(dec.buf); i++ {
  408. c := dec.buf[i]
  409. if isSpace(c) {
  410. continue
  411. }
  412. dec.scanp = i
  413. return c, nil
  414. }
  415. // buffer has been scanned, now report any error
  416. if err != nil {
  417. return 0, err
  418. }
  419. err = dec.refill()
  420. }
  421. }
  422. /*
  423. TODO
  424. // EncodeToken writes the given JSON token to the stream.
  425. // It returns an error if the delimiters [ ] { } are not properly used.
  426. //
  427. // EncodeToken does not call Flush, because usually it is part of
  428. // a larger operation such as Encode, and those will call Flush when finished.
  429. // Callers that create an Encoder and then invoke EncodeToken directly,
  430. // without using Encode, need to call Flush when finished to ensure that
  431. // the JSON is written to the underlying writer.
  432. func (e *Encoder) EncodeToken(t Token) error {
  433. ...
  434. }
  435. */