inflate.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package flate implements the DEFLATE compressed data format, described in
  5. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  6. // formats.
  7. package flate
  8. import (
  9. "bufio"
  10. "io"
  11. "math/bits"
  12. "strconv"
  13. "sync"
  14. )
  15. const (
  16. maxCodeLen = 16 // max length of Huffman code
  17. maxCodeLenMask = 15 // mask for max length of Huffman code
  18. // The next three numbers come from the RFC section 3.2.7, with the
  19. // additional proviso in section 3.2.5 which implies that distance codes
  20. // 30 and 31 should never occur in compressed data.
  21. maxNumLit = 286
  22. maxNumDist = 30
  23. numCodes = 19 // number of codes in Huffman meta-code
  24. )
  25. // Initialize the fixedHuffmanDecoder only once upon first use.
  26. var fixedOnce sync.Once
  27. var fixedHuffmanDecoder huffmanDecoder
  28. // A CorruptInputError reports the presence of corrupt input at a given offset.
  29. type CorruptInputError int64
  30. func (e CorruptInputError) Error() string {
  31. return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
  32. }
  33. // An InternalError reports an error in the flate code itself.
  34. type InternalError string
  35. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  36. // A ReadError reports an error encountered while reading input.
  37. //
  38. // Deprecated: No longer returned.
  39. type ReadError struct {
  40. Offset int64 // byte offset where error occurred
  41. Err error // error returned by underlying Read
  42. }
  43. func (e *ReadError) Error() string {
  44. return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  45. }
  46. // A WriteError reports an error encountered while writing output.
  47. //
  48. // Deprecated: No longer returned.
  49. type WriteError struct {
  50. Offset int64 // byte offset where error occurred
  51. Err error // error returned by underlying Write
  52. }
  53. func (e *WriteError) Error() string {
  54. return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  55. }
  56. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  57. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  58. // instead of allocating a new one.
  59. type Resetter interface {
  60. // Reset discards any buffered data and resets the Resetter as if it was
  61. // newly initialized with the given reader.
  62. Reset(r io.Reader, dict []byte) error
  63. }
  64. // The data structure for decoding Huffman tables is based on that of
  65. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  66. // For codes smaller than the table width, there are multiple entries
  67. // (each combination of trailing bits has the same value). For codes
  68. // larger than the table width, the table contains a link to an overflow
  69. // table. The width of each entry in the link table is the maximum code
  70. // size minus the chunk width.
  71. //
  72. // Note that you can do a lookup in the table even without all bits
  73. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  74. // have the property that shorter codes come before longer ones, the
  75. // bit length estimate in the result is a lower bound on the actual
  76. // number of bits.
  77. //
  78. // See the following:
  79. // http://www.gzip.org/algorithm.txt
  80. // chunk & 15 is number of bits
  81. // chunk >> 4 is value, including table link
  82. const (
  83. huffmanChunkBits = 9
  84. huffmanNumChunks = 1 << huffmanChunkBits
  85. huffmanCountMask = 15
  86. huffmanValueShift = 4
  87. )
  88. type huffmanDecoder struct {
  89. min int // the minimum code length
  90. chunks *[huffmanNumChunks]uint32 // chunks as described above
  91. links [][]uint32 // overflow links
  92. linkMask uint32 // mask the width of the link table
  93. }
  94. // Initialize Huffman decoding tables from array of code lengths.
  95. // Following this function, h is guaranteed to be initialized into a complete
  96. // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
  97. // degenerate case where the tree has only a single symbol with length 1. Empty
  98. // trees are permitted.
  99. func (h *huffmanDecoder) init(lengths []int) bool {
  100. // Sanity enables additional runtime tests during Huffman
  101. // table construction. It's intended to be used during
  102. // development to supplement the currently ad-hoc unit tests.
  103. const sanity = false
  104. if h.chunks == nil {
  105. h.chunks = &[huffmanNumChunks]uint32{}
  106. }
  107. if h.min != 0 {
  108. *h = huffmanDecoder{chunks: h.chunks, links: h.links}
  109. }
  110. // Count number of codes of each length,
  111. // compute min and max length.
  112. var count [maxCodeLen]int
  113. var min, max int
  114. for _, n := range lengths {
  115. if n == 0 {
  116. continue
  117. }
  118. if min == 0 || n < min {
  119. min = n
  120. }
  121. if n > max {
  122. max = n
  123. }
  124. count[n&maxCodeLenMask]++
  125. }
  126. // Empty tree. The decompressor.huffSym function will fail later if the tree
  127. // is used. Technically, an empty tree is only valid for the HDIST tree and
  128. // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
  129. // is guaranteed to fail since it will attempt to use the tree to decode the
  130. // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
  131. // guaranteed to fail later since the compressed data section must be
  132. // composed of at least one symbol (the end-of-block marker).
  133. if max == 0 {
  134. return true
  135. }
  136. code := 0
  137. var nextcode [maxCodeLen]int
  138. for i := min; i <= max; i++ {
  139. code <<= 1
  140. nextcode[i&maxCodeLenMask] = code
  141. code += count[i&maxCodeLenMask]
  142. }
  143. // Check that the coding is complete (i.e., that we've
  144. // assigned all 2-to-the-max possible bit sequences).
  145. // Exception: To be compatible with zlib, we also need to
  146. // accept degenerate single-code codings. See also
  147. // TestDegenerateHuffmanCoding.
  148. if code != 1<<uint(max) && !(code == 1 && max == 1) {
  149. return false
  150. }
  151. h.min = min
  152. chunks := h.chunks[:]
  153. for i := range chunks {
  154. chunks[i] = 0
  155. }
  156. if max > huffmanChunkBits {
  157. numLinks := 1 << (uint(max) - huffmanChunkBits)
  158. h.linkMask = uint32(numLinks - 1)
  159. // create link tables
  160. link := nextcode[huffmanChunkBits+1] >> 1
  161. if cap(h.links) < huffmanNumChunks-link {
  162. h.links = make([][]uint32, huffmanNumChunks-link)
  163. } else {
  164. h.links = h.links[:huffmanNumChunks-link]
  165. }
  166. for j := uint(link); j < huffmanNumChunks; j++ {
  167. reverse := int(bits.Reverse16(uint16(j)))
  168. reverse >>= uint(16 - huffmanChunkBits)
  169. off := j - uint(link)
  170. if sanity && h.chunks[reverse] != 0 {
  171. panic("impossible: overwriting existing chunk")
  172. }
  173. h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
  174. if cap(h.links[off]) < numLinks {
  175. h.links[off] = make([]uint32, numLinks)
  176. } else {
  177. links := h.links[off][:0]
  178. h.links[off] = links[:numLinks]
  179. }
  180. }
  181. } else {
  182. h.links = h.links[:0]
  183. }
  184. for i, n := range lengths {
  185. if n == 0 {
  186. continue
  187. }
  188. code := nextcode[n]
  189. nextcode[n]++
  190. chunk := uint32(i<<huffmanValueShift | n)
  191. reverse := int(bits.Reverse16(uint16(code)))
  192. reverse >>= uint(16 - n)
  193. if n <= huffmanChunkBits {
  194. for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
  195. // We should never need to overwrite
  196. // an existing chunk. Also, 0 is
  197. // never a valid chunk, because the
  198. // lower 4 "count" bits should be
  199. // between 1 and 15.
  200. if sanity && h.chunks[off] != 0 {
  201. panic("impossible: overwriting existing chunk")
  202. }
  203. h.chunks[off] = chunk
  204. }
  205. } else {
  206. j := reverse & (huffmanNumChunks - 1)
  207. if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
  208. // Longer codes should have been
  209. // associated with a link table above.
  210. panic("impossible: not an indirect chunk")
  211. }
  212. value := h.chunks[j] >> huffmanValueShift
  213. linktab := h.links[value]
  214. reverse >>= huffmanChunkBits
  215. for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
  216. if sanity && linktab[off] != 0 {
  217. panic("impossible: overwriting existing chunk")
  218. }
  219. linktab[off] = chunk
  220. }
  221. }
  222. }
  223. if sanity {
  224. // Above we've sanity checked that we never overwrote
  225. // an existing entry. Here we additionally check that
  226. // we filled the tables completely.
  227. for i, chunk := range h.chunks {
  228. if chunk == 0 {
  229. // As an exception, in the degenerate
  230. // single-code case, we allow odd
  231. // chunks to be missing.
  232. if code == 1 && i%2 == 1 {
  233. continue
  234. }
  235. panic("impossible: missing chunk")
  236. }
  237. }
  238. for _, linktab := range h.links {
  239. for _, chunk := range linktab {
  240. if chunk == 0 {
  241. panic("impossible: missing chunk")
  242. }
  243. }
  244. }
  245. }
  246. return true
  247. }
  248. // The actual read interface needed by NewReader.
  249. // If the passed in io.Reader does not also have ReadByte,
  250. // the NewReader will introduce its own buffering.
  251. type Reader interface {
  252. io.Reader
  253. io.ByteReader
  254. }
  255. // Decompress state.
  256. type decompressor struct {
  257. // Input source.
  258. r Reader
  259. roffset int64
  260. // Input bits, in top of b.
  261. b uint32
  262. nb uint
  263. // Huffman decoders for literal/length, distance.
  264. h1, h2 huffmanDecoder
  265. // Length arrays used to define Huffman codes.
  266. bits *[maxNumLit + maxNumDist]int
  267. codebits *[numCodes]int
  268. // Output history, buffer.
  269. dict dictDecoder
  270. // Temporary buffer (avoids repeated allocation).
  271. buf [4]byte
  272. // Next step in the decompression,
  273. // and decompression state.
  274. step func(*decompressor)
  275. stepState int
  276. final bool
  277. err error
  278. toRead []byte
  279. hl, hd *huffmanDecoder
  280. copyLen int
  281. copyDist int
  282. }
  283. func (f *decompressor) nextBlock() {
  284. for f.nb < 1+2 {
  285. if f.err = f.moreBits(); f.err != nil {
  286. return
  287. }
  288. }
  289. f.final = f.b&1 == 1
  290. f.b >>= 1
  291. typ := f.b & 3
  292. f.b >>= 2
  293. f.nb -= 1 + 2
  294. switch typ {
  295. case 0:
  296. f.dataBlock()
  297. case 1:
  298. // compressed, fixed Huffman tables
  299. f.hl = &fixedHuffmanDecoder
  300. f.hd = nil
  301. f.huffmanBlock()
  302. case 2:
  303. // compressed, dynamic Huffman tables
  304. if f.err = f.readHuffman(); f.err != nil {
  305. break
  306. }
  307. f.hl = &f.h1
  308. f.hd = &f.h2
  309. f.huffmanBlock()
  310. default:
  311. // 3 is reserved.
  312. f.err = CorruptInputError(f.roffset)
  313. }
  314. }
  315. func (f *decompressor) Read(b []byte) (int, error) {
  316. for {
  317. if len(f.toRead) > 0 {
  318. n := copy(b, f.toRead)
  319. f.toRead = f.toRead[n:]
  320. if len(f.toRead) == 0 {
  321. return n, f.err
  322. }
  323. return n, nil
  324. }
  325. if f.err != nil {
  326. return 0, f.err
  327. }
  328. f.step(f)
  329. if f.err != nil && len(f.toRead) == 0 {
  330. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  331. }
  332. }
  333. }
  334. // Support the io.WriteTo interface for io.Copy and friends.
  335. func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
  336. total := int64(0)
  337. flushed := false
  338. for {
  339. if len(f.toRead) > 0 {
  340. n, err := w.Write(f.toRead)
  341. total += int64(n)
  342. if err != nil {
  343. f.err = err
  344. return total, err
  345. }
  346. if n != len(f.toRead) {
  347. return total, io.ErrShortWrite
  348. }
  349. f.toRead = f.toRead[:0]
  350. }
  351. if f.err != nil && flushed {
  352. if f.err == io.EOF {
  353. return total, nil
  354. }
  355. return total, f.err
  356. }
  357. if f.err == nil {
  358. f.step(f)
  359. }
  360. if len(f.toRead) == 0 && f.err != nil && !flushed {
  361. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  362. flushed = true
  363. }
  364. }
  365. }
  366. func (f *decompressor) Close() error {
  367. if f.err == io.EOF {
  368. return nil
  369. }
  370. return f.err
  371. }
  372. // RFC 1951 section 3.2.7.
  373. // Compression with dynamic Huffman codes
  374. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  375. func (f *decompressor) readHuffman() error {
  376. // HLIT[5], HDIST[5], HCLEN[4].
  377. for f.nb < 5+5+4 {
  378. if err := f.moreBits(); err != nil {
  379. return err
  380. }
  381. }
  382. nlit := int(f.b&0x1F) + 257
  383. if nlit > maxNumLit {
  384. return CorruptInputError(f.roffset)
  385. }
  386. f.b >>= 5
  387. ndist := int(f.b&0x1F) + 1
  388. if ndist > maxNumDist {
  389. return CorruptInputError(f.roffset)
  390. }
  391. f.b >>= 5
  392. nclen := int(f.b&0xF) + 4
  393. // numCodes is 19, so nclen is always valid.
  394. f.b >>= 4
  395. f.nb -= 5 + 5 + 4
  396. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  397. for i := 0; i < nclen; i++ {
  398. for f.nb < 3 {
  399. if err := f.moreBits(); err != nil {
  400. return err
  401. }
  402. }
  403. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  404. f.b >>= 3
  405. f.nb -= 3
  406. }
  407. for i := nclen; i < len(codeOrder); i++ {
  408. f.codebits[codeOrder[i]] = 0
  409. }
  410. if !f.h1.init(f.codebits[0:]) {
  411. return CorruptInputError(f.roffset)
  412. }
  413. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  414. // using the code length Huffman code.
  415. for i, n := 0, nlit+ndist; i < n; {
  416. x, err := f.huffSym(&f.h1)
  417. if err != nil {
  418. return err
  419. }
  420. if x < 16 {
  421. // Actual length.
  422. f.bits[i] = x
  423. i++
  424. continue
  425. }
  426. // Repeat previous length or zero.
  427. var rep int
  428. var nb uint
  429. var b int
  430. switch x {
  431. default:
  432. return InternalError("unexpected length code")
  433. case 16:
  434. rep = 3
  435. nb = 2
  436. if i == 0 {
  437. return CorruptInputError(f.roffset)
  438. }
  439. b = f.bits[i-1]
  440. case 17:
  441. rep = 3
  442. nb = 3
  443. b = 0
  444. case 18:
  445. rep = 11
  446. nb = 7
  447. b = 0
  448. }
  449. for f.nb < nb {
  450. if err := f.moreBits(); err != nil {
  451. return err
  452. }
  453. }
  454. rep += int(f.b & uint32(1<<nb-1))
  455. f.b >>= nb
  456. f.nb -= nb
  457. if i+rep > n {
  458. return CorruptInputError(f.roffset)
  459. }
  460. for j := 0; j < rep; j++ {
  461. f.bits[i] = b
  462. i++
  463. }
  464. }
  465. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  466. return CorruptInputError(f.roffset)
  467. }
  468. // As an optimization, we can initialize the min bits to read at a time
  469. // for the HLIT tree to the length of the EOB marker since we know that
  470. // every block must terminate with one. This preserves the property that
  471. // we never read any extra bytes after the end of the DEFLATE stream.
  472. if f.h1.min < f.bits[endBlockMarker] {
  473. f.h1.min = f.bits[endBlockMarker]
  474. }
  475. return nil
  476. }
  477. // Decode a single Huffman block from f.
  478. // hl and hd are the Huffman states for the lit/length values
  479. // and the distance values, respectively. If hd == nil, using the
  480. // fixed distance encoding associated with fixed Huffman blocks.
  481. func (f *decompressor) huffmanBlock() {
  482. const (
  483. stateInit = iota // Zero value must be stateInit
  484. stateDict
  485. )
  486. switch f.stepState {
  487. case stateInit:
  488. goto readLiteral
  489. case stateDict:
  490. goto copyHistory
  491. }
  492. readLiteral:
  493. // Read literal and/or (length, distance) according to RFC section 3.2.3.
  494. {
  495. v, err := f.huffSym(f.hl)
  496. if err != nil {
  497. f.err = err
  498. return
  499. }
  500. var n uint // number of bits extra
  501. var length int
  502. switch {
  503. case v < 256:
  504. f.dict.writeByte(byte(v))
  505. if f.dict.availWrite() == 0 {
  506. f.toRead = f.dict.readFlush()
  507. f.step = (*decompressor).huffmanBlock
  508. f.stepState = stateInit
  509. return
  510. }
  511. goto readLiteral
  512. case v == 256:
  513. f.finishBlock()
  514. return
  515. // otherwise, reference to older data
  516. case v < 265:
  517. length = v - (257 - 3)
  518. n = 0
  519. case v < 269:
  520. length = v*2 - (265*2 - 11)
  521. n = 1
  522. case v < 273:
  523. length = v*4 - (269*4 - 19)
  524. n = 2
  525. case v < 277:
  526. length = v*8 - (273*8 - 35)
  527. n = 3
  528. case v < 281:
  529. length = v*16 - (277*16 - 67)
  530. n = 4
  531. case v < 285:
  532. length = v*32 - (281*32 - 131)
  533. n = 5
  534. case v < maxNumLit:
  535. length = 258
  536. n = 0
  537. default:
  538. f.err = CorruptInputError(f.roffset)
  539. return
  540. }
  541. if n > 0 {
  542. for f.nb < n {
  543. if err = f.moreBits(); err != nil {
  544. f.err = err
  545. return
  546. }
  547. }
  548. length += int(f.b & uint32(1<<n-1))
  549. f.b >>= n
  550. f.nb -= n
  551. }
  552. var dist int
  553. if f.hd == nil {
  554. for f.nb < 5 {
  555. if err = f.moreBits(); err != nil {
  556. f.err = err
  557. return
  558. }
  559. }
  560. dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
  561. f.b >>= 5
  562. f.nb -= 5
  563. } else {
  564. if dist, err = f.huffSym(f.hd); err != nil {
  565. f.err = err
  566. return
  567. }
  568. }
  569. switch {
  570. case dist < 4:
  571. dist++
  572. case dist < maxNumDist:
  573. nb := uint(dist-2) >> 1
  574. // have 1 bit in bottom of dist, need nb more.
  575. extra := (dist & 1) << nb
  576. for f.nb < nb {
  577. if err = f.moreBits(); err != nil {
  578. f.err = err
  579. return
  580. }
  581. }
  582. extra |= int(f.b & uint32(1<<nb-1))
  583. f.b >>= nb
  584. f.nb -= nb
  585. dist = 1<<(nb+1) + 1 + extra
  586. default:
  587. f.err = CorruptInputError(f.roffset)
  588. return
  589. }
  590. // No check on length; encoding can be prescient.
  591. if dist > f.dict.histSize() {
  592. f.err = CorruptInputError(f.roffset)
  593. return
  594. }
  595. f.copyLen, f.copyDist = length, dist
  596. goto copyHistory
  597. }
  598. copyHistory:
  599. // Perform a backwards copy according to RFC section 3.2.3.
  600. {
  601. cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
  602. if cnt == 0 {
  603. cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
  604. }
  605. f.copyLen -= cnt
  606. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  607. f.toRead = f.dict.readFlush()
  608. f.step = (*decompressor).huffmanBlock // We need to continue this work
  609. f.stepState = stateDict
  610. return
  611. }
  612. goto readLiteral
  613. }
  614. }
  615. // Copy a single uncompressed data block from input to output.
  616. func (f *decompressor) dataBlock() {
  617. // Uncompressed.
  618. // Discard current half-byte.
  619. f.nb = 0
  620. f.b = 0
  621. // Length then ones-complement of length.
  622. nr, err := io.ReadFull(f.r, f.buf[0:4])
  623. f.roffset += int64(nr)
  624. if err != nil {
  625. f.err = noEOF(err)
  626. return
  627. }
  628. n := int(f.buf[0]) | int(f.buf[1])<<8
  629. nn := int(f.buf[2]) | int(f.buf[3])<<8
  630. if uint16(nn) != uint16(^n) {
  631. f.err = CorruptInputError(f.roffset)
  632. return
  633. }
  634. if n == 0 {
  635. f.toRead = f.dict.readFlush()
  636. f.finishBlock()
  637. return
  638. }
  639. f.copyLen = n
  640. f.copyData()
  641. }
  642. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  643. // It pauses for reads when f.hist is full.
  644. func (f *decompressor) copyData() {
  645. buf := f.dict.writeSlice()
  646. if len(buf) > f.copyLen {
  647. buf = buf[:f.copyLen]
  648. }
  649. cnt, err := io.ReadFull(f.r, buf)
  650. f.roffset += int64(cnt)
  651. f.copyLen -= cnt
  652. f.dict.writeMark(cnt)
  653. if err != nil {
  654. f.err = noEOF(err)
  655. return
  656. }
  657. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  658. f.toRead = f.dict.readFlush()
  659. f.step = (*decompressor).copyData
  660. return
  661. }
  662. f.finishBlock()
  663. }
  664. func (f *decompressor) finishBlock() {
  665. if f.final {
  666. if f.dict.availRead() > 0 {
  667. f.toRead = f.dict.readFlush()
  668. }
  669. f.err = io.EOF
  670. }
  671. f.step = (*decompressor).nextBlock
  672. }
  673. // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
  674. func noEOF(e error) error {
  675. if e == io.EOF {
  676. return io.ErrUnexpectedEOF
  677. }
  678. return e
  679. }
  680. func (f *decompressor) moreBits() error {
  681. c, err := f.r.ReadByte()
  682. if err != nil {
  683. return noEOF(err)
  684. }
  685. f.roffset++
  686. f.b |= uint32(c) << f.nb
  687. f.nb += 8
  688. return nil
  689. }
  690. // Read the next Huffman-encoded symbol from f according to h.
  691. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  692. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  693. // with single element, huffSym must error on these two edge cases. In both
  694. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  695. // satisfy the n == 0 check below.
  696. n := uint(h.min)
  697. // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
  698. // but is smart enough to keep local variables in registers, so use nb and b,
  699. // inline call to moreBits and reassign b,nb back to f on return.
  700. nb, b := f.nb, f.b
  701. for {
  702. for nb < n {
  703. c, err := f.r.ReadByte()
  704. if err != nil {
  705. f.b = b
  706. f.nb = nb
  707. return 0, noEOF(err)
  708. }
  709. f.roffset++
  710. b |= uint32(c) << (nb & 31)
  711. nb += 8
  712. }
  713. chunk := h.chunks[b&(huffmanNumChunks-1)]
  714. n = uint(chunk & huffmanCountMask)
  715. if n > huffmanChunkBits {
  716. chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
  717. n = uint(chunk & huffmanCountMask)
  718. }
  719. if n <= nb {
  720. if n == 0 {
  721. f.b = b
  722. f.nb = nb
  723. f.err = CorruptInputError(f.roffset)
  724. return 0, f.err
  725. }
  726. f.b = b >> (n & 31)
  727. f.nb = nb - n
  728. return int(chunk >> huffmanValueShift), nil
  729. }
  730. }
  731. }
  732. func makeReader(r io.Reader) Reader {
  733. if rr, ok := r.(Reader); ok {
  734. return rr
  735. }
  736. return bufio.NewReader(r)
  737. }
  738. func fixedHuffmanDecoderInit() {
  739. fixedOnce.Do(func() {
  740. // These come from the RFC section 3.2.6.
  741. var bits [288]int
  742. for i := 0; i < 144; i++ {
  743. bits[i] = 8
  744. }
  745. for i := 144; i < 256; i++ {
  746. bits[i] = 9
  747. }
  748. for i := 256; i < 280; i++ {
  749. bits[i] = 7
  750. }
  751. for i := 280; i < 288; i++ {
  752. bits[i] = 8
  753. }
  754. fixedHuffmanDecoder.init(bits[:])
  755. })
  756. }
  757. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  758. *f = decompressor{
  759. r: makeReader(r),
  760. bits: f.bits,
  761. codebits: f.codebits,
  762. h1: f.h1,
  763. h2: f.h2,
  764. dict: f.dict,
  765. step: (*decompressor).nextBlock,
  766. }
  767. f.dict.init(maxMatchOffset, dict)
  768. return nil
  769. }
  770. // NewReader returns a new ReadCloser that can be used
  771. // to read the uncompressed version of r.
  772. // If r does not also implement io.ByteReader,
  773. // the decompressor may read more data than necessary from r.
  774. // It is the caller's responsibility to call Close on the ReadCloser
  775. // when finished reading.
  776. //
  777. // The ReadCloser returned by NewReader also implements Resetter.
  778. func NewReader(r io.Reader) io.ReadCloser {
  779. fixedHuffmanDecoderInit()
  780. var f decompressor
  781. f.r = makeReader(r)
  782. f.bits = new([maxNumLit + maxNumDist]int)
  783. f.codebits = new([numCodes]int)
  784. f.step = (*decompressor).nextBlock
  785. f.dict.init(maxMatchOffset, nil)
  786. return &f
  787. }
  788. // NewReaderDict is like NewReader but initializes the reader
  789. // with a preset dictionary. The returned Reader behaves as if
  790. // the uncompressed data stream started with the given dictionary,
  791. // which has already been read. NewReaderDict is typically used
  792. // to read data compressed by NewWriterDict.
  793. //
  794. // The ReadCloser returned by NewReader also implements Resetter.
  795. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  796. fixedHuffmanDecoderInit()
  797. var f decompressor
  798. f.r = makeReader(r)
  799. f.bits = new([maxNumLit + maxNumDist]int)
  800. f.codebits = new([numCodes]int)
  801. f.step = (*decompressor).nextBlock
  802. f.dict.init(maxMatchOffset, dict)
  803. return &f
  804. }