decode.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package vp8 implements a decoder for the VP8 lossy image format.
  5. //
  6. // The VP8 specification is RFC 6386.
  7. package vp8 // import "golang.org/x/image/vp8"
  8. // This file implements the top-level decoding algorithm.
  9. import (
  10. "errors"
  11. "image"
  12. "io"
  13. )
  14. // limitReader wraps an io.Reader to read at most n bytes from it.
  15. type limitReader struct {
  16. r io.Reader
  17. n int
  18. }
  19. // ReadFull reads exactly len(p) bytes into p.
  20. func (r *limitReader) ReadFull(p []byte) error {
  21. if len(p) > r.n {
  22. return io.ErrUnexpectedEOF
  23. }
  24. n, err := io.ReadFull(r.r, p)
  25. r.n -= n
  26. return err
  27. }
  28. // FrameHeader is a frame header, as specified in section 9.1.
  29. type FrameHeader struct {
  30. KeyFrame bool
  31. VersionNumber uint8
  32. ShowFrame bool
  33. FirstPartitionLen uint32
  34. Width int
  35. Height int
  36. XScale uint8
  37. YScale uint8
  38. }
  39. const (
  40. nSegment = 4
  41. nSegmentProb = 3
  42. )
  43. // segmentHeader holds segment-related header information.
  44. type segmentHeader struct {
  45. useSegment bool
  46. updateMap bool
  47. relativeDelta bool
  48. quantizer [nSegment]int8
  49. filterStrength [nSegment]int8
  50. prob [nSegmentProb]uint8
  51. }
  52. const (
  53. nRefLFDelta = 4
  54. nModeLFDelta = 4
  55. )
  56. // filterHeader holds filter-related header information.
  57. type filterHeader struct {
  58. simple bool
  59. level int8
  60. sharpness uint8
  61. useLFDelta bool
  62. refLFDelta [nRefLFDelta]int8
  63. modeLFDelta [nModeLFDelta]int8
  64. perSegmentLevel [nSegment]int8
  65. }
  66. // mb is the per-macroblock decode state. A decoder maintains mbw+1 of these
  67. // as it is decoding macroblocks left-to-right and top-to-bottom: mbw for the
  68. // macroblocks in the row above, and one for the macroblock to the left.
  69. type mb struct {
  70. // pred is the predictor mode for the 4 bottom or right 4x4 luma regions.
  71. pred [4]uint8
  72. // nzMask is a mask of 8 bits: 4 for the bottom or right 4x4 luma regions,
  73. // and 2 + 2 for the bottom or right 4x4 chroma regions. A 1 bit indicates
  74. // that that region has non-zero coefficients.
  75. nzMask uint8
  76. // nzY16 is a 0/1 value that is 1 if the macroblock used Y16 prediction and
  77. // had non-zero coefficients.
  78. nzY16 uint8
  79. }
  80. // Decoder decodes VP8 bitstreams into frames. Decoding one frame consists of
  81. // calling Init, DecodeFrameHeader and then DecodeFrame in that order.
  82. // A Decoder can be re-used to decode multiple frames.
  83. type Decoder struct {
  84. // r is the input bitsream.
  85. r limitReader
  86. // scratch is a scratch buffer.
  87. scratch [8]byte
  88. // img is the YCbCr image to decode into.
  89. img *image.YCbCr
  90. // mbw and mbh are the number of 16x16 macroblocks wide and high the image is.
  91. mbw, mbh int
  92. // frameHeader is the frame header. When decoding multiple frames,
  93. // frames that aren't key frames will inherit the Width, Height,
  94. // XScale and YScale of the most recent key frame.
  95. frameHeader FrameHeader
  96. // Other headers.
  97. segmentHeader segmentHeader
  98. filterHeader filterHeader
  99. // The image data is divided into a number of independent partitions.
  100. // There is 1 "first partition" and between 1 and 8 "other partitions"
  101. // for coefficient data.
  102. fp partition
  103. op [8]partition
  104. nOP int
  105. // Quantization factors.
  106. quant [nSegment]quant
  107. // DCT/WHT coefficient decoding probabilities.
  108. tokenProb [nPlane][nBand][nContext][nProb]uint8
  109. useSkipProb bool
  110. skipProb uint8
  111. // Loop filter parameters.
  112. filterParams [nSegment][2]filterParam
  113. perMBFilterParams []filterParam
  114. // The eight fields below relate to the current macroblock being decoded.
  115. //
  116. // Segment-based adjustments.
  117. segment int
  118. // Per-macroblock state for the macroblock immediately left of and those
  119. // macroblocks immediately above the current macroblock.
  120. leftMB mb
  121. upMB []mb
  122. // Bitmasks for which 4x4 regions of coeff contain non-zero coefficients.
  123. nzDCMask, nzACMask uint32
  124. // Predictor modes.
  125. usePredY16 bool // The libwebp C code calls this !is_i4x4_.
  126. predY16 uint8
  127. predC8 uint8
  128. predY4 [4][4]uint8
  129. // The two fields below form a workspace for reconstructing a macroblock.
  130. // Their specific sizes are documented in reconstruct.go.
  131. coeff [1*16*16 + 2*8*8 + 1*4*4]int16
  132. ybr [1 + 16 + 1 + 8][32]uint8
  133. }
  134. // NewDecoder returns a new Decoder.
  135. func NewDecoder() *Decoder {
  136. return &Decoder{}
  137. }
  138. // Init initializes the decoder to read at most n bytes from r.
  139. func (d *Decoder) Init(r io.Reader, n int) {
  140. d.r = limitReader{r, n}
  141. }
  142. // DecodeFrameHeader decodes the frame header.
  143. func (d *Decoder) DecodeFrameHeader() (fh FrameHeader, err error) {
  144. // All frame headers are at least 3 bytes long.
  145. b := d.scratch[:3]
  146. if err = d.r.ReadFull(b); err != nil {
  147. return
  148. }
  149. d.frameHeader.KeyFrame = (b[0] & 1) == 0
  150. d.frameHeader.VersionNumber = (b[0] >> 1) & 7
  151. d.frameHeader.ShowFrame = (b[0]>>4)&1 == 1
  152. d.frameHeader.FirstPartitionLen = uint32(b[0])>>5 | uint32(b[1])<<3 | uint32(b[2])<<11
  153. if !d.frameHeader.KeyFrame {
  154. return d.frameHeader, nil
  155. }
  156. // Frame headers for key frames are an additional 7 bytes long.
  157. b = d.scratch[:7]
  158. if err = d.r.ReadFull(b); err != nil {
  159. return
  160. }
  161. // Check the magic sync code.
  162. if b[0] != 0x9d || b[1] != 0x01 || b[2] != 0x2a {
  163. err = errors.New("vp8: invalid format")
  164. return
  165. }
  166. d.frameHeader.Width = int(b[4]&0x3f)<<8 | int(b[3])
  167. d.frameHeader.Height = int(b[6]&0x3f)<<8 | int(b[5])
  168. d.frameHeader.XScale = b[4] >> 6
  169. d.frameHeader.YScale = b[6] >> 6
  170. d.mbw = (d.frameHeader.Width + 0x0f) >> 4
  171. d.mbh = (d.frameHeader.Height + 0x0f) >> 4
  172. d.segmentHeader = segmentHeader{
  173. prob: [3]uint8{0xff, 0xff, 0xff},
  174. }
  175. d.tokenProb = defaultTokenProb
  176. d.segment = 0
  177. return d.frameHeader, nil
  178. }
  179. // ensureImg ensures that d.img is large enough to hold the decoded frame.
  180. func (d *Decoder) ensureImg() {
  181. if d.img != nil {
  182. p0, p1 := d.img.Rect.Min, d.img.Rect.Max
  183. if p0.X == 0 && p0.Y == 0 && p1.X >= 16*d.mbw && p1.Y >= 16*d.mbh {
  184. return
  185. }
  186. }
  187. m := image.NewYCbCr(image.Rect(0, 0, 16*d.mbw, 16*d.mbh), image.YCbCrSubsampleRatio420)
  188. d.img = m.SubImage(image.Rect(0, 0, d.frameHeader.Width, d.frameHeader.Height)).(*image.YCbCr)
  189. d.perMBFilterParams = make([]filterParam, d.mbw*d.mbh)
  190. d.upMB = make([]mb, d.mbw)
  191. }
  192. // parseSegmentHeader parses the segment header, as specified in section 9.3.
  193. func (d *Decoder) parseSegmentHeader() {
  194. d.segmentHeader.useSegment = d.fp.readBit(uniformProb)
  195. if !d.segmentHeader.useSegment {
  196. d.segmentHeader.updateMap = false
  197. return
  198. }
  199. d.segmentHeader.updateMap = d.fp.readBit(uniformProb)
  200. if d.fp.readBit(uniformProb) {
  201. d.segmentHeader.relativeDelta = !d.fp.readBit(uniformProb)
  202. for i := range d.segmentHeader.quantizer {
  203. d.segmentHeader.quantizer[i] = int8(d.fp.readOptionalInt(uniformProb, 7))
  204. }
  205. for i := range d.segmentHeader.filterStrength {
  206. d.segmentHeader.filterStrength[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
  207. }
  208. }
  209. if !d.segmentHeader.updateMap {
  210. return
  211. }
  212. for i := range d.segmentHeader.prob {
  213. if d.fp.readBit(uniformProb) {
  214. d.segmentHeader.prob[i] = uint8(d.fp.readUint(uniformProb, 8))
  215. } else {
  216. d.segmentHeader.prob[i] = 0xff
  217. }
  218. }
  219. }
  220. // parseFilterHeader parses the filter header, as specified in section 9.4.
  221. func (d *Decoder) parseFilterHeader() {
  222. d.filterHeader.simple = d.fp.readBit(uniformProb)
  223. d.filterHeader.level = int8(d.fp.readUint(uniformProb, 6))
  224. d.filterHeader.sharpness = uint8(d.fp.readUint(uniformProb, 3))
  225. d.filterHeader.useLFDelta = d.fp.readBit(uniformProb)
  226. if d.filterHeader.useLFDelta && d.fp.readBit(uniformProb) {
  227. for i := range d.filterHeader.refLFDelta {
  228. d.filterHeader.refLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
  229. }
  230. for i := range d.filterHeader.modeLFDelta {
  231. d.filterHeader.modeLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
  232. }
  233. }
  234. if d.filterHeader.level == 0 {
  235. return
  236. }
  237. if d.segmentHeader.useSegment {
  238. for i := range d.filterHeader.perSegmentLevel {
  239. strength := d.segmentHeader.filterStrength[i]
  240. if d.segmentHeader.relativeDelta {
  241. strength += d.filterHeader.level
  242. }
  243. d.filterHeader.perSegmentLevel[i] = strength
  244. }
  245. } else {
  246. d.filterHeader.perSegmentLevel[0] = d.filterHeader.level
  247. }
  248. d.computeFilterParams()
  249. }
  250. // parseOtherPartitions parses the other partitions, as specified in section 9.5.
  251. func (d *Decoder) parseOtherPartitions() error {
  252. const maxNOP = 1 << 3
  253. var partLens [maxNOP]int
  254. d.nOP = 1 << d.fp.readUint(uniformProb, 2)
  255. // The final partition length is implied by the the remaining chunk data
  256. // (d.r.n) and the other d.nOP-1 partition lengths. Those d.nOP-1 partition
  257. // lengths are stored as 24-bit uints, i.e. up to 16 MiB per partition.
  258. n := 3 * (d.nOP - 1)
  259. partLens[d.nOP-1] = d.r.n - n
  260. if partLens[d.nOP-1] < 0 {
  261. return io.ErrUnexpectedEOF
  262. }
  263. if n > 0 {
  264. buf := make([]byte, n)
  265. if err := d.r.ReadFull(buf); err != nil {
  266. return err
  267. }
  268. for i := 0; i < d.nOP-1; i++ {
  269. pl := int(buf[3*i+0]) | int(buf[3*i+1])<<8 | int(buf[3*i+2])<<16
  270. if pl > partLens[d.nOP-1] {
  271. return io.ErrUnexpectedEOF
  272. }
  273. partLens[i] = pl
  274. partLens[d.nOP-1] -= pl
  275. }
  276. }
  277. // We check if the final partition length can also fit into a 24-bit uint.
  278. // Strictly speaking, this isn't part of the spec, but it guards against a
  279. // malicious WEBP image that is too large to ReadFull the encoded DCT
  280. // coefficients into memory, whether that's because the actual WEBP file is
  281. // too large, or whether its RIFF metadata lists too large a chunk.
  282. if 1<<24 <= partLens[d.nOP-1] {
  283. return errors.New("vp8: too much data to decode")
  284. }
  285. buf := make([]byte, d.r.n)
  286. if err := d.r.ReadFull(buf); err != nil {
  287. return err
  288. }
  289. for i, pl := range partLens {
  290. if i == d.nOP {
  291. break
  292. }
  293. d.op[i].init(buf[:pl])
  294. buf = buf[pl:]
  295. }
  296. return nil
  297. }
  298. // parseOtherHeaders parses header information other than the frame header.
  299. func (d *Decoder) parseOtherHeaders() error {
  300. // Initialize and parse the first partition.
  301. firstPartition := make([]byte, d.frameHeader.FirstPartitionLen)
  302. if err := d.r.ReadFull(firstPartition); err != nil {
  303. return err
  304. }
  305. d.fp.init(firstPartition)
  306. if d.frameHeader.KeyFrame {
  307. // Read and ignore the color space and pixel clamp values. They are
  308. // specified in section 9.2, but are unimplemented.
  309. d.fp.readBit(uniformProb)
  310. d.fp.readBit(uniformProb)
  311. }
  312. d.parseSegmentHeader()
  313. d.parseFilterHeader()
  314. if err := d.parseOtherPartitions(); err != nil {
  315. return err
  316. }
  317. d.parseQuant()
  318. if !d.frameHeader.KeyFrame {
  319. // Golden and AltRef frames are specified in section 9.7.
  320. // TODO(nigeltao): implement. Note that they are only used for video, not still images.
  321. return errors.New("vp8: Golden / AltRef frames are not implemented")
  322. }
  323. // Read and ignore the refreshLastFrameBuffer bit, specified in section 9.8.
  324. // It applies only to video, and not still images.
  325. d.fp.readBit(uniformProb)
  326. d.parseTokenProb()
  327. d.useSkipProb = d.fp.readBit(uniformProb)
  328. if d.useSkipProb {
  329. d.skipProb = uint8(d.fp.readUint(uniformProb, 8))
  330. }
  331. if d.fp.unexpectedEOF {
  332. return io.ErrUnexpectedEOF
  333. }
  334. return nil
  335. }
  336. // DecodeFrame decodes the frame and returns it as an YCbCr image.
  337. // The image's contents are valid up until the next call to Decoder.Init.
  338. func (d *Decoder) DecodeFrame() (*image.YCbCr, error) {
  339. d.ensureImg()
  340. if err := d.parseOtherHeaders(); err != nil {
  341. return nil, err
  342. }
  343. // Reconstruct the rows.
  344. for mbx := 0; mbx < d.mbw; mbx++ {
  345. d.upMB[mbx] = mb{}
  346. }
  347. for mby := 0; mby < d.mbh; mby++ {
  348. d.leftMB = mb{}
  349. for mbx := 0; mbx < d.mbw; mbx++ {
  350. skip := d.reconstruct(mbx, mby)
  351. fs := d.filterParams[d.segment][btou(!d.usePredY16)]
  352. fs.inner = fs.inner || !skip
  353. d.perMBFilterParams[d.mbw*mby+mbx] = fs
  354. }
  355. }
  356. if d.fp.unexpectedEOF {
  357. return nil, io.ErrUnexpectedEOF
  358. }
  359. for i := 0; i < d.nOP; i++ {
  360. if d.op[i].unexpectedEOF {
  361. return nil, io.ErrUnexpectedEOF
  362. }
  363. }
  364. // Apply the loop filter.
  365. //
  366. // Even if we are using per-segment levels, section 15 says that "loop
  367. // filtering must be skipped entirely if loop_filter_level at either the
  368. // frame header level or macroblock override level is 0".
  369. if d.filterHeader.level != 0 {
  370. if d.filterHeader.simple {
  371. d.simpleFilter()
  372. } else {
  373. d.normalFilter()
  374. }
  375. }
  376. return d.img, nil
  377. }