reconstruct.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package vp8
  5. // This file implements decoding DCT/WHT residual coefficients and
  6. // reconstructing YCbCr data equal to predicted values plus residuals.
  7. //
  8. // There are 1*16*16 + 2*8*8 + 1*4*4 coefficients per macroblock:
  9. // - 1*16*16 luma DCT coefficients,
  10. // - 2*8*8 chroma DCT coefficients, and
  11. // - 1*4*4 luma WHT coefficients.
  12. // Coefficients are read in lots of 16, and the later coefficients in each lot
  13. // are often zero.
  14. //
  15. // The YCbCr data consists of 1*16*16 luma values and 2*8*8 chroma values,
  16. // plus previously decoded values along the top and left borders. The combined
  17. // values are laid out as a [1+16+1+8][32]uint8 so that vertically adjacent
  18. // samples are 32 bytes apart. In detail, the layout is:
  19. //
  20. // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  21. // . . . . . . . a b b b b b b b b b b b b b b b b c c c c . . . . 0
  22. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 1
  23. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 2
  24. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 3
  25. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . . 4
  26. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 5
  27. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 6
  28. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 7
  29. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . . 8
  30. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 9
  31. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 10
  32. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 11
  33. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . . 12
  34. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 13
  35. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 14
  36. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 15
  37. // . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 16
  38. // . . . . . . . e f f f f f f f f . . . . . . . g h h h h h h h h 17
  39. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 18
  40. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 19
  41. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 20
  42. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 21
  43. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 22
  44. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 23
  45. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 24
  46. // . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 25
  47. //
  48. // Y, B and R are the reconstructed luma (Y) and chroma (B, R) values.
  49. // The Y values are predicted (either as one 16x16 region or 16 4x4 regions)
  50. // based on the row above's Y values (some combination of {abc} or {dYC}) and
  51. // the column left's Y values (either {ad} or {bY}). Similarly, B and R values
  52. // are predicted on the row above and column left of their respective 8x8
  53. // region: {efi} for B, {ghj} for R.
  54. //
  55. // For uppermost macroblocks (i.e. those with mby == 0), the {abcefgh} values
  56. // are initialized to 0x7f. Otherwise, they are copied from the bottom row of
  57. // the macroblock above. The {c} values are then duplicated from row 0 to rows
  58. // 4, 8 and 12 of the ybr workspace.
  59. // Similarly, for leftmost macroblocks (i.e. those with mbx == 0), the {adeigj}
  60. // values are initialized to 0x81. Otherwise, they are copied from the right
  61. // column of the macroblock to the left.
  62. // For the top-left macroblock (with mby == 0 && mbx == 0), {aeg} is 0x7f.
  63. //
  64. // When moving from one macroblock to the next horizontally, the {adeigj}
  65. // values can simply be copied from the workspace to itself, shifted by 8 or
  66. // 16 columns. When moving from one macroblock to the next vertically,
  67. // filtering can occur and hence the row values have to be copied from the
  68. // post-filtered image instead of the pre-filtered workspace.
  69. const (
  70. bCoeffBase = 1*16*16 + 0*8*8
  71. rCoeffBase = 1*16*16 + 1*8*8
  72. whtCoeffBase = 1*16*16 + 2*8*8
  73. )
  74. const (
  75. ybrYX = 8
  76. ybrYY = 1
  77. ybrBX = 8
  78. ybrBY = 18
  79. ybrRX = 24
  80. ybrRY = 18
  81. )
  82. // prepareYBR prepares the {abcdefghij} elements of ybr.
  83. func (d *Decoder) prepareYBR(mbx, mby int) {
  84. if mbx == 0 {
  85. for y := 0; y < 17; y++ {
  86. d.ybr[y][7] = 0x81
  87. }
  88. for y := 17; y < 26; y++ {
  89. d.ybr[y][7] = 0x81
  90. d.ybr[y][23] = 0x81
  91. }
  92. } else {
  93. for y := 0; y < 17; y++ {
  94. d.ybr[y][7] = d.ybr[y][7+16]
  95. }
  96. for y := 17; y < 26; y++ {
  97. d.ybr[y][7] = d.ybr[y][15]
  98. d.ybr[y][23] = d.ybr[y][31]
  99. }
  100. }
  101. if mby == 0 {
  102. for x := 7; x < 28; x++ {
  103. d.ybr[0][x] = 0x7f
  104. }
  105. for x := 7; x < 16; x++ {
  106. d.ybr[17][x] = 0x7f
  107. }
  108. for x := 23; x < 32; x++ {
  109. d.ybr[17][x] = 0x7f
  110. }
  111. } else {
  112. for i := 0; i < 16; i++ {
  113. d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+i]
  114. }
  115. for i := 0; i < 8; i++ {
  116. d.ybr[17][8+i] = d.img.Cb[(8*mby-1)*d.img.CStride+8*mbx+i]
  117. }
  118. for i := 0; i < 8; i++ {
  119. d.ybr[17][24+i] = d.img.Cr[(8*mby-1)*d.img.CStride+8*mbx+i]
  120. }
  121. if mbx == d.mbw-1 {
  122. for i := 16; i < 20; i++ {
  123. d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+15]
  124. }
  125. } else {
  126. for i := 16; i < 20; i++ {
  127. d.ybr[0][8+i] = d.img.Y[(16*mby-1)*d.img.YStride+16*mbx+i]
  128. }
  129. }
  130. }
  131. for y := 4; y < 16; y += 4 {
  132. d.ybr[y][24] = d.ybr[0][24]
  133. d.ybr[y][25] = d.ybr[0][25]
  134. d.ybr[y][26] = d.ybr[0][26]
  135. d.ybr[y][27] = d.ybr[0][27]
  136. }
  137. }
  138. // btou converts a bool to a 0/1 value.
  139. func btou(b bool) uint8 {
  140. if b {
  141. return 1
  142. }
  143. return 0
  144. }
  145. // pack packs four 0/1 values into four bits of a uint32.
  146. func pack(x [4]uint8, shift int) uint32 {
  147. u := uint32(x[0])<<0 | uint32(x[1])<<1 | uint32(x[2])<<2 | uint32(x[3])<<3
  148. return u << uint(shift)
  149. }
  150. // unpack unpacks four 0/1 values from a four-bit value.
  151. var unpack = [16][4]uint8{
  152. {0, 0, 0, 0},
  153. {1, 0, 0, 0},
  154. {0, 1, 0, 0},
  155. {1, 1, 0, 0},
  156. {0, 0, 1, 0},
  157. {1, 0, 1, 0},
  158. {0, 1, 1, 0},
  159. {1, 1, 1, 0},
  160. {0, 0, 0, 1},
  161. {1, 0, 0, 1},
  162. {0, 1, 0, 1},
  163. {1, 1, 0, 1},
  164. {0, 0, 1, 1},
  165. {1, 0, 1, 1},
  166. {0, 1, 1, 1},
  167. {1, 1, 1, 1},
  168. }
  169. var (
  170. // The mapping from 4x4 region position to band is specified in section 13.3.
  171. bands = [17]uint8{0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0}
  172. // Category probabilties are specified in section 13.2.
  173. // Decoding categories 1 and 2 are done inline.
  174. cat3456 = [4][12]uint8{
  175. {173, 148, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0},
  176. {176, 155, 140, 135, 0, 0, 0, 0, 0, 0, 0, 0},
  177. {180, 157, 141, 134, 130, 0, 0, 0, 0, 0, 0, 0},
  178. {254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0},
  179. }
  180. // The zigzag order is:
  181. // 0 1 5 6
  182. // 2 4 7 12
  183. // 3 8 11 13
  184. // 9 10 14 15
  185. zigzag = [16]uint8{0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}
  186. )
  187. // parseResiduals4 parses a 4x4 region of residual coefficients, as specified
  188. // in section 13.3, and returns a 0/1 value indicating whether there was at
  189. // least one non-zero coefficient.
  190. // r is the partition to read bits from.
  191. // plane and context describe which token probability table to use. context is
  192. // either 0, 1 or 2, and equals how many of the macroblock left and macroblock
  193. // above have non-zero coefficients.
  194. // quant are the DC/AC quantization factors.
  195. // skipFirstCoeff is whether the DC coefficient has already been parsed.
  196. // coeffBase is the base index of d.coeff to write to.
  197. func (d *Decoder) parseResiduals4(r *partition, plane int, context uint8, quant [2]uint16, skipFirstCoeff bool, coeffBase int) uint8 {
  198. prob, n := &d.tokenProb[plane], 0
  199. if skipFirstCoeff {
  200. n = 1
  201. }
  202. p := prob[bands[n]][context]
  203. if !r.readBit(p[0]) {
  204. return 0
  205. }
  206. for n != 16 {
  207. n++
  208. if !r.readBit(p[1]) {
  209. p = prob[bands[n]][0]
  210. continue
  211. }
  212. var v uint32
  213. if !r.readBit(p[2]) {
  214. v = 1
  215. p = prob[bands[n]][1]
  216. } else {
  217. if !r.readBit(p[3]) {
  218. if !r.readBit(p[4]) {
  219. v = 2
  220. } else {
  221. v = 3 + r.readUint(p[5], 1)
  222. }
  223. } else if !r.readBit(p[6]) {
  224. if !r.readBit(p[7]) {
  225. // Category 1.
  226. v = 5 + r.readUint(159, 1)
  227. } else {
  228. // Category 2.
  229. v = 7 + 2*r.readUint(165, 1) + r.readUint(145, 1)
  230. }
  231. } else {
  232. // Categories 3, 4, 5 or 6.
  233. b1 := r.readUint(p[8], 1)
  234. b0 := r.readUint(p[9+b1], 1)
  235. cat := 2*b1 + b0
  236. tab := &cat3456[cat]
  237. v = 0
  238. for i := 0; tab[i] != 0; i++ {
  239. v *= 2
  240. v += r.readUint(tab[i], 1)
  241. }
  242. v += 3 + (8 << cat)
  243. }
  244. p = prob[bands[n]][2]
  245. }
  246. z := zigzag[n-1]
  247. c := int32(v) * int32(quant[btou(z > 0)])
  248. if r.readBit(uniformProb) {
  249. c = -c
  250. }
  251. d.coeff[coeffBase+int(z)] = int16(c)
  252. if n == 16 || !r.readBit(p[0]) {
  253. return 1
  254. }
  255. }
  256. return 1
  257. }
  258. // parseResiduals parses the residuals and returns whether inner loop filtering
  259. // should be skipped for this macroblock.
  260. func (d *Decoder) parseResiduals(mbx, mby int) (skip bool) {
  261. partition := &d.op[mby&(d.nOP-1)]
  262. plane := planeY1SansY2
  263. quant := &d.quant[d.segment]
  264. // Parse the DC coefficient of each 4x4 luma region.
  265. if d.usePredY16 {
  266. nz := d.parseResiduals4(partition, planeY2, d.leftMB.nzY16+d.upMB[mbx].nzY16, quant.y2, false, whtCoeffBase)
  267. d.leftMB.nzY16 = nz
  268. d.upMB[mbx].nzY16 = nz
  269. d.inverseWHT16()
  270. plane = planeY1WithY2
  271. }
  272. var (
  273. nzDC, nzAC [4]uint8
  274. nzDCMask, nzACMask uint32
  275. coeffBase int
  276. )
  277. // Parse the luma coefficients.
  278. lnz := unpack[d.leftMB.nzMask&0x0f]
  279. unz := unpack[d.upMB[mbx].nzMask&0x0f]
  280. for y := 0; y < 4; y++ {
  281. nz := lnz[y]
  282. for x := 0; x < 4; x++ {
  283. nz = d.parseResiduals4(partition, plane, nz+unz[x], quant.y1, d.usePredY16, coeffBase)
  284. unz[x] = nz
  285. nzAC[x] = nz
  286. nzDC[x] = btou(d.coeff[coeffBase] != 0)
  287. coeffBase += 16
  288. }
  289. lnz[y] = nz
  290. nzDCMask |= pack(nzDC, y*4)
  291. nzACMask |= pack(nzAC, y*4)
  292. }
  293. lnzMask := pack(lnz, 0)
  294. unzMask := pack(unz, 0)
  295. // Parse the chroma coefficients.
  296. lnz = unpack[d.leftMB.nzMask>>4]
  297. unz = unpack[d.upMB[mbx].nzMask>>4]
  298. for c := 0; c < 4; c += 2 {
  299. for y := 0; y < 2; y++ {
  300. nz := lnz[y+c]
  301. for x := 0; x < 2; x++ {
  302. nz = d.parseResiduals4(partition, planeUV, nz+unz[x+c], quant.uv, false, coeffBase)
  303. unz[x+c] = nz
  304. nzAC[y*2+x] = nz
  305. nzDC[y*2+x] = btou(d.coeff[coeffBase] != 0)
  306. coeffBase += 16
  307. }
  308. lnz[y+c] = nz
  309. }
  310. nzDCMask |= pack(nzDC, 16+c*2)
  311. nzACMask |= pack(nzAC, 16+c*2)
  312. }
  313. lnzMask |= pack(lnz, 4)
  314. unzMask |= pack(unz, 4)
  315. // Save decoder state.
  316. d.leftMB.nzMask = uint8(lnzMask)
  317. d.upMB[mbx].nzMask = uint8(unzMask)
  318. d.nzDCMask = nzDCMask
  319. d.nzACMask = nzACMask
  320. // Section 15.1 of the spec says that "Steps 2 and 4 [of the loop filter]
  321. // are skipped... [if] there is no DCT coefficient coded for the whole
  322. // macroblock."
  323. return nzDCMask == 0 && nzACMask == 0
  324. }
  325. // reconstructMacroblock applies the predictor functions and adds the inverse-
  326. // DCT transformed residuals to recover the YCbCr data.
  327. func (d *Decoder) reconstructMacroblock(mbx, mby int) {
  328. if d.usePredY16 {
  329. p := checkTopLeftPred(mbx, mby, d.predY16)
  330. predFunc16[p](d, 1, 8)
  331. for j := 0; j < 4; j++ {
  332. for i := 0; i < 4; i++ {
  333. n := 4*j + i
  334. y := 4*j + 1
  335. x := 4*i + 8
  336. mask := uint32(1) << uint(n)
  337. if d.nzACMask&mask != 0 {
  338. d.inverseDCT4(y, x, 16*n)
  339. } else if d.nzDCMask&mask != 0 {
  340. d.inverseDCT4DCOnly(y, x, 16*n)
  341. }
  342. }
  343. }
  344. } else {
  345. for j := 0; j < 4; j++ {
  346. for i := 0; i < 4; i++ {
  347. n := 4*j + i
  348. y := 4*j + 1
  349. x := 4*i + 8
  350. predFunc4[d.predY4[j][i]](d, y, x)
  351. mask := uint32(1) << uint(n)
  352. if d.nzACMask&mask != 0 {
  353. d.inverseDCT4(y, x, 16*n)
  354. } else if d.nzDCMask&mask != 0 {
  355. d.inverseDCT4DCOnly(y, x, 16*n)
  356. }
  357. }
  358. }
  359. }
  360. p := checkTopLeftPred(mbx, mby, d.predC8)
  361. predFunc8[p](d, ybrBY, ybrBX)
  362. if d.nzACMask&0x0f0000 != 0 {
  363. d.inverseDCT8(ybrBY, ybrBX, bCoeffBase)
  364. } else if d.nzDCMask&0x0f0000 != 0 {
  365. d.inverseDCT8DCOnly(ybrBY, ybrBX, bCoeffBase)
  366. }
  367. predFunc8[p](d, ybrRY, ybrRX)
  368. if d.nzACMask&0xf00000 != 0 {
  369. d.inverseDCT8(ybrRY, ybrRX, rCoeffBase)
  370. } else if d.nzDCMask&0xf00000 != 0 {
  371. d.inverseDCT8DCOnly(ybrRY, ybrRX, rCoeffBase)
  372. }
  373. }
  374. // reconstruct reconstructs one macroblock and returns whether inner loop
  375. // filtering should be skipped for it.
  376. func (d *Decoder) reconstruct(mbx, mby int) (skip bool) {
  377. if d.segmentHeader.updateMap {
  378. if !d.fp.readBit(d.segmentHeader.prob[0]) {
  379. d.segment = int(d.fp.readUint(d.segmentHeader.prob[1], 1))
  380. } else {
  381. d.segment = int(d.fp.readUint(d.segmentHeader.prob[2], 1)) + 2
  382. }
  383. }
  384. if d.useSkipProb {
  385. skip = d.fp.readBit(d.skipProb)
  386. }
  387. // Prepare the workspace.
  388. for i := range d.coeff {
  389. d.coeff[i] = 0
  390. }
  391. d.prepareYBR(mbx, mby)
  392. // Parse the predictor modes.
  393. d.usePredY16 = d.fp.readBit(145)
  394. if d.usePredY16 {
  395. d.parsePredModeY16(mbx)
  396. } else {
  397. d.parsePredModeY4(mbx)
  398. }
  399. d.parsePredModeC8()
  400. // Parse the residuals.
  401. if !skip {
  402. skip = d.parseResiduals(mbx, mby)
  403. } else {
  404. if d.usePredY16 {
  405. d.leftMB.nzY16 = 0
  406. d.upMB[mbx].nzY16 = 0
  407. }
  408. d.leftMB.nzMask = 0
  409. d.upMB[mbx].nzMask = 0
  410. d.nzDCMask = 0
  411. d.nzACMask = 0
  412. }
  413. // Reconstruct the YCbCr data and copy it to the image.
  414. d.reconstructMacroblock(mbx, mby)
  415. for i, y := (mby*d.img.YStride+mbx)*16, 0; y < 16; i, y = i+d.img.YStride, y+1 {
  416. copy(d.img.Y[i:i+16], d.ybr[ybrYY+y][ybrYX:ybrYX+16])
  417. }
  418. for i, y := (mby*d.img.CStride+mbx)*8, 0; y < 8; i, y = i+d.img.CStride, y+1 {
  419. copy(d.img.Cb[i:i+8], d.ybr[ybrBY+y][ybrBX:ybrBX+8])
  420. copy(d.img.Cr[i:i+8], d.ybr[ybrRY+y][ybrRX:ybrRX+8])
  421. }
  422. return skip
  423. }