raster_fixed.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package vector
  5. // This file contains a fixed point math implementation of the vector
  6. // graphics rasterizer.
  7. const (
  8. // ϕ is the number of binary digits after the fixed point.
  9. //
  10. // For example, if ϕ == 10 (and int1ϕ is based on the int32 type) then we
  11. // are using 22.10 fixed point math.
  12. //
  13. // When changing this number, also change the assembly code (search for ϕ
  14. // in the .s files).
  15. ϕ = 9
  16. fxOne int1ϕ = 1 << ϕ
  17. fxOneAndAHalf int1ϕ = 1<<ϕ + 1<<(ϕ-1)
  18. fxOneMinusIota int1ϕ = 1<<ϕ - 1 // Used for rounding up.
  19. )
  20. // int1ϕ is a signed fixed-point number with 1*ϕ binary digits after the fixed
  21. // point.
  22. type int1ϕ int32
  23. // int2ϕ is a signed fixed-point number with 2*ϕ binary digits after the fixed
  24. // point.
  25. //
  26. // The Rasterizer's bufU32 field, nominally of type []uint32 (since that slice
  27. // is also used by other code), can be thought of as a []int2ϕ during the
  28. // fixedLineTo method. Lines of code that are actually like:
  29. // buf[i] += uint32(etc) // buf has type []uint32.
  30. // can be thought of as
  31. // buf[i] += int2ϕ(etc) // buf has type []int2ϕ.
  32. type int2ϕ int32
  33. func fixedMax(x, y int1ϕ) int1ϕ {
  34. if x > y {
  35. return x
  36. }
  37. return y
  38. }
  39. func fixedMin(x, y int1ϕ) int1ϕ {
  40. if x < y {
  41. return x
  42. }
  43. return y
  44. }
  45. func fixedFloor(x int1ϕ) int32 { return int32(x >> ϕ) }
  46. func fixedCeil(x int1ϕ) int32 { return int32((x + fxOneMinusIota) >> ϕ) }
  47. func (z *Rasterizer) fixedLineTo(bx, by float32) {
  48. ax, ay := z.penX, z.penY
  49. z.penX, z.penY = bx, by
  50. dir := int1ϕ(1)
  51. if ay > by {
  52. dir, ax, ay, bx, by = -1, bx, by, ax, ay
  53. }
  54. // Horizontal line segments yield no change in coverage. Almost horizontal
  55. // segments would yield some change, in ideal math, but the computation
  56. // further below, involving 1 / (by - ay), is unstable in fixed point math,
  57. // so we treat the segment as if it was perfectly horizontal.
  58. if by-ay <= 0.000001 {
  59. return
  60. }
  61. dxdy := (bx - ax) / (by - ay)
  62. ayϕ := int1ϕ(ay * float32(fxOne))
  63. byϕ := int1ϕ(by * float32(fxOne))
  64. x := int1ϕ(ax * float32(fxOne))
  65. y := fixedFloor(ayϕ)
  66. yMax := fixedCeil(byϕ)
  67. if yMax > int32(z.size.Y) {
  68. yMax = int32(z.size.Y)
  69. }
  70. width := int32(z.size.X)
  71. for ; y < yMax; y++ {
  72. dy := fixedMin(int1ϕ(y+1)<<ϕ, byϕ) - fixedMax(int1ϕ(y)<<ϕ, ayϕ)
  73. xNext := x + int1ϕ(float32(dy)*dxdy)
  74. if y < 0 {
  75. x = xNext
  76. continue
  77. }
  78. buf := z.bufU32[y*width:]
  79. d := dy * dir // d ranges up to ±1<<(1*ϕ).
  80. x0, x1 := x, xNext
  81. if x > xNext {
  82. x0, x1 = x1, x0
  83. }
  84. x0i := fixedFloor(x0)
  85. x0Floor := int1ϕ(x0i) << ϕ
  86. x1i := fixedCeil(x1)
  87. x1Ceil := int1ϕ(x1i) << ϕ
  88. if x1i <= x0i+1 {
  89. xmf := (x+xNext)>>1 - x0Floor
  90. if i := clamp(x0i+0, width); i < uint(len(buf)) {
  91. buf[i] += uint32(d * (fxOne - xmf))
  92. }
  93. if i := clamp(x0i+1, width); i < uint(len(buf)) {
  94. buf[i] += uint32(d * xmf)
  95. }
  96. } else {
  97. oneOverS := x1 - x0
  98. twoOverS := 2 * oneOverS
  99. x0f := x0 - x0Floor
  100. oneMinusX0f := fxOne - x0f
  101. oneMinusX0fSquared := oneMinusX0f * oneMinusX0f
  102. x1f := x1 - x1Ceil + fxOne
  103. x1fSquared := x1f * x1f
  104. // These next two variables are unused, as rounding errors are
  105. // minimized when we delay the division by oneOverS for as long as
  106. // possible. These lines of code (and the "In ideal math" comments
  107. // below) are commented out instead of deleted in order to aid the
  108. // comparison with the floating point version of the rasterizer.
  109. //
  110. // a0 := ((oneMinusX0f * oneMinusX0f) >> 1) / oneOverS
  111. // am := ((x1f * x1f) >> 1) / oneOverS
  112. if i := clamp(x0i, width); i < uint(len(buf)) {
  113. // In ideal math: buf[i] += uint32(d * a0)
  114. D := oneMinusX0fSquared // D ranges up to ±1<<(2*ϕ).
  115. D *= d // D ranges up to ±1<<(3*ϕ).
  116. D /= twoOverS
  117. buf[i] += uint32(D)
  118. }
  119. if x1i == x0i+2 {
  120. if i := clamp(x0i+1, width); i < uint(len(buf)) {
  121. // In ideal math: buf[i] += uint32(d * (fxOne - a0 - am))
  122. //
  123. // (x1i == x0i+2) and (twoOverS == 2 * (x1 - x0)) implies
  124. // that twoOverS ranges up to +1<<(1*ϕ+2).
  125. D := twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared // D ranges up to ±1<<(2*ϕ+2).
  126. D *= d // D ranges up to ±1<<(3*ϕ+2).
  127. D /= twoOverS
  128. buf[i] += uint32(D)
  129. }
  130. } else {
  131. // This is commented out for the same reason as a0 and am.
  132. //
  133. // a1 := ((fxOneAndAHalf - x0f) << ϕ) / oneOverS
  134. if i := clamp(x0i+1, width); i < uint(len(buf)) {
  135. // In ideal math:
  136. // buf[i] += uint32(d * (a1 - a0))
  137. // or equivalently (but better in non-ideal, integer math,
  138. // with respect to rounding errors),
  139. // buf[i] += uint32(A * d / twoOverS)
  140. // where
  141. // A = (a1 - a0) * twoOverS
  142. // = a1*twoOverS - a0*twoOverS
  143. // Noting that twoOverS/oneOverS equals 2, substituting for
  144. // a0 and then a1, given above, yields:
  145. // A = a1*twoOverS - oneMinusX0fSquared
  146. // = (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared
  147. // = fxOneAndAHalf<<(ϕ+1) - x0f<<(ϕ+1) - oneMinusX0fSquared
  148. //
  149. // This is a positive number minus two non-negative
  150. // numbers. For an upper bound on A, the positive number is
  151. // P = fxOneAndAHalf<<(ϕ+1)
  152. // < (2*fxOne)<<(ϕ+1)
  153. // = fxOne<<(ϕ+2)
  154. // = 1<<(2*ϕ+2)
  155. //
  156. // For a lower bound on A, the two non-negative numbers are
  157. // N = x0f<<(ϕ+1) + oneMinusX0fSquared
  158. // ≤ x0f<<(ϕ+1) + fxOne*fxOne
  159. // = x0f<<(ϕ+1) + 1<<(2*ϕ)
  160. // < x0f<<(ϕ+1) + 1<<(2*ϕ+1)
  161. // ≤ fxOne<<(ϕ+1) + 1<<(2*ϕ+1)
  162. // = 1<<(2*ϕ+1) + 1<<(2*ϕ+1)
  163. // = 1<<(2*ϕ+2)
  164. //
  165. // Thus, A ranges up to ±1<<(2*ϕ+2). It is possible to
  166. // derive a tighter bound, but this bound is sufficient to
  167. // reason about overflow.
  168. D := (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared // D ranges up to ±1<<(2*ϕ+2).
  169. D *= d // D ranges up to ±1<<(3*ϕ+2).
  170. D /= twoOverS
  171. buf[i] += uint32(D)
  172. }
  173. dTimesS := uint32((d << (2 * ϕ)) / oneOverS)
  174. for xi := x0i + 2; xi < x1i-1; xi++ {
  175. if i := clamp(xi, width); i < uint(len(buf)) {
  176. buf[i] += dTimesS
  177. }
  178. }
  179. // This is commented out for the same reason as a0 and am.
  180. //
  181. // a2 := a1 + (int1ϕ(x1i-x0i-3)<<(2*ϕ))/oneOverS
  182. if i := clamp(x1i-1, width); i < uint(len(buf)) {
  183. // In ideal math:
  184. // buf[i] += uint32(d * (fxOne - a2 - am))
  185. // or equivalently (but better in non-ideal, integer math,
  186. // with respect to rounding errors),
  187. // buf[i] += uint32(A * d / twoOverS)
  188. // where
  189. // A = (fxOne - a2 - am) * twoOverS
  190. // = twoOverS<<ϕ - a2*twoOverS - am*twoOverS
  191. // Noting that twoOverS/oneOverS equals 2, substituting for
  192. // am and then a2, given above, yields:
  193. // A = twoOverS<<ϕ - a2*twoOverS - x1f*x1f
  194. // = twoOverS<<ϕ - a1*twoOverS - (int1ϕ(x1i-x0i-3)<<(2*ϕ))*2 - x1f*x1f
  195. // = twoOverS<<ϕ - a1*twoOverS - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
  196. // Substituting for a1, given above, yields:
  197. // A = twoOverS<<ϕ - ((fxOneAndAHalf-x0f)<<ϕ)*2 - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
  198. // = twoOverS<<ϕ - (fxOneAndAHalf-x0f)<<(ϕ+1) - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
  199. // = B<<ϕ - x1f*x1f
  200. // where
  201. // B = twoOverS - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
  202. // = (x1-x0)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
  203. //
  204. // Re-arranging the defintions given above:
  205. // x0Floor := int1ϕ(x0i) << ϕ
  206. // x0f := x0 - x0Floor
  207. // x1Ceil := int1ϕ(x1i) << ϕ
  208. // x1f := x1 - x1Ceil + fxOne
  209. // combined with fxOne = 1<<ϕ yields:
  210. // x0 = x0f + int1ϕ(x0i)<<ϕ
  211. // x1 = x1f + int1ϕ(x1i-1)<<ϕ
  212. // so that expanding (x1-x0) yields:
  213. // B = (x1f-x0f + int1ϕ(x1i-x0i-1)<<ϕ)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
  214. // = (x1f-x0f)<<1 + int1ϕ(x1i-x0i-1)<<(ϕ+1) - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
  215. // A large part of the second and fourth terms cancel:
  216. // B = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(-2)<<(ϕ+1)
  217. // = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 + 1<<(ϕ+2)
  218. // = (x1f - fxOneAndAHalf)<<1 + 1<<(ϕ+2)
  219. // The first term, (x1f - fxOneAndAHalf)<<1, is a negative
  220. // number, bounded below by -fxOneAndAHalf<<1, which is
  221. // greater than -fxOne<<2, or -1<<(ϕ+2). Thus, B ranges up
  222. // to ±1<<(ϕ+2). One final simplification:
  223. // B = x1f<<1 + (1<<(ϕ+2) - fxOneAndAHalf<<1)
  224. const C = 1<<(ϕ+2) - fxOneAndAHalf<<1
  225. D := x1f<<1 + C // D ranges up to ±1<<(1*ϕ+2).
  226. D <<= ϕ // D ranges up to ±1<<(2*ϕ+2).
  227. D -= x1fSquared // D ranges up to ±1<<(2*ϕ+3).
  228. D *= d // D ranges up to ±1<<(3*ϕ+3).
  229. D /= twoOverS
  230. buf[i] += uint32(D)
  231. }
  232. }
  233. if i := clamp(x1i, width); i < uint(len(buf)) {
  234. // In ideal math: buf[i] += uint32(d * am)
  235. D := x1fSquared // D ranges up to ±1<<(2*ϕ).
  236. D *= d // D ranges up to ±1<<(3*ϕ).
  237. D /= twoOverS
  238. buf[i] += uint32(D)
  239. }
  240. }
  241. x = xNext
  242. }
  243. }
  244. func fixedAccumulateOpOver(dst []uint8, src []uint32) {
  245. // Sanity check that len(dst) >= len(src).
  246. if len(dst) < len(src) {
  247. return
  248. }
  249. acc := int2ϕ(0)
  250. for i, v := range src {
  251. acc += int2ϕ(v)
  252. a := acc
  253. if a < 0 {
  254. a = -a
  255. }
  256. a >>= 2*ϕ - 16
  257. if a > 0xffff {
  258. a = 0xffff
  259. }
  260. // This algorithm comes from the standard library's image/draw package.
  261. dstA := uint32(dst[i]) * 0x101
  262. maskA := uint32(a)
  263. outA := dstA*(0xffff-maskA)/0xffff + maskA
  264. dst[i] = uint8(outA >> 8)
  265. }
  266. }
  267. func fixedAccumulateOpSrc(dst []uint8, src []uint32) {
  268. // Sanity check that len(dst) >= len(src).
  269. if len(dst) < len(src) {
  270. return
  271. }
  272. acc := int2ϕ(0)
  273. for i, v := range src {
  274. acc += int2ϕ(v)
  275. a := acc
  276. if a < 0 {
  277. a = -a
  278. }
  279. a >>= 2*ϕ - 8
  280. if a > 0xff {
  281. a = 0xff
  282. }
  283. dst[i] = uint8(a)
  284. }
  285. }
  286. func fixedAccumulateMask(buf []uint32) {
  287. acc := int2ϕ(0)
  288. for i, v := range buf {
  289. acc += int2ϕ(v)
  290. a := acc
  291. if a < 0 {
  292. a = -a
  293. }
  294. a >>= 2*ϕ - 16
  295. if a > 0xffff {
  296. a = 0xffff
  297. }
  298. buf[i] = uint32(a)
  299. }
  300. }