Răsfoiți Sursa

Merge pull request #81 from orisano/master

improve performance of Resize
Grigory Dryapak 7 ani în urmă
părinte
commit
9458da53d1
2 fișiere modificate, cu 147 adăugiri și 108 ștergeri
  1. 22 18
      resize.go
  2. 125 90
      scanner.go

+ 22 - 18
resize.go

@@ -116,23 +116,25 @@ func resizeHorizontal(img image.Image, width int, filter ResampleFilter) *image.
 		for y := range ys {
 			src.scan(0, y, src.w, y+1, scanLine)
 			j0 := y * dst.Stride
-			for x := 0; x < width; x++ {
+			for x := range weights {
 				var r, g, b, a float64
 				for _, w := range weights[x] {
 					i := w.index * 4
-					aw := float64(scanLine[i+3]) * w.weight
-					r += float64(scanLine[i+0]) * aw
-					g += float64(scanLine[i+1]) * aw
-					b += float64(scanLine[i+2]) * aw
+					s := scanLine[i : i+4 : i+4]
+					aw := float64(s[3]) * w.weight
+					r += float64(s[0]) * aw
+					g += float64(s[1]) * aw
+					b += float64(s[2]) * aw
 					a += aw
 				}
 				if a != 0 {
 					aInv := 1 / a
 					j := j0 + x*4
-					dst.Pix[j+0] = clamp(r * aInv)
-					dst.Pix[j+1] = clamp(g * aInv)
-					dst.Pix[j+2] = clamp(b * aInv)
-					dst.Pix[j+3] = clamp(a)
+					d := dst.Pix[j : j+4 : j+4]
+					d[0] = clamp(r * aInv)
+					d[1] = clamp(g * aInv)
+					d[2] = clamp(b * aInv)
+					d[3] = clamp(a)
 				}
 			}
 		}
@@ -148,23 +150,25 @@ func resizeVertical(img image.Image, height int, filter ResampleFilter) *image.N
 		scanLine := make([]uint8, src.h*4)
 		for x := range xs {
 			src.scan(x, 0, x+1, src.h, scanLine)
-			for y := 0; y < height; y++ {
+			for y := range weights {
 				var r, g, b, a float64
 				for _, w := range weights[y] {
 					i := w.index * 4
-					aw := float64(scanLine[i+3]) * w.weight
-					r += float64(scanLine[i+0]) * aw
-					g += float64(scanLine[i+1]) * aw
-					b += float64(scanLine[i+2]) * aw
+					s := scanLine[i : i+4 : i+4]
+					aw := float64(s[3]) * w.weight
+					r += float64(s[0]) * aw
+					g += float64(s[1]) * aw
+					b += float64(s[2]) * aw
 					a += aw
 				}
 				if a != 0 {
 					aInv := 1 / a
 					j := y*dst.Stride + x*4
-					dst.Pix[j+0] = clamp(r * aInv)
-					dst.Pix[j+1] = clamp(g * aInv)
-					dst.Pix[j+2] = clamp(b * aInv)
-					dst.Pix[j+3] = clamp(a)
+					d := dst.Pix[j : j+4 : j+4]
+					d[0] = clamp(r * aInv)
+					d[1] = clamp(g * aInv)
+					d[2] = clamp(b * aInv)
+					d[3] = clamp(a)
 				}
 			}
 		}

+ 125 - 90
scanner.go

@@ -33,10 +33,23 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 		size := (x2 - x1) * 4
 		j := 0
 		i := y1*img.Stride + x1*4
-		for y := y1; y < y2; y++ {
-			copy(dst[j:j+size], img.Pix[i:i+size])
-			j += size
-			i += img.Stride
+		if size == 4 {
+			for y := y1; y < y2; y++ {
+				d := dst[j : j+4 : j+4]
+				s := img.Pix[i : i+4 : i+4]
+				d[0] = s[0]
+				d[1] = s[1]
+				d[2] = s[2]
+				d[3] = s[3]
+				j += size
+				i += img.Stride
+			}
+		} else {
+			for y := y1; y < y2; y++ {
+				copy(dst[j:j+size], img.Pix[i:i+size])
+				j += size
+				i += img.Stride
+			}
 		}
 
 	case *image.NRGBA64:
@@ -44,10 +57,12 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 		for y := y1; y < y2; y++ {
 			i := y*img.Stride + x1*8
 			for x := x1; x < x2; x++ {
-				dst[j+0] = img.Pix[i+0]
-				dst[j+1] = img.Pix[i+2]
-				dst[j+2] = img.Pix[i+4]
-				dst[j+3] = img.Pix[i+6]
+				s := img.Pix[i : i+8 : i+8]
+				d := dst[j : j+4 : j+4]
+				d[0] = s[0]
+				d[1] = s[2]
+				d[2] = s[4]
+				d[3] = s[6]
 				j += 4
 				i += 8
 			}
@@ -58,26 +73,31 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 		for y := y1; y < y2; y++ {
 			i := y*img.Stride + x1*4
 			for x := x1; x < x2; x++ {
+				d := dst[j : j+4 : j+4]
 				a := img.Pix[i+3]
 				switch a {
 				case 0:
-					dst[j+0] = 0
-					dst[j+1] = 0
-					dst[j+2] = 0
+					d[0] = 0
+					d[1] = 0
+					d[2] = 0
+					d[3] = a
 				case 0xff:
-					dst[j+0] = img.Pix[i+0]
-					dst[j+1] = img.Pix[i+1]
-					dst[j+2] = img.Pix[i+2]
+					s := img.Pix[i : i+4 : i+4]
+					d[0] = s[0]
+					d[1] = s[1]
+					d[2] = s[2]
+					d[3] = a
 				default:
-					r16 := uint16(img.Pix[i+0])
-					g16 := uint16(img.Pix[i+1])
-					b16 := uint16(img.Pix[i+2])
+					s := img.Pix[i : i+4 : i+4]
+					r16 := uint16(s[0])
+					g16 := uint16(s[1])
+					b16 := uint16(s[2])
 					a16 := uint16(a)
-					dst[j+0] = uint8(r16 * 0xff / a16)
-					dst[j+1] = uint8(g16 * 0xff / a16)
-					dst[j+2] = uint8(b16 * 0xff / a16)
+					d[0] = uint8(r16 * 0xff / a16)
+					d[1] = uint8(g16 * 0xff / a16)
+					d[2] = uint8(b16 * 0xff / a16)
+					d[3] = a
 				}
-				dst[j+3] = a
 				j += 4
 				i += 4
 			}
@@ -88,26 +108,28 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 		for y := y1; y < y2; y++ {
 			i := y*img.Stride + x1*8
 			for x := x1; x < x2; x++ {
-				a := img.Pix[i+6]
+				s := img.Pix[i : i+8 : i+8]
+				d := dst[j : j+4 : j+4]
+				a := s[6]
 				switch a {
 				case 0:
-					dst[j+0] = 0
-					dst[j+1] = 0
-					dst[j+2] = 0
+					d[0] = 0
+					d[1] = 0
+					d[2] = 0
 				case 0xff:
-					dst[j+0] = img.Pix[i+0]
-					dst[j+1] = img.Pix[i+2]
-					dst[j+2] = img.Pix[i+4]
+					d[0] = s[0]
+					d[1] = s[2]
+					d[2] = s[4]
 				default:
-					r32 := uint32(img.Pix[i+0])<<8 | uint32(img.Pix[i+1])
-					g32 := uint32(img.Pix[i+2])<<8 | uint32(img.Pix[i+3])
-					b32 := uint32(img.Pix[i+4])<<8 | uint32(img.Pix[i+5])
-					a32 := uint32(img.Pix[i+6])<<8 | uint32(img.Pix[i+7])
-					dst[j+0] = uint8((r32 * 0xffff / a32) >> 8)
-					dst[j+1] = uint8((g32 * 0xffff / a32) >> 8)
-					dst[j+2] = uint8((b32 * 0xffff / a32) >> 8)
+					r32 := uint32(s[0])<<8 | uint32(s[1])
+					g32 := uint32(s[2])<<8 | uint32(s[3])
+					b32 := uint32(s[4])<<8 | uint32(s[5])
+					a32 := uint32(s[6])<<8 | uint32(s[7])
+					d[0] = uint8((r32 * 0xffff / a32) >> 8)
+					d[1] = uint8((g32 * 0xffff / a32) >> 8)
+					d[2] = uint8((b32 * 0xffff / a32) >> 8)
 				}
-				dst[j+3] = a
+				d[3] = a
 				j += 4
 				i += 8
 			}
@@ -119,10 +141,11 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 			i := y*img.Stride + x1
 			for x := x1; x < x2; x++ {
 				c := img.Pix[i]
-				dst[j+0] = c
-				dst[j+1] = c
-				dst[j+2] = c
-				dst[j+3] = 0xff
+				d := dst[j : j+4 : j+4]
+				d[0] = c
+				d[1] = c
+				d[2] = c
+				d[3] = 0xff
 				j += 4
 				i++
 			}
@@ -134,10 +157,11 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 			i := y*img.Stride + x1*2
 			for x := x1; x < x2; x++ {
 				c := img.Pix[i]
-				dst[j+0] = c
-				dst[j+1] = c
-				dst[j+2] = c
-				dst[j+3] = 0xff
+				d := dst[j : j+4 : j+4]
+				d[0] = c
+				d[1] = c
+				d[2] = c
+				d[3] = 0xff
 				j += 4
 				i += 2
 			}
@@ -149,52 +173,61 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 		x2 += img.Rect.Min.X
 		y1 += img.Rect.Min.Y
 		y2 += img.Rect.Min.Y
+
+		hy := img.Rect.Min.Y / 2
+		hx := img.Rect.Min.X / 2
 		for y := y1; y < y2; y++ {
 			iy := (y-img.Rect.Min.Y)*img.YStride + (x1 - img.Rect.Min.X)
+
+			var yBase int
+			switch img.SubsampleRatio {
+			case image.YCbCrSubsampleRatio444, image.YCbCrSubsampleRatio422:
+				yBase = (y - img.Rect.Min.Y) * img.CStride
+			case image.YCbCrSubsampleRatio420, image.YCbCrSubsampleRatio440:
+				yBase = (y/2 - hy) * img.CStride
+			}
+
 			for x := x1; x < x2; x++ {
 				var ic int
 				switch img.SubsampleRatio {
-				case image.YCbCrSubsampleRatio444:
-					ic = (y-img.Rect.Min.Y)*img.CStride + (x - img.Rect.Min.X)
-				case image.YCbCrSubsampleRatio422:
-					ic = (y-img.Rect.Min.Y)*img.CStride + (x/2 - img.Rect.Min.X/2)
-				case image.YCbCrSubsampleRatio420:
-					ic = (y/2-img.Rect.Min.Y/2)*img.CStride + (x/2 - img.Rect.Min.X/2)
-				case image.YCbCrSubsampleRatio440:
-					ic = (y/2-img.Rect.Min.Y/2)*img.CStride + (x - img.Rect.Min.X)
+				case image.YCbCrSubsampleRatio444, image.YCbCrSubsampleRatio440:
+					ic = yBase + (x - img.Rect.Min.X)
+				case image.YCbCrSubsampleRatio422, image.YCbCrSubsampleRatio420:
+					ic = yBase + (x/2 - hx)
 				default:
 					ic = img.COffset(x, y)
 				}
 
-				yy := int(img.Y[iy])
-				cb := int(img.Cb[ic]) - 128
-				cr := int(img.Cr[ic]) - 128
+				yy1 := int32(img.Y[iy]) * 0x10101
+				cb1 := int32(img.Cb[ic]) - 128
+				cr1 := int32(img.Cr[ic]) - 128
 
-				r := (yy<<16 + 91881*cr + 1<<15) >> 16
-				if r > 0xff {
-					r = 0xff
-				} else if r < 0 {
-					r = 0
+				r := yy1 + 91881*cr1
+				if uint32(r)&0xff000000 == 0 {
+					r >>= 16
+				} else {
+					r = ^(r >> 31)
 				}
 
-				g := (yy<<16 - 22554*cb - 46802*cr + 1<<15) >> 16
-				if g > 0xff {
-					g = 0xff
-				} else if g < 0 {
-					g = 0
+				g := yy1 - 22554*cb1 - 46802*cr1
+				if uint32(g)&0xff000000 == 0 {
+					g >>= 16
+				} else {
+					g = ^(g >> 31)
 				}
 
-				b := (yy<<16 + 116130*cb + 1<<15) >> 16
-				if b > 0xff {
-					b = 0xff
-				} else if b < 0 {
-					b = 0
+				b := yy1 + 116130*cb1
+				if uint32(b)&0xff000000 == 0 {
+					b >>= 16
+				} else {
+					b = ^(b >> 31)
 				}
 
-				dst[j+0] = uint8(r)
-				dst[j+1] = uint8(g)
-				dst[j+2] = uint8(b)
-				dst[j+3] = 0xff
+				d := dst[j : j+4 : j+4]
+				d[0] = uint8(r)
+				d[1] = uint8(g)
+				d[2] = uint8(b)
+				d[3] = 0xff
 
 				iy++
 				j += 4
@@ -207,10 +240,11 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 			i := y*img.Stride + x1
 			for x := x1; x < x2; x++ {
 				c := s.palette[img.Pix[i]]
-				dst[j+0] = c.R
-				dst[j+1] = c.G
-				dst[j+2] = c.B
-				dst[j+3] = c.A
+				d := dst[j : j+4 : j+4]
+				d[0] = c.R
+				d[1] = c.G
+				d[2] = c.B
+				d[3] = c.A
 				j += 4
 				i++
 			}
@@ -226,22 +260,23 @@ func (s *scanner) scan(x1, y1, x2, y2 int, dst []uint8) {
 		for y := y1; y < y2; y++ {
 			for x := x1; x < x2; x++ {
 				r16, g16, b16, a16 := s.image.At(x, y).RGBA()
+				d := dst[j : j+4 : j+4]
 				switch a16 {
 				case 0xffff:
-					dst[j+0] = uint8(r16 >> 8)
-					dst[j+1] = uint8(g16 >> 8)
-					dst[j+2] = uint8(b16 >> 8)
-					dst[j+3] = 0xff
+					d[0] = uint8(r16 >> 8)
+					d[1] = uint8(g16 >> 8)
+					d[2] = uint8(b16 >> 8)
+					d[3] = 0xff
 				case 0:
-					dst[j+0] = 0
-					dst[j+1] = 0
-					dst[j+2] = 0
-					dst[j+3] = 0
+					d[0] = 0
+					d[1] = 0
+					d[2] = 0
+					d[3] = 0
 				default:
-					dst[j+0] = uint8(((r16 * 0xffff) / a16) >> 8)
-					dst[j+1] = uint8(((g16 * 0xffff) / a16) >> 8)
-					dst[j+2] = uint8(((b16 * 0xffff) / a16) >> 8)
-					dst[j+3] = uint8(a16 >> 8)
+					d[0] = uint8(((r16 * 0xffff) / a16) >> 8)
+					d[1] = uint8(((g16 * 0xffff) / a16) >> 8)
+					d[2] = uint8(((b16 * 0xffff) / a16) >> 8)
+					d[3] = uint8(a16 >> 8)
 				}
 				j += 4
 			}