simd_optimizations_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. package parser
  2. import (
  3. "context"
  4. "strings"
  5. "testing"
  6. )
  7. // BenchmarkSIMDOptimizations tests SIMD-optimized string operations
  8. func BenchmarkSIMDOptimizations(b *testing.B) {
  9. matcher := NewSIMDStringMatcher()
  10. testData := []byte(`127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0"`)
  11. benchmarks := []struct {
  12. name string
  13. fn func() interface{}
  14. }{
  15. {
  16. "SIMD_FindNextSpace",
  17. func() interface{} {
  18. return matcher.FindNextSpace(testData, 0)
  19. },
  20. },
  21. {
  22. "SIMD_FindNextQuote",
  23. func() interface{} {
  24. return matcher.FindNextQuote(testData, 0)
  25. },
  26. },
  27. {
  28. "SIMD_FindNextDigit",
  29. func() interface{} {
  30. return matcher.FindNextDigit(testData, 0)
  31. },
  32. },
  33. {
  34. "SIMD_ExtractIPAddress",
  35. func() interface{} {
  36. ip, _ := matcher.ExtractIPAddress(testData, 0)
  37. return ip
  38. },
  39. },
  40. {
  41. "SIMD_ExtractTimestamp",
  42. func() interface{} {
  43. timestamp, _ := matcher.ExtractTimestamp(testData, 0)
  44. return timestamp
  45. },
  46. },
  47. {
  48. "SIMD_ExtractQuotedString",
  49. func() interface{} {
  50. str, _ := matcher.ExtractQuotedString(testData, 50)
  51. return str
  52. },
  53. },
  54. {
  55. "SIMD_ExtractStatusCode",
  56. func() interface{} {
  57. status, _ := matcher.ExtractStatusCode(testData, 80)
  58. return status
  59. },
  60. },
  61. {
  62. "SIMD_ParseCompleteLine",
  63. func() interface{} {
  64. return matcher.ParseLogLineSIMD(testData)
  65. },
  66. },
  67. }
  68. for _, bench := range benchmarks {
  69. b.Run(bench.name, func(b *testing.B) {
  70. b.ResetTimer()
  71. b.ReportAllocs()
  72. for i := 0; i < b.N; i++ {
  73. result := bench.fn()
  74. _ = result // Avoid optimization
  75. }
  76. })
  77. }
  78. }
  79. // BenchmarkSIMDvsRegularParsing compares SIMD vs regular parsing performance
  80. func BenchmarkSIMDvsRegularParsing(b *testing.B) {
  81. // Setup test data
  82. logLines := []string{
  83. `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0"`,
  84. `192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "https://example.com" "curl/7.68.0"`,
  85. `10.0.0.1 - - [25/Dec/2023:10:00:02 +0000] "GET /style.css HTTP/1.1" 200 890 "https://example.com" "Mozilla/5.0"`,
  86. `203.0.113.195 - - [25/Dec/2023:10:00:03 +0000] "DELETE /api/users/123 HTTP/1.1" 204 0 "-" "Postman/7.36.0"`,
  87. `172.16.0.50 - - [25/Dec/2023:10:00:04 +0000] "PUT /api/config HTTP/1.1" 200 456 "https://admin.example.com" "Chrome/91.0"`,
  88. }
  89. // Convert to byte slices for SIMD processing
  90. logBytes := make([][]byte, len(logLines))
  91. for i, line := range logLines {
  92. logBytes[i] = []byte(line)
  93. }
  94. // Setup parsers
  95. config := DefaultParserConfig()
  96. config.MaxLineLength = 16 * 1024
  97. regularParser := NewOptimizedParser(
  98. config,
  99. NewSimpleUserAgentParser(),
  100. &mockGeoIPService{},
  101. )
  102. simdParser := NewOptimizedLogLineParser()
  103. b.Run("Regular_SingleLine", func(b *testing.B) {
  104. b.ResetTimer()
  105. b.ReportAllocs()
  106. for i := 0; i < b.N; i++ {
  107. line := logLines[i%len(logLines)]
  108. _, _ = regularParser.ParseLine(line)
  109. }
  110. })
  111. b.Run("SIMD_SingleLine", func(b *testing.B) {
  112. b.ResetTimer()
  113. b.ReportAllocs()
  114. for i := 0; i < b.N; i++ {
  115. lineBytes := logBytes[i%len(logBytes)]
  116. _ = simdParser.ParseLine(lineBytes)
  117. }
  118. })
  119. b.Run("Regular_BatchLines", func(b *testing.B) {
  120. b.ResetTimer()
  121. b.ReportAllocs()
  122. for i := 0; i < b.N; i++ {
  123. ctx := context.Background()
  124. _ = regularParser.ParseLinesWithContext(ctx, logLines)
  125. }
  126. })
  127. b.Run("SIMD_BatchLines", func(b *testing.B) {
  128. b.ResetTimer()
  129. b.ReportAllocs()
  130. for i := 0; i < b.N; i++ {
  131. _ = simdParser.ParseLines(logBytes)
  132. }
  133. })
  134. }
  135. // BenchmarkSIMDCharacterSearch compares SIMD vs standard character search
  136. func BenchmarkSIMDCharacterSearch(b *testing.B) {
  137. matcher := NewSIMDStringMatcher()
  138. testData := []byte(`127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`)
  139. b.Run("SIMD_SpaceSearch", func(b *testing.B) {
  140. b.ResetTimer()
  141. b.ReportAllocs()
  142. for i := 0; i < b.N; i++ {
  143. _ = matcher.FindNextSpace(testData, 0)
  144. }
  145. })
  146. b.Run("Standard_SpaceSearch", func(b *testing.B) {
  147. b.ResetTimer()
  148. b.ReportAllocs()
  149. for i := 0; i < b.N; i++ {
  150. // Standard byte-by-byte search
  151. for j := 0; j < len(testData); j++ {
  152. if testData[j] == ' ' {
  153. _ = j
  154. break
  155. }
  156. }
  157. }
  158. })
  159. b.Run("SIMD_QuoteSearch", func(b *testing.B) {
  160. b.ResetTimer()
  161. b.ReportAllocs()
  162. for i := 0; i < b.N; i++ {
  163. _ = matcher.FindNextQuote(testData, 0)
  164. }
  165. })
  166. b.Run("Standard_QuoteSearch", func(b *testing.B) {
  167. b.ResetTimer()
  168. b.ReportAllocs()
  169. for i := 0; i < b.N; i++ {
  170. // Standard byte-by-byte search
  171. for j := 0; j < len(testData); j++ {
  172. if testData[j] == '"' {
  173. _ = j
  174. break
  175. }
  176. }
  177. }
  178. })
  179. }
  180. // BenchmarkSIMDStringExtraction compares SIMD vs regex string extraction
  181. func BenchmarkSIMDStringExtraction(b *testing.B) {
  182. matcher := NewSIMDStringMatcher()
  183. testData := []byte(`127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234`)
  184. b.Run("SIMD_IPExtraction", func(b *testing.B) {
  185. b.ResetTimer()
  186. b.ReportAllocs()
  187. for i := 0; i < b.N; i++ {
  188. ip, _ := matcher.ExtractIPAddress(testData, 0)
  189. _ = ip
  190. }
  191. })
  192. b.Run("SIMD_TimestampExtraction", func(b *testing.B) {
  193. b.ResetTimer()
  194. b.ReportAllocs()
  195. for i := 0; i < b.N; i++ {
  196. timestamp, _ := matcher.ExtractTimestamp(testData, 0)
  197. _ = timestamp
  198. }
  199. })
  200. b.Run("SIMD_StatusExtraction", func(b *testing.B) {
  201. b.ResetTimer()
  202. b.ReportAllocs()
  203. for i := 0; i < b.N; i++ {
  204. status, _ := matcher.ExtractStatusCode(testData, 0)
  205. _ = status
  206. }
  207. })
  208. }
  209. // BenchmarkObjectPooling tests the performance impact of object pooling
  210. func BenchmarkObjectPooling(b *testing.B) {
  211. pool := NewAccessLogEntryPool()
  212. b.Run("WithPooling", func(b *testing.B) {
  213. b.ResetTimer()
  214. b.ReportAllocs()
  215. for i := 0; i < b.N; i++ {
  216. entry := pool.Get()
  217. entry.IP = "127.0.0.1"
  218. entry.Status = 200
  219. pool.Put(entry)
  220. }
  221. })
  222. b.Run("WithoutPooling", func(b *testing.B) {
  223. b.ResetTimer()
  224. b.ReportAllocs()
  225. for i := 0; i < b.N; i++ {
  226. entry := &AccessLogEntry{}
  227. entry.IP = "127.0.0.1"
  228. entry.Status = 200
  229. // No pooling - let GC handle
  230. }
  231. })
  232. }
  233. // BenchmarkSIMDScaleTest tests SIMD performance at different scales
  234. func BenchmarkSIMDScaleTest(b *testing.B) {
  235. simdParser := NewOptimizedLogLineParser()
  236. scales := []struct {
  237. name string
  238. lines int
  239. }{
  240. {"Small_100", 100},
  241. {"Medium_1K", 1000},
  242. {"Large_10K", 10000},
  243. {"XLarge_50K", 50000},
  244. }
  245. for _, scale := range scales {
  246. // Generate test data
  247. testLines := make([][]byte, scale.lines)
  248. for i := 0; i < scale.lines; i++ {
  249. line := generateTestLogLine(i)
  250. testLines[i] = []byte(line)
  251. }
  252. b.Run("SIMD_"+scale.name, func(b *testing.B) {
  253. b.ResetTimer()
  254. b.ReportAllocs()
  255. for i := 0; i < b.N; i++ {
  256. entries := simdParser.ParseLines(testLines)
  257. b.ReportMetric(float64(len(entries)), "parsed_lines")
  258. }
  259. })
  260. }
  261. }
  262. // generateTestLogLine generates a test log line for benchmarking
  263. func generateTestLogLine(index int) string {
  264. ip := "192.168.1." + string(rune('1' + (index % 254)))
  265. method := []string{"GET", "POST", "PUT", "DELETE"}[index%4]
  266. path := []string{"/", "/api/data", "/style.css", "/script.js"}[index%4]
  267. status := []int{200, 404, 500, 301}[index%4]
  268. return strings.Join([]string{
  269. ip, "- - [25/Dec/2023:10:00:00 +0000]",
  270. `"` + method + " " + path + ` HTTP/1.1"`,
  271. string(rune('0' + status/100)) + string(rune('0' + (status/10)%10)) + string(rune('0' + status%10)),
  272. "1234",
  273. `"https://example.com"`,
  274. `"Mozilla/5.0"`,
  275. }, " ")
  276. }
  277. // TestSIMDCorrectnessValidation validates SIMD operations produce correct results
  278. func TestSIMDCorrectnessValidation(t *testing.T) {
  279. matcher := NewSIMDStringMatcher()
  280. simdParser := NewOptimizedLogLineParser()
  281. testCases := []struct {
  282. name string
  283. line string
  284. expectedIP string
  285. expectedStatus int
  286. }{
  287. {
  288. "Standard_Log",
  289. `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0"`,
  290. "127.0.0.1",
  291. 200,
  292. },
  293. {
  294. "Complex_IP",
  295. `192.168.100.255 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "https://example.com" "curl/7.68.0"`,
  296. "192.168.100.255",
  297. 201,
  298. },
  299. {
  300. "Error_Status",
  301. `10.0.0.1 - - [25/Dec/2023:10:00:02 +0000] "GET /nonexistent HTTP/1.1" 404 0 "-" "Bot/1.0"`,
  302. "10.0.0.1",
  303. 404,
  304. },
  305. }
  306. for _, tc := range testCases {
  307. t.Run(tc.name, func(t *testing.T) {
  308. lineBytes := []byte(tc.line)
  309. // Test IP extraction
  310. ip, _ := matcher.ExtractIPAddress(lineBytes, 0)
  311. if ip != tc.expectedIP {
  312. t.Errorf("IP extraction failed: got %s, want %s", ip, tc.expectedIP)
  313. }
  314. // Test status extraction
  315. status, _ := matcher.ExtractStatusCode(lineBytes, 0)
  316. if status != tc.expectedStatus {
  317. t.Errorf("Status extraction failed: got %d, want %d", status, tc.expectedStatus)
  318. }
  319. // Test complete parsing
  320. entry := simdParser.ParseLine(lineBytes)
  321. if entry == nil {
  322. t.Fatal("SIMD parsing returned nil")
  323. }
  324. if entry.IP != tc.expectedIP {
  325. t.Errorf("Complete parsing IP failed: got %s, want %s", entry.IP, tc.expectedIP)
  326. }
  327. if entry.Status != tc.expectedStatus {
  328. t.Errorf("Complete parsing status failed: got %d, want %d", entry.Status, tc.expectedStatus)
  329. }
  330. })
  331. }
  332. }
  333. // TestSIMDLookupTables validates lookup table correctness
  334. func TestSIMDLookupTables(t *testing.T) {
  335. matcher := NewSIMDStringMatcher()
  336. // Test space lookup
  337. spaces := []byte{' ', '\t', '\n', '\r'}
  338. for _, c := range spaces {
  339. if !matcher.spaceLookup[c] {
  340. t.Errorf("Space lookup failed for character %c (%d)", c, c)
  341. }
  342. }
  343. // Test digit lookup
  344. for c := byte('0'); c <= '9'; c++ {
  345. if !matcher.digitLookup[c] {
  346. t.Errorf("Digit lookup failed for character %c", c)
  347. }
  348. }
  349. // Test quote lookup
  350. quotes := []byte{'"', '\''}
  351. for _, c := range quotes {
  352. if !matcher.quoteLookup[c] {
  353. t.Errorf("Quote lookup failed for character %c", c)
  354. }
  355. }
  356. // Test non-special characters
  357. if matcher.spaceLookup['a'] {
  358. t.Error("Space lookup false positive for 'a'")
  359. }
  360. if matcher.digitLookup['a'] {
  361. t.Error("Digit lookup false positive for 'a'")
  362. }
  363. }
  364. // TestObjectPoolEfficiency validates object pool functionality
  365. func TestObjectPoolEfficiency(t *testing.T) {
  366. pool := NewAccessLogEntryPool()
  367. // Get entry from pool
  368. entry1 := pool.Get()
  369. if entry1 == nil {
  370. t.Fatal("Pool returned nil entry")
  371. }
  372. // Modify entry
  373. entry1.IP = "127.0.0.1"
  374. entry1.Status = 200
  375. // Return to pool
  376. pool.Put(entry1)
  377. // Get another entry (should be reused)
  378. entry2 := pool.Get()
  379. if entry2 == nil {
  380. t.Fatal("Pool returned nil after put")
  381. }
  382. // Should be reset
  383. if entry2.IP != "" {
  384. t.Error("Pool entry not properly reset")
  385. }
  386. if entry2.Status != 0 {
  387. t.Error("Pool entry status not properly reset")
  388. }
  389. }