1
0

enhanced_parser_test.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. package parser
  2. import (
  3. "context"
  4. "fmt"
  5. "strings"
  6. "testing"
  7. "time"
  8. )
  9. // Additional comprehensive performance benchmarks
  10. func BenchmarkOptimizedParser_ParseStream(b *testing.B) {
  11. logData := strings.Repeat(`127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0"`+"\n", 1000)
  12. config := DefaultParserConfig()
  13. parser := NewOptimizedParser(
  14. config,
  15. NewSimpleUserAgentParser(),
  16. &mockGeoIPService{},
  17. )
  18. b.ResetTimer()
  19. b.ReportAllocs()
  20. for i := 0; i < b.N; i++ {
  21. reader := strings.NewReader(logData)
  22. ctx := context.Background()
  23. _, err := parser.ParseStream(ctx, reader)
  24. if err != nil {
  25. b.Fatal(err)
  26. }
  27. }
  28. }
  29. func BenchmarkOptimizedParser_LargeScale(b *testing.B) {
  30. lines := make([]string, 10000)
  31. for i := range lines {
  32. lines[i] = fmt.Sprintf(`192.168.%d.%d - - [25/Dec/2023:10:%02d:%02d +0000] "GET /api/data/%d HTTP/1.1" 200 %d "https://example.com/page%d" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/96.%d.%d.%d"`,
  33. i%256, (i/256)%256, (i/60)%60, i%60, i, 1000+i, i%100, i%100, (i*7)%100, i%1000)
  34. }
  35. config := DefaultParserConfig()
  36. config.WorkerCount = 4
  37. config.BatchSize = 1000
  38. parser := NewOptimizedParser(
  39. config,
  40. NewCachedUserAgentParser(NewSimpleUserAgentParser(), 1000),
  41. &mockGeoIPService{},
  42. )
  43. b.ResetTimer()
  44. b.ReportAllocs()
  45. for i := 0; i < b.N; i++ {
  46. ctx := context.Background()
  47. result := parser.ParseLinesWithContext(ctx, lines)
  48. if result.Failed > 0 {
  49. b.Fatalf("parsing failed: %d errors", result.Failed)
  50. }
  51. }
  52. }
  53. func BenchmarkUserAgentParsing(b *testing.B) {
  54. userAgents := []string{
  55. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
  56. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
  57. "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
  58. "Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Mobile/15E148 Safari/604.1",
  59. "Mozilla/5.0 (Android 11; Mobile; rv:95.0) Gecko/95.0 Firefox/95.0",
  60. }
  61. b.Run("Simple", func(b *testing.B) {
  62. parser := NewSimpleUserAgentParser()
  63. b.ResetTimer()
  64. b.ReportAllocs()
  65. for i := 0; i < b.N; i++ {
  66. userAgent := userAgents[i%len(userAgents)]
  67. parser.Parse(userAgent)
  68. }
  69. })
  70. b.Run("Cached", func(b *testing.B) {
  71. parser := NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100)
  72. b.ResetTimer()
  73. b.ReportAllocs()
  74. for i := 0; i < b.N; i++ {
  75. userAgent := userAgents[i%len(userAgents)]
  76. parser.Parse(userAgent)
  77. }
  78. })
  79. }
  80. func BenchmarkConcurrentParsing(b *testing.B) {
  81. lines := make([]string, 1000)
  82. for i := range lines {
  83. lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)
  84. }
  85. config := DefaultParserConfig()
  86. config.WorkerCount = 8
  87. parser := NewOptimizedParser(
  88. config,
  89. NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),
  90. &mockGeoIPService{},
  91. )
  92. b.ResetTimer()
  93. b.ReportAllocs()
  94. b.RunParallel(func(pb *testing.PB) {
  95. for pb.Next() {
  96. result := parser.ParseLines(lines[:100]) // Smaller batches for parallel test
  97. if result.Failed > 0 {
  98. b.Fatalf("parsing failed: %d errors", result.Failed)
  99. }
  100. }
  101. })
  102. }
  103. // Memory usage benchmarks
  104. func BenchmarkMemoryUsage(b *testing.B) {
  105. line := `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`
  106. config := DefaultParserConfig()
  107. parser := NewOptimizedParser(
  108. config,
  109. NewSimpleUserAgentParser(),
  110. &mockGeoIPService{},
  111. )
  112. b.ResetTimer()
  113. b.ReportAllocs()
  114. for i := 0; i < b.N; i++ {
  115. entry, err := parser.ParseLine(line)
  116. if err != nil {
  117. b.Fatal(err)
  118. }
  119. _ = entry // Prevent optimization
  120. }
  121. }
  122. // Edge case tests
  123. func TestOptimizedParser_EdgeCases(t *testing.T) {
  124. tests := []struct {
  125. name string
  126. line string
  127. wantErr bool
  128. validate func(*AccessLogEntry) bool
  129. }{
  130. {
  131. name: "IPv6 address",
  132. line: `2001:0db8:85a3:0000:0000:8a2e:0370:7334 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234`,
  133. validate: func(entry *AccessLogEntry) bool {
  134. return entry.IP == "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
  135. },
  136. },
  137. {
  138. name: "Very long path",
  139. line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /` + strings.Repeat("a", 2000) + ` HTTP/1.1" 200 1234`,
  140. validate: func(entry *AccessLogEntry) bool {
  141. return len(entry.Path) == 2001 && strings.HasPrefix(entry.Path, "/a")
  142. },
  143. },
  144. {
  145. name: "Special characters in path",
  146. line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /测试/path%20with%20spaces?param=value&other=测试 HTTP/1.1" 200 1234`,
  147. validate: func(entry *AccessLogEntry) bool {
  148. return strings.Contains(entry.Path, "测试") && strings.Contains(entry.Path, "spaces")
  149. },
  150. },
  151. {
  152. name: "Large response size",
  153. line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /large-file HTTP/1.1" 200 999999999999`,
  154. validate: func(entry *AccessLogEntry) bool {
  155. return entry.BytesSent == 999999999999
  156. },
  157. },
  158. {
  159. name: "HTTP/2 protocol",
  160. line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/2" 200 1234`,
  161. validate: func(entry *AccessLogEntry) bool {
  162. return entry.Protocol == "HTTP/2"
  163. },
  164. },
  165. {
  166. name: "Extreme timing values",
  167. line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /slow HTTP/1.1" 200 1234 "-" "Mozilla/5.0" 30.123456 45.987654`,
  168. validate: func(entry *AccessLogEntry) bool {
  169. return entry.RequestTime == 30.123456 && entry.UpstreamTime != nil && *entry.UpstreamTime == 45.987654
  170. },
  171. },
  172. }
  173. config := DefaultParserConfig()
  174. parser := NewOptimizedParser(
  175. config,
  176. NewSimpleUserAgentParser(),
  177. &mockGeoIPService{},
  178. )
  179. for _, tt := range tests {
  180. t.Run(tt.name, func(t *testing.T) {
  181. entry, err := parser.ParseLine(tt.line)
  182. if tt.wantErr {
  183. if err == nil {
  184. t.Error("expected error but got none")
  185. }
  186. return
  187. }
  188. if err != nil {
  189. t.Errorf("unexpected error: %v", err)
  190. return
  191. }
  192. if entry == nil {
  193. t.Error("expected entry but got nil")
  194. return
  195. }
  196. if tt.validate != nil && !tt.validate(entry) {
  197. t.Errorf("entry validation failed: %+v", entry)
  198. }
  199. })
  200. }
  201. }
  202. // Concurrent safety test
  203. func TestOptimizedParser_ConcurrentSafety(t *testing.T) {
  204. config := DefaultParserConfig()
  205. parser := NewOptimizedParser(
  206. config,
  207. NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),
  208. &mockGeoIPService{},
  209. )
  210. lines := make([]string, 100)
  211. for i := range lines {
  212. lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)
  213. }
  214. // Start multiple goroutines parsing simultaneously
  215. const numGoroutines = 10
  216. results := make(chan *ParseResult, numGoroutines)
  217. for i := 0; i < numGoroutines; i++ {
  218. go func() {
  219. result := parser.ParseLines(lines)
  220. results <- result
  221. }()
  222. }
  223. // Collect results
  224. for i := 0; i < numGoroutines; i++ {
  225. result := <-results
  226. if result.Failed > 0 {
  227. t.Errorf("parsing failed in goroutine: %d errors", result.Failed)
  228. }
  229. if result.Succeeded != 100 {
  230. t.Errorf("expected 100 successful parses, got %d", result.Succeeded)
  231. }
  232. }
  233. }
  234. // Cache performance tests
  235. func TestCachedUserAgentParser_Performance(t *testing.T) {
  236. baseParser := NewSimpleUserAgentParser()
  237. cachedParser := NewCachedUserAgentParser(baseParser, 10)
  238. userAgent := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/96.0.4664.110"
  239. // Fill cache
  240. for i := 0; i < 5; i++ {
  241. uaVariant := fmt.Sprintf("%s.%d", userAgent, i)
  242. cachedParser.Parse(uaVariant)
  243. }
  244. // Test cache hits
  245. start := time.Now()
  246. for i := 0; i < 1000; i++ {
  247. uaVariant := fmt.Sprintf("%s.%d", userAgent, i%5)
  248. cachedParser.Parse(uaVariant)
  249. }
  250. cacheTime := time.Since(start)
  251. // Test without cache
  252. start = time.Now()
  253. for i := 0; i < 1000; i++ {
  254. uaVariant := fmt.Sprintf("%s.%d", userAgent, i%5)
  255. baseParser.Parse(uaVariant)
  256. }
  257. baseTime := time.Since(start)
  258. // Cache should be significantly faster
  259. if cacheTime >= baseTime {
  260. t.Logf("Cache time: %v, Base time: %v", cacheTime, baseTime)
  261. t.Error("cached parser should be faster than base parser for repeated queries")
  262. }
  263. size, _ := cachedParser.GetCacheStats()
  264. if size != 5 {
  265. t.Errorf("expected cache size 5, got %d", size)
  266. }
  267. }
  268. // Stress test with malformed data
  269. func TestOptimizedParser_StressTest(t *testing.T) {
  270. config := DefaultParserConfig()
  271. config.StrictMode = false
  272. parser := NewOptimizedParser(
  273. config,
  274. NewSimpleUserAgentParser(),
  275. &mockGeoIPService{},
  276. )
  277. // Generate mix of valid and invalid log lines
  278. lines := make([]string, 1000)
  279. for i := range lines {
  280. switch i % 10 {
  281. case 0:
  282. lines[i] = "" // Empty line
  283. case 1:
  284. lines[i] = "totally invalid log line" // Completely invalid
  285. case 2:
  286. lines[i] = `incomplete log line - - [25/Dec/2023:10:00:00` // Incomplete
  287. case 3:
  288. lines[i] = `127.0.0.1 - - [invalid-date] "GET / HTTP/1.1" 200 1234` // Invalid date
  289. default:
  290. // Valid lines with variations
  291. lines[i] = fmt.Sprintf(`192.168.%d.%d - - [25/Dec/2023:10:%02d:%02d +0000] "GET /test%d HTTP/1.1" %d %d "-" "Mozilla/5.0"`,
  292. i%256, (i/256)%256, (i/60)%60, i%60, i, 200+(i%100), 1000+i)
  293. }
  294. }
  295. result := parser.ParseLines(lines)
  296. // Should handle all lines gracefully
  297. if result.Processed != len(lines) {
  298. t.Errorf("processed count mismatch: got %d, want %d", result.Processed, len(lines))
  299. }
  300. // Should have some failures for malformed lines
  301. if result.Failed == 0 {
  302. t.Error("expected some parsing failures for malformed lines")
  303. }
  304. // Should have majority successes
  305. if float64(result.Succeeded)/float64(result.Processed) < 0.6 {
  306. t.Errorf("success rate too low: %d/%d = %.2f%%", result.Succeeded, result.Processed, 100.0*float64(result.Succeeded)/float64(result.Processed))
  307. }
  308. }
  309. // Test resource cleanup
  310. func TestOptimizedParser_ResourceCleanup(t *testing.T) {
  311. config := DefaultParserConfig()
  312. config.WorkerCount = 4
  313. parser := NewOptimizedParser(
  314. config,
  315. NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),
  316. &mockGeoIPService{},
  317. )
  318. // Create many parsing operations to test resource management
  319. for i := 0; i < 10; i++ {
  320. lines := make([]string, 100)
  321. for j := range lines {
  322. lines[j] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, j%255+1, j)
  323. }
  324. ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
  325. result := parser.ParseLinesWithContext(ctx, lines)
  326. cancel()
  327. if result.Failed > 0 {
  328. t.Errorf("iteration %d: unexpected parsing failures: %d", i, result.Failed)
  329. }
  330. }
  331. }
  332. // Performance comparison between different configurations
  333. func BenchmarkParserConfigurations(b *testing.B) {
  334. lines := make([]string, 1000)
  335. for i := range lines {
  336. lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)
  337. }
  338. configs := []struct {
  339. name string
  340. config *Config
  341. }{
  342. {
  343. name: "Single Worker",
  344. config: &Config{
  345. WorkerCount: 1,
  346. BatchSize: 100,
  347. BufferSize: 1000,
  348. EnableGeoIP: false,
  349. StrictMode: false,
  350. },
  351. },
  352. {
  353. name: "Multiple Workers",
  354. config: &Config{
  355. WorkerCount: 4,
  356. BatchSize: 250,
  357. BufferSize: 2000,
  358. EnableGeoIP: false,
  359. StrictMode: false,
  360. },
  361. },
  362. {
  363. name: "With GeoIP",
  364. config: &Config{
  365. WorkerCount: 4,
  366. BatchSize: 250,
  367. BufferSize: 2000,
  368. EnableGeoIP: true,
  369. StrictMode: false,
  370. },
  371. },
  372. {
  373. name: "Strict Mode",
  374. config: &Config{
  375. WorkerCount: 4,
  376. BatchSize: 250,
  377. BufferSize: 2000,
  378. EnableGeoIP: false,
  379. StrictMode: true,
  380. },
  381. },
  382. }
  383. for _, cfg := range configs {
  384. b.Run(cfg.name, func(b *testing.B) {
  385. parser := NewOptimizedParser(
  386. cfg.config,
  387. NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),
  388. &mockGeoIPService{},
  389. )
  390. b.ResetTimer()
  391. b.ReportAllocs()
  392. for i := 0; i < b.N; i++ {
  393. result := parser.ParseLines(lines)
  394. if result.Failed > len(lines)/2 { // Allow some failures in strict mode
  395. b.Fatalf("too many parsing failures: %d", result.Failed)
  396. }
  397. }
  398. })
  399. }
  400. }