| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456 | package parserimport (	"context"	"fmt"	"strings"	"testing"	"time")// Additional comprehensive performance benchmarksfunc BenchmarkOptimizedParser_ParseStream(b *testing.B) {	logData := strings.Repeat(`127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0"`+"\n", 1000)	config := DefaultParserConfig()	parser := NewOptimizedParser(		config,		NewSimpleUserAgentParser(),		&mockGeoIPService{},	)	b.ResetTimer()	b.ReportAllocs()	for i := 0; i < b.N; i++ {		reader := strings.NewReader(logData)		ctx := context.Background()		_, err := parser.ParseStream(ctx, reader)		if err != nil {			b.Fatal(err)		}	}}func BenchmarkOptimizedParser_LargeScale(b *testing.B) {	lines := make([]string, 10000)	for i := range lines {		lines[i] = fmt.Sprintf(`192.168.%d.%d - - [25/Dec/2023:10:%02d:%02d +0000] "GET /api/data/%d HTTP/1.1" 200 %d "https://example.com/page%d" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/96.%d.%d.%d"`,			i%256, (i/256)%256, (i/60)%60, i%60, i, 1000+i, i%100, i%100, (i*7)%100, i%1000)	}	config := DefaultParserConfig()	config.WorkerCount = 4	config.BatchSize = 1000	parser := NewOptimizedParser(		config,		NewCachedUserAgentParser(NewSimpleUserAgentParser(), 1000),		&mockGeoIPService{},	)	b.ResetTimer()	b.ReportAllocs()	for i := 0; i < b.N; i++ {		ctx := context.Background()		result := parser.ParseLinesWithContext(ctx, lines)		if result.Failed > 0 {			b.Fatalf("parsing failed: %d errors", result.Failed)		}	}}func BenchmarkUserAgentParsing(b *testing.B) {	userAgents := []string{		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",		"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",		"Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Mobile/15E148 Safari/604.1",		"Mozilla/5.0 (Android 11; Mobile; rv:95.0) Gecko/95.0 Firefox/95.0",	}	b.Run("Simple", func(b *testing.B) {		parser := NewSimpleUserAgentParser()		b.ResetTimer()		b.ReportAllocs()		for i := 0; i < b.N; i++ {			userAgent := userAgents[i%len(userAgents)]			parser.Parse(userAgent)		}	})	b.Run("Cached", func(b *testing.B) {		parser := NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100)		b.ResetTimer()		b.ReportAllocs()		for i := 0; i < b.N; i++ {			userAgent := userAgents[i%len(userAgents)]			parser.Parse(userAgent)		}	})}func BenchmarkConcurrentParsing(b *testing.B) {	lines := make([]string, 1000)	for i := range lines {		lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)	}	config := DefaultParserConfig()	config.WorkerCount = 8	parser := NewOptimizedParser(		config,		NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),		&mockGeoIPService{},	)	b.ResetTimer()	b.ReportAllocs()	b.RunParallel(func(pb *testing.PB) {		for pb.Next() {			result := parser.ParseLines(lines[:100]) // Smaller batches for parallel test			if result.Failed > 0 {				b.Fatalf("parsing failed: %d errors", result.Failed)			}		}	})}// Memory usage benchmarksfunc BenchmarkMemoryUsage(b *testing.B) {	line := `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`	config := DefaultParserConfig()	parser := NewOptimizedParser(		config,		NewSimpleUserAgentParser(),		&mockGeoIPService{},	)	b.ResetTimer()	b.ReportAllocs()	for i := 0; i < b.N; i++ {		entry, err := parser.ParseLine(line)		if err != nil {			b.Fatal(err)		}		_ = entry // Prevent optimization	}}// Edge case testsfunc TestOptimizedParser_EdgeCases(t *testing.T) {	tests := []struct {		name     string		line     string		wantErr  bool		validate func(*AccessLogEntry) bool	}{		{			name: "IPv6 address",			line: `2001:0db8:85a3:0000:0000:8a2e:0370:7334 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234`,			validate: func(entry *AccessLogEntry) bool {				return entry.IP == "2001:0db8:85a3:0000:0000:8a2e:0370:7334"			},		},		{			name: "Very long path",			line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /` + strings.Repeat("a", 2000) + ` HTTP/1.1" 200 1234`,			validate: func(entry *AccessLogEntry) bool {				return len(entry.Path) == 2001 && strings.HasPrefix(entry.Path, "/a")			},		},		{			name: "Special characters in path",			line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /测试/path%20with%20spaces?param=value&other=测试 HTTP/1.1" 200 1234`,			validate: func(entry *AccessLogEntry) bool {				return strings.Contains(entry.Path, "测试") && strings.Contains(entry.Path, "spaces")			},		},		{			name: "Large response size",			line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /large-file HTTP/1.1" 200 999999999999`,			validate: func(entry *AccessLogEntry) bool {				return entry.BytesSent == 999999999999			},		},		{			name: "HTTP/2 protocol",			line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/2" 200 1234`,			validate: func(entry *AccessLogEntry) bool {				return entry.Protocol == "HTTP/2"			},		},		{			name: "Extreme timing values",			line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /slow HTTP/1.1" 200 1234 "-" "Mozilla/5.0" 30.123456 45.987654`,			validate: func(entry *AccessLogEntry) bool {				return entry.RequestTime == 30.123456 && entry.UpstreamTime != nil && *entry.UpstreamTime == 45.987654			},		},	}	config := DefaultParserConfig()	parser := NewOptimizedParser(		config,		NewSimpleUserAgentParser(),		&mockGeoIPService{},	)	for _, tt := range tests {		t.Run(tt.name, func(t *testing.T) {			entry, err := parser.ParseLine(tt.line)			if tt.wantErr {				if err == nil {					t.Error("expected error but got none")				}				return			}			if err != nil {				t.Errorf("unexpected error: %v", err)				return			}			if entry == nil {				t.Error("expected entry but got nil")				return			}			if tt.validate != nil && !tt.validate(entry) {				t.Errorf("entry validation failed: %+v", entry)			}		})	}}// Concurrent safety testfunc TestOptimizedParser_ConcurrentSafety(t *testing.T) {	config := DefaultParserConfig()	parser := NewOptimizedParser(		config,		NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),		&mockGeoIPService{},	)	lines := make([]string, 100)	for i := range lines {		lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)	}	// Start multiple goroutines parsing simultaneously	const numGoroutines = 10	results := make(chan *ParseResult, numGoroutines)	for i := 0; i < numGoroutines; i++ {		go func() {			result := parser.ParseLines(lines)			results <- result		}()	}	// Collect results	for i := 0; i < numGoroutines; i++ {		result := <-results		if result.Failed > 0 {			t.Errorf("parsing failed in goroutine: %d errors", result.Failed)		}		if result.Succeeded != 100 {			t.Errorf("expected 100 successful parses, got %d", result.Succeeded)		}	}}// Cache performance testsfunc TestCachedUserAgentParser_Performance(t *testing.T) {	baseParser := NewSimpleUserAgentParser()	cachedParser := NewCachedUserAgentParser(baseParser, 10)	userAgent := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/96.0.4664.110"	// Fill cache	for i := 0; i < 5; i++ {		uaVariant := fmt.Sprintf("%s.%d", userAgent, i)		cachedParser.Parse(uaVariant)	}	// Test cache hits	start := time.Now()	for i := 0; i < 1000; i++ {		uaVariant := fmt.Sprintf("%s.%d", userAgent, i%5)		cachedParser.Parse(uaVariant)	}	cacheTime := time.Since(start)	// Test without cache	start = time.Now()	for i := 0; i < 1000; i++ {		uaVariant := fmt.Sprintf("%s.%d", userAgent, i%5)		baseParser.Parse(uaVariant)	}	baseTime := time.Since(start)	// Cache should be significantly faster	if cacheTime >= baseTime {		t.Logf("Cache time: %v, Base time: %v", cacheTime, baseTime)		t.Error("cached parser should be faster than base parser for repeated queries")	}	size, _ := cachedParser.GetCacheStats()	if size != 5 {		t.Errorf("expected cache size 5, got %d", size)	}}// Stress test with malformed datafunc TestOptimizedParser_StressTest(t *testing.T) {	config := DefaultParserConfig()	config.StrictMode = false	parser := NewOptimizedParser(		config,		NewSimpleUserAgentParser(),		&mockGeoIPService{},	)	// Generate mix of valid and invalid log lines	lines := make([]string, 1000)	for i := range lines {		switch i % 10 {		case 0:			lines[i] = "" // Empty line		case 1:			lines[i] = "totally invalid log line" // Completely invalid		case 2:			lines[i] = `incomplete log line - - [25/Dec/2023:10:00:00` // Incomplete		case 3:			lines[i] = `127.0.0.1 - - [invalid-date] "GET / HTTP/1.1" 200 1234` // Invalid date		default:			// Valid lines with variations			lines[i] = fmt.Sprintf(`192.168.%d.%d - - [25/Dec/2023:10:%02d:%02d +0000] "GET /test%d HTTP/1.1" %d %d "-" "Mozilla/5.0"`,				i%256, (i/256)%256, (i/60)%60, i%60, i, 200+(i%100), 1000+i)		}	}	result := parser.ParseLines(lines)	// Should handle all lines gracefully	if result.Processed != len(lines) {		t.Errorf("processed count mismatch: got %d, want %d", result.Processed, len(lines))	}	// Should have some failures for malformed lines	if result.Failed == 0 {		t.Error("expected some parsing failures for malformed lines")	}	// Should have majority successes	if float64(result.Succeeded)/float64(result.Processed) < 0.6 {		t.Errorf("success rate too low: %d/%d = %.2f%%", result.Succeeded, result.Processed, 100.0*float64(result.Succeeded)/float64(result.Processed))	}}// Test resource cleanupfunc TestOptimizedParser_ResourceCleanup(t *testing.T) {	config := DefaultParserConfig()	config.WorkerCount = 4	parser := NewOptimizedParser(		config,		NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),		&mockGeoIPService{},	)	// Create many parsing operations to test resource management	for i := 0; i < 10; i++ {		lines := make([]string, 100)		for j := range lines {			lines[j] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, j%255+1, j)		}		ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)		result := parser.ParseLinesWithContext(ctx, lines)		cancel()		if result.Failed > 0 {			t.Errorf("iteration %d: unexpected parsing failures: %d", i, result.Failed)		}	}}// Performance comparison between different configurationsfunc BenchmarkParserConfigurations(b *testing.B) {	lines := make([]string, 1000)	for i := range lines {		lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)	}	configs := []struct {		name   string		config *Config	}{		{			name: "Single Worker",			config: &Config{				WorkerCount: 1,				BatchSize:   100,				BufferSize:  1000,				EnableGeoIP: false,				StrictMode:  false,			},		},		{			name: "Multiple Workers",			config: &Config{				WorkerCount: 4,				BatchSize:   250,				BufferSize:  2000,				EnableGeoIP: false,				StrictMode:  false,			},		},		{			name: "With GeoIP",			config: &Config{				WorkerCount: 4,				BatchSize:   250,				BufferSize:  2000,				EnableGeoIP: true,				StrictMode:  false,			},		},		{			name: "Strict Mode",			config: &Config{				WorkerCount: 4,				BatchSize:   250,				BufferSize:  2000,				EnableGeoIP: false,				StrictMode:  true,			},		},	}	for _, cfg := range configs {		b.Run(cfg.name, func(b *testing.B) {			parser := NewOptimizedParser(				cfg.config,				NewCachedUserAgentParser(NewSimpleUserAgentParser(), 100),				&mockGeoIPService{},			)			b.ResetTimer()			b.ReportAllocs()			for i := 0; i < b.N; i++ {				result := parser.ParseLines(lines)				if result.Failed > len(lines)/2 { // Allow some failures in strict mode					b.Fatalf("too many parsing failures: %d", result.Failed)				}			}		})	}}
 |