123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422 |
- package parser
- import (
- "context"
- "fmt"
- "strings"
- "testing"
- "time"
- )
- // Mock implementations for testing
- type mockGeoIPService struct{}
- func (m *mockGeoIPService) Search(ip string) (*GeoLocation, error) {
- if ip == "127.0.0.1" || ip == "::1" {
- return &GeoLocation{
- RegionCode: "US",
- Province: "California",
- City: "San Francisco",
- }, nil
- }
- return &GeoLocation{
- RegionCode: "US",
- Province: "Unknown",
- City: "Unknown",
- }, nil
- }
- func TestParser_ParseLine(t *testing.T) {
- tests := []struct {
- name string
- line string
- wantErr bool
- validate func(*AccessLogEntry) bool
- }{
- {
- name: "combined log format",
- line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`,
- validate: func(entry *AccessLogEntry) bool {
- return entry.IP == "127.0.0.1" &&
- entry.Method == "GET" &&
- entry.Path == "/index.html" &&
- entry.Status == 200 &&
- entry.BytesSent == 1234
- },
- },
- {
- name: "with request and upstream time",
- line: `192.168.1.1 - - [25/Dec/2023:10:00:00 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0" 0.123 0.045`,
- validate: func(entry *AccessLogEntry) bool {
- return entry.IP == "192.168.1.1" &&
- entry.Method == "POST" &&
- entry.Status == 201 &&
- entry.RequestTime == 0.123 &&
- entry.UpstreamTime != nil &&
- *entry.UpstreamTime == 0.045
- },
- },
- {
- name: "empty line",
- line: "",
- wantErr: true,
- },
- {
- name: "malformed line",
- line: "not a valid log line",
- wantErr: false, // Non-strict mode should handle this gracefully
- validate: func(entry *AccessLogEntry) bool {
- return entry.Raw == "not a valid log line"
- },
- },
- {
- name: "minimal valid line",
- line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET / HTTP/1.1" 200 -`,
- validate: func(entry *AccessLogEntry) bool {
- return entry.IP == "127.0.0.1" &&
- entry.Method == "GET" &&
- entry.Path == "/" &&
- entry.Status == 200 &&
- entry.BytesSent == 0
- },
- },
- }
- config := DefaultParserConfig()
- config.StrictMode = false // Use non-strict mode to handle malformed lines gracefully
-
- parser := NewParser(
- config,
- NewSimpleUserAgentParser(),
- &mockGeoIPService{},
- )
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- entry, err := parser.ParseLine(tt.line)
-
- if tt.wantErr {
- if err == nil {
- t.Error("expected error but got none")
- }
- return
- }
-
- if err != nil {
- t.Errorf("unexpected error: %v", err)
- return
- }
-
- if entry == nil {
- t.Error("expected entry but got nil")
- return
- }
-
- if tt.validate != nil && !tt.validate(entry) {
- t.Errorf("entry validation failed: %+v", entry)
- }
-
- // Verify common fields
- if entry.ID == "" {
- t.Error("entry ID should not be empty")
- }
-
- if entry.Raw != tt.line {
- t.Errorf("raw line mismatch: got %q, want %q", entry.Raw, tt.line)
- }
- })
- }
- }
- func TestParser_ParseLines(t *testing.T) {
- lines := []string{
- `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`,
- `192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0"`,
- `10.0.0.1 - - [25/Dec/2023:10:00:02 +0000] "GET /style.css HTTP/1.1" 200 890 "-" "Mozilla/5.0"`,
- "", // empty line
- "invalid log line", // malformed line
- }
- config := DefaultParserConfig()
- parser := NewParser(
- config,
- NewSimpleUserAgentParser(),
- &mockGeoIPService{},
- )
- result := parser.ParseLines(lines)
- if result.Processed != len(lines) {
- t.Errorf("processed count mismatch: got %d, want %d", result.Processed, len(lines))
- }
- if result.Succeeded != 4 {
- t.Errorf("success count mismatch: got %d, want 4", result.Succeeded)
- }
- if result.Failed != 1 {
- t.Errorf("failure count mismatch: got %d, want 1", result.Failed)
- }
- if len(result.Entries) != 4 {
- t.Errorf("entries count mismatch: got %d, want 4", len(result.Entries))
- }
- }
- func TestParser_ParseStream(t *testing.T) {
- logData := `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"
- 192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0"
- 10.0.0.1 - - [25/Dec/2023:10:00:02 +0000] "GET /style.css HTTP/1.1" 200 890 "-" "Mozilla/5.0"`
- reader := strings.NewReader(logData)
-
- config := DefaultParserConfig()
- config.BatchSize = 2 // Small batch size for testing
-
- parser := NewParser(
- config,
- NewSimpleUserAgentParser(),
- &mockGeoIPService{},
- )
- ctx := context.Background()
- result, err := parser.ParseStream(ctx, reader)
- if err != nil {
- t.Errorf("unexpected error: %v", err)
- }
- if result.Processed != 3 {
- t.Errorf("processed count mismatch: got %d, want 3", result.Processed)
- }
- if result.Succeeded != 3 {
- t.Errorf("success count mismatch: got %d, want 3", result.Succeeded)
- }
- if len(result.Entries) != 3 {
- t.Errorf("entries count mismatch: got %d, want 3", len(result.Entries))
- }
- }
- func TestParser_WithContext(t *testing.T) {
- lines := make([]string, 1000)
- for i := range lines {
- lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)
- }
- config := DefaultParserConfig()
- parser := NewParser(
- config,
- NewSimpleUserAgentParser(),
- &mockGeoIPService{},
- )
- // Test with timeout context
- ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
- defer cancel()
- result := parser.ParseLinesWithContext(ctx, lines)
- // Should either complete or be cancelled
- if result.Processed == 0 {
- t.Error("no lines were processed")
- }
- }
- func TestFormatDetector(t *testing.T) {
- tests := []struct {
- name string
- lines []string
- expected string
- }{
- {
- name: "combined format",
- lines: []string{
- `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`,
- `192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0"`,
- },
- expected: "combined",
- },
- {
- name: "main format",
- lines: []string{
- `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234`,
- `192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567`,
- },
- expected: "main",
- },
- {
- name: "no match",
- lines: []string{
- "completely invalid log format",
- "another invalid line",
- },
- expected: "",
- },
- }
- detector := NewFormatDetector()
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- format := detector.DetectFormat(tt.lines)
-
- if tt.expected == "" {
- if format != nil {
- t.Errorf("expected no format detection, but got %s", format.Name)
- }
- } else {
- if format == nil {
- t.Errorf("expected format %s, but got nil", tt.expected)
- } else if format.Name != tt.expected {
- t.Errorf("expected format %s, but got %s", tt.expected, format.Name)
- }
- }
- })
- }
- }
- func TestSimpleUserAgentParser(t *testing.T) {
- parser := NewSimpleUserAgentParser()
-
- tests := []struct {
- name string
- userAgent string
- expected UserAgentInfo
- }{
- {
- name: "Chrome on Windows",
- userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
- expected: UserAgentInfo{
- Browser: "Chrome",
- BrowserVer: "96.0",
- OS: "Windows",
- OSVersion: "10.0",
- DeviceType: "Desktop",
- },
- },
- {
- name: "Firefox on macOS",
- userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:95.0) Gecko/20100101 Firefox/95.0",
- expected: UserAgentInfo{
- Browser: "Firefox",
- BrowserVer: "95.0",
- OS: "macOS",
- OSVersion: "10.15",
- DeviceType: "Desktop",
- },
- },
- {
- name: "Mobile Safari on iOS",
- userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Mobile/15E148 Safari/604.1",
- expected: UserAgentInfo{
- Browser: "Safari",
- BrowserVer: "15.2",
- OS: "iOS",
- OSVersion: "15.2",
- DeviceType: "iPhone",
- },
- },
- {
- name: "Empty user agent",
- userAgent: "-",
- expected: UserAgentInfo{
- Browser: "Unknown",
- BrowserVer: "",
- OS: "Unknown",
- OSVersion: "",
- DeviceType: "Desktop",
- },
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- result := parser.Parse(tt.userAgent)
-
- if result.Browser != tt.expected.Browser {
- t.Errorf("browser mismatch: got %s, want %s", result.Browser, tt.expected.Browser)
- }
-
- if result.OS != tt.expected.OS {
- t.Errorf("OS mismatch: got %s, want %s", result.OS, tt.expected.OS)
- }
-
- if result.DeviceType != tt.expected.DeviceType {
- t.Errorf("device type mismatch: got %s, want %s", result.DeviceType, tt.expected.DeviceType)
- }
- })
- }
- }
- func TestCachedUserAgentParser(t *testing.T) {
- baseParser := NewSimpleUserAgentParser()
- cachedParser := NewCachedUserAgentParser(baseParser, 5)
-
- userAgent := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/96.0.4664.110"
-
- // First parse should cache the result
- result1 := cachedParser.Parse(userAgent)
-
- // Second parse should use cached result
- result2 := cachedParser.Parse(userAgent)
-
- if result1.Browser != result2.Browser {
- t.Error("cached result differs from original")
- }
-
- size, maxSize := cachedParser.GetCacheStats()
- if size != 1 {
- t.Errorf("expected cache size 1, got %d", size)
- }
-
- if maxSize != 5 {
- t.Errorf("expected max cache size 5, got %d", maxSize)
- }
- }
- func BenchmarkParser_ParseLine(b *testing.B) {
- line := `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`
-
- config := DefaultParserConfig()
- parser := NewParser(
- config,
- NewSimpleUserAgentParser(),
- &mockGeoIPService{},
- )
-
- b.ResetTimer()
- b.ReportAllocs()
-
- for i := 0; i < b.N; i++ {
- _, err := parser.ParseLine(line)
- if err != nil {
- b.Fatal(err)
- }
- }
- }
- func BenchmarkParser_ParseLines(b *testing.B) {
- lines := make([]string, 1000)
- for i := range lines {
- lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)
- }
-
- config := DefaultParserConfig()
- parser := NewParser(
- config,
- NewSimpleUserAgentParser(),
- &mockGeoIPService{},
- )
-
- b.ResetTimer()
- b.ReportAllocs()
-
- for i := 0; i < b.N; i++ {
- result := parser.ParseLines(lines)
- if result.Failed > 0 {
- b.Fatalf("parsing failed: %d errors", result.Failed)
- }
- }
- }
|