parser_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. package parser
  2. import (
  3. "context"
  4. "fmt"
  5. "strings"
  6. "testing"
  7. "time"
  8. )
  9. // Mock implementations for testing
  10. type mockGeoIPService struct{}
  11. func (m *mockGeoIPService) Search(ip string) (*GeoLocation, error) {
  12. if ip == "127.0.0.1" || ip == "::1" {
  13. return &GeoLocation{
  14. RegionCode: "US",
  15. Province: "California",
  16. City: "San Francisco",
  17. }, nil
  18. }
  19. return &GeoLocation{
  20. RegionCode: "US",
  21. Province: "Unknown",
  22. City: "Unknown",
  23. }, nil
  24. }
  25. func TestParser_ParseLine(t *testing.T) {
  26. tests := []struct {
  27. name string
  28. line string
  29. wantErr bool
  30. validate func(*AccessLogEntry) bool
  31. }{
  32. {
  33. name: "combined log format",
  34. line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`,
  35. validate: func(entry *AccessLogEntry) bool {
  36. return entry.IP == "127.0.0.1" &&
  37. entry.Method == "GET" &&
  38. entry.Path == "/index.html" &&
  39. entry.Status == 200 &&
  40. entry.BytesSent == 1234
  41. },
  42. },
  43. {
  44. name: "with request and upstream time",
  45. line: `192.168.1.1 - - [25/Dec/2023:10:00:00 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0" 0.123 0.045`,
  46. validate: func(entry *AccessLogEntry) bool {
  47. return entry.IP == "192.168.1.1" &&
  48. entry.Method == "POST" &&
  49. entry.Status == 201 &&
  50. entry.RequestTime == 0.123 &&
  51. entry.UpstreamTime != nil &&
  52. *entry.UpstreamTime == 0.045
  53. },
  54. },
  55. {
  56. name: "empty line",
  57. line: "",
  58. wantErr: true,
  59. },
  60. {
  61. name: "malformed line",
  62. line: "not a valid log line",
  63. wantErr: false, // Non-strict mode should handle this gracefully
  64. validate: func(entry *AccessLogEntry) bool {
  65. return entry.Raw == "not a valid log line"
  66. },
  67. },
  68. {
  69. name: "minimal valid line",
  70. line: `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET / HTTP/1.1" 200 -`,
  71. validate: func(entry *AccessLogEntry) bool {
  72. return entry.IP == "127.0.0.1" &&
  73. entry.Method == "GET" &&
  74. entry.Path == "/" &&
  75. entry.Status == 200 &&
  76. entry.BytesSent == 0
  77. },
  78. },
  79. }
  80. config := DefaultParserConfig()
  81. config.StrictMode = false // Use non-strict mode to handle malformed lines gracefully
  82. parser := NewParser(
  83. config,
  84. NewSimpleUserAgentParser(),
  85. &mockGeoIPService{},
  86. )
  87. for _, tt := range tests {
  88. t.Run(tt.name, func(t *testing.T) {
  89. entry, err := parser.ParseLine(tt.line)
  90. if tt.wantErr {
  91. if err == nil {
  92. t.Error("expected error but got none")
  93. }
  94. return
  95. }
  96. if err != nil {
  97. t.Errorf("unexpected error: %v", err)
  98. return
  99. }
  100. if entry == nil {
  101. t.Error("expected entry but got nil")
  102. return
  103. }
  104. if tt.validate != nil && !tt.validate(entry) {
  105. t.Errorf("entry validation failed: %+v", entry)
  106. }
  107. // Verify common fields
  108. if entry.ID == "" {
  109. t.Error("entry ID should not be empty")
  110. }
  111. if entry.Raw != tt.line {
  112. t.Errorf("raw line mismatch: got %q, want %q", entry.Raw, tt.line)
  113. }
  114. })
  115. }
  116. }
  117. func TestParser_ParseLines(t *testing.T) {
  118. lines := []string{
  119. `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`,
  120. `192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0"`,
  121. `10.0.0.1 - - [25/Dec/2023:10:00:02 +0000] "GET /style.css HTTP/1.1" 200 890 "-" "Mozilla/5.0"`,
  122. "", // empty line
  123. "invalid log line", // malformed line
  124. }
  125. config := DefaultParserConfig()
  126. parser := NewParser(
  127. config,
  128. NewSimpleUserAgentParser(),
  129. &mockGeoIPService{},
  130. )
  131. result := parser.ParseLines(lines)
  132. if result.Processed != len(lines) {
  133. t.Errorf("processed count mismatch: got %d, want %d", result.Processed, len(lines))
  134. }
  135. if result.Succeeded != 4 {
  136. t.Errorf("success count mismatch: got %d, want 4", result.Succeeded)
  137. }
  138. if result.Failed != 1 {
  139. t.Errorf("failure count mismatch: got %d, want 1", result.Failed)
  140. }
  141. if len(result.Entries) != 4 {
  142. t.Errorf("entries count mismatch: got %d, want 4", len(result.Entries))
  143. }
  144. }
  145. func TestParser_ParseStream(t *testing.T) {
  146. logData := `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"
  147. 192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0"
  148. 10.0.0.1 - - [25/Dec/2023:10:00:02 +0000] "GET /style.css HTTP/1.1" 200 890 "-" "Mozilla/5.0"`
  149. reader := strings.NewReader(logData)
  150. config := DefaultParserConfig()
  151. config.BatchSize = 2 // Small batch size for testing
  152. parser := NewParser(
  153. config,
  154. NewSimpleUserAgentParser(),
  155. &mockGeoIPService{},
  156. )
  157. ctx := context.Background()
  158. result, err := parser.ParseStream(ctx, reader)
  159. if err != nil {
  160. t.Errorf("unexpected error: %v", err)
  161. }
  162. if result.Processed != 3 {
  163. t.Errorf("processed count mismatch: got %d, want 3", result.Processed)
  164. }
  165. if result.Succeeded != 3 {
  166. t.Errorf("success count mismatch: got %d, want 3", result.Succeeded)
  167. }
  168. if len(result.Entries) != 3 {
  169. t.Errorf("entries count mismatch: got %d, want 3", len(result.Entries))
  170. }
  171. }
  172. func TestParser_WithContext(t *testing.T) {
  173. lines := make([]string, 1000)
  174. for i := range lines {
  175. lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)
  176. }
  177. config := DefaultParserConfig()
  178. parser := NewParser(
  179. config,
  180. NewSimpleUserAgentParser(),
  181. &mockGeoIPService{},
  182. )
  183. // Test with timeout context
  184. ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
  185. defer cancel()
  186. result := parser.ParseLinesWithContext(ctx, lines)
  187. // Should either complete or be cancelled
  188. if result.Processed == 0 {
  189. t.Error("no lines were processed")
  190. }
  191. }
  192. func TestFormatDetector(t *testing.T) {
  193. tests := []struct {
  194. name string
  195. lines []string
  196. expected string
  197. }{
  198. {
  199. name: "combined format",
  200. lines: []string{
  201. `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`,
  202. `192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567 "-" "curl/7.68.0"`,
  203. },
  204. expected: "combined",
  205. },
  206. {
  207. name: "main format",
  208. lines: []string{
  209. `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234`,
  210. `192.168.1.1 - - [25/Dec/2023:10:00:01 +0000] "POST /api/data HTTP/1.1" 201 567`,
  211. },
  212. expected: "main",
  213. },
  214. {
  215. name: "no match",
  216. lines: []string{
  217. "completely invalid log format",
  218. "another invalid line",
  219. },
  220. expected: "",
  221. },
  222. }
  223. detector := NewFormatDetector()
  224. for _, tt := range tests {
  225. t.Run(tt.name, func(t *testing.T) {
  226. format := detector.DetectFormat(tt.lines)
  227. if tt.expected == "" {
  228. if format != nil {
  229. t.Errorf("expected no format detection, but got %s", format.Name)
  230. }
  231. } else {
  232. if format == nil {
  233. t.Errorf("expected format %s, but got nil", tt.expected)
  234. } else if format.Name != tt.expected {
  235. t.Errorf("expected format %s, but got %s", tt.expected, format.Name)
  236. }
  237. }
  238. })
  239. }
  240. }
  241. func TestSimpleUserAgentParser(t *testing.T) {
  242. parser := NewSimpleUserAgentParser()
  243. tests := []struct {
  244. name string
  245. userAgent string
  246. expected UserAgentInfo
  247. }{
  248. {
  249. name: "Chrome on Windows",
  250. userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
  251. expected: UserAgentInfo{
  252. Browser: "Chrome",
  253. BrowserVer: "96.0",
  254. OS: "Windows",
  255. OSVersion: "10.0",
  256. DeviceType: "Desktop",
  257. },
  258. },
  259. {
  260. name: "Firefox on macOS",
  261. userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:95.0) Gecko/20100101 Firefox/95.0",
  262. expected: UserAgentInfo{
  263. Browser: "Firefox",
  264. BrowserVer: "95.0",
  265. OS: "macOS",
  266. OSVersion: "10.15",
  267. DeviceType: "Desktop",
  268. },
  269. },
  270. {
  271. name: "Mobile Safari on iOS",
  272. userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Mobile/15E148 Safari/604.1",
  273. expected: UserAgentInfo{
  274. Browser: "Safari",
  275. BrowserVer: "15.2",
  276. OS: "iOS",
  277. OSVersion: "15.2",
  278. DeviceType: "iPhone",
  279. },
  280. },
  281. {
  282. name: "Empty user agent",
  283. userAgent: "-",
  284. expected: UserAgentInfo{
  285. Browser: "Unknown",
  286. BrowserVer: "",
  287. OS: "Unknown",
  288. OSVersion: "",
  289. DeviceType: "Desktop",
  290. },
  291. },
  292. }
  293. for _, tt := range tests {
  294. t.Run(tt.name, func(t *testing.T) {
  295. result := parser.Parse(tt.userAgent)
  296. if result.Browser != tt.expected.Browser {
  297. t.Errorf("browser mismatch: got %s, want %s", result.Browser, tt.expected.Browser)
  298. }
  299. if result.OS != tt.expected.OS {
  300. t.Errorf("OS mismatch: got %s, want %s", result.OS, tt.expected.OS)
  301. }
  302. if result.DeviceType != tt.expected.DeviceType {
  303. t.Errorf("device type mismatch: got %s, want %s", result.DeviceType, tt.expected.DeviceType)
  304. }
  305. })
  306. }
  307. }
  308. func TestCachedUserAgentParser(t *testing.T) {
  309. baseParser := NewSimpleUserAgentParser()
  310. cachedParser := NewCachedUserAgentParser(baseParser, 5)
  311. userAgent := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/96.0.4664.110"
  312. // First parse should cache the result
  313. result1 := cachedParser.Parse(userAgent)
  314. // Second parse should use cached result
  315. result2 := cachedParser.Parse(userAgent)
  316. if result1.Browser != result2.Browser {
  317. t.Error("cached result differs from original")
  318. }
  319. size, maxSize := cachedParser.GetCacheStats()
  320. if size != 1 {
  321. t.Errorf("expected cache size 1, got %d", size)
  322. }
  323. if maxSize != 5 {
  324. t.Errorf("expected max cache size 5, got %d", maxSize)
  325. }
  326. }
  327. func BenchmarkParser_ParseLine(b *testing.B) {
  328. line := `127.0.0.1 - - [25/Dec/2023:10:00:00 +0000] "GET /index.html HTTP/1.1" 200 1234 "https://example.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"`
  329. config := DefaultParserConfig()
  330. parser := NewParser(
  331. config,
  332. NewSimpleUserAgentParser(),
  333. &mockGeoIPService{},
  334. )
  335. b.ResetTimer()
  336. b.ReportAllocs()
  337. for i := 0; i < b.N; i++ {
  338. _, err := parser.ParseLine(line)
  339. if err != nil {
  340. b.Fatal(err)
  341. }
  342. }
  343. }
  344. func BenchmarkParser_ParseLines(b *testing.B) {
  345. lines := make([]string, 1000)
  346. for i := range lines {
  347. lines[i] = fmt.Sprintf(`127.0.0.%d - - [25/Dec/2023:10:00:00 +0000] "GET /test%d.html HTTP/1.1" 200 1234 "-" "Mozilla/5.0"`, i%255+1, i)
  348. }
  349. config := DefaultParserConfig()
  350. parser := NewParser(
  351. config,
  352. NewSimpleUserAgentParser(),
  353. &mockGeoIPService{},
  354. )
  355. b.ResetTimer()
  356. b.ReportAllocs()
  357. for i := 0; i < b.N; i++ {
  358. result := parser.ParseLines(lines)
  359. if result.Failed > 0 {
  360. b.Fatalf("parsing failed: %d errors", result.Failed)
  361. }
  362. }
  363. }