search_performance_bench_test.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  1. package nginx_log
  2. import (
  3. "context"
  4. "fmt"
  5. "math/rand"
  6. "os"
  7. "path/filepath"
  8. "runtime"
  9. "strings"
  10. "testing"
  11. "time"
  12. "github.com/dgraph-io/ristretto/v2"
  13. )
  // Benchmark configuration constants.
  const (
  	// Data-set sizes, expressed as a number of generated log entries.
  	BenchmarkLogEntriesSmall  = 10_000     // 10K entries
  	BenchmarkLogEntriesMedium = 100_000    // 100K entries
  	BenchmarkLogEntriesLarge  = 1_000_000  // 1M entries
  	BenchmarkLogEntriesXLarge = 10_000_000 // 10M entries
  	// Batch sizes.
  	BenchmarkBatchSizeSmall  = 100
  	BenchmarkBatchSizeMedium = 1_000
  	BenchmarkBatchSizeLarge  = 10_000
  	// Concurrency levels.
  	BenchmarkConcurrencyLow    = 1
  	BenchmarkConcurrencyMedium = 4
  	BenchmarkConcurrencyHigh   = 8
  )
  // Shared fixtures used to build synthetic log lines for the benchmarks.
  var (
  	// Value pools that generated log lines draw their fields from.
  	testIPs        []string
  	testPaths      []string
  	testMethods    []string
  	testStatuses   []int
  	testUserAgents []string
  	// benchmarkData holds previously generated log lines for reuse.
  	benchmarkData []string
  )
  36. func init() {
  37. initBenchmarkTestData()
  38. }
  39. func initBenchmarkTestData() {
  40. // Initialize test data arrays for consistent benchmarking
  41. testIPs = []string{
  42. "192.168.1.1", "192.168.1.2", "10.0.0.1", "10.0.0.2",
  43. "172.16.0.1", "172.16.0.2", "203.0.113.1", "203.0.113.2",
  44. "198.51.100.1", "198.51.100.2", "2001:db8::1", "2001:db8::2",
  45. }
  46. testPaths = []string{
  47. "/", "/api/v1/users", "/api/v1/posts", "/static/css/main.css",
  48. "/static/js/app.js", "/api/v1/auth/login", "/api/v1/auth/logout",
  49. "/api/v1/data", "/images/logo.png", "/favicon.ico", "/robots.txt",
  50. "/sitemap.xml", "/api/v1/search", "/admin/dashboard", "/user/profile",
  51. }
  52. testMethods = []string{"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS"}
  53. testStatuses = []int{200, 201, 301, 302, 400, 401, 403, 404, 500, 502, 503}
  54. testUserAgents = []string{
  55. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  56. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  57. "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
  58. "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
  59. "Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/88.0",
  60. "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0",
  61. }
  62. }
  63. func generateBenchmarkLogData(count int) []string {
  64. if len(benchmarkData) >= count {
  65. return benchmarkData[:count]
  66. }
  67. data := make([]string, count)
  68. baseTime := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)
  69. for i := 0; i < count; i++ {
  70. timestamp := baseTime.Add(time.Duration(i) * time.Minute)
  71. ip := testIPs[rand.Intn(len(testIPs))]
  72. method := testMethods[rand.Intn(len(testMethods))]
  73. path := testPaths[rand.Intn(len(testPaths))]
  74. status := testStatuses[rand.Intn(len(testStatuses))]
  75. size := rand.Intn(10000) + 100
  76. userAgent := testUserAgents[rand.Intn(len(testUserAgents))]
  77. data[i] = fmt.Sprintf(
  78. `%s - - [%s] "%s %s HTTP/1.1" %d %d "-" "%s" %d.%03d %d.%03d`,
  79. ip,
  80. timestamp.Format("02/Jan/2006:15:04:05 -0700"),
  81. method,
  82. path,
  83. status,
  84. size,
  85. userAgent,
  86. rand.Intn(5), rand.Intn(1000),
  87. rand.Intn(2), rand.Intn(1000),
  88. )
  89. }
  90. // Cache the data for reuse
  91. if len(benchmarkData) == 0 {
  92. benchmarkData = data
  93. }
  94. return data
  95. }
  96. func setupBenchmarkIndexer(b *testing.B, entryCount int) (*LogIndexer, string, func()) {
  97. b.Helper()
  98. // Create temporary directory for benchmark index
  99. tempDir, err := os.MkdirTemp("", "nginx_search_bench")
  100. if err != nil {
  101. b.Fatalf("Failed to create temp dir: %v", err)
  102. }
  103. // Create test log file
  104. logFile := filepath.Join(tempDir, "benchmark.log")
  105. logData := generateBenchmarkLogData(entryCount)
  106. logContent := strings.Join(logData, "\n")
  107. err = os.WriteFile(logFile, []byte(logContent), 0644)
  108. if err != nil {
  109. b.Fatalf("Failed to write benchmark log file: %v", err)
  110. }
  111. // Create indexer
  112. indexPath := filepath.Join(tempDir, "index")
  113. index, err := createOrOpenIndex(indexPath)
  114. if err != nil {
  115. b.Fatalf("Failed to create index: %v", err)
  116. }
  117. uaParser := NewSimpleUserAgentParser()
  118. parser := NewOptimizedLogParser(uaParser)
  119. // Initialize cache with larger capacity for benchmarks
  120. cache, err := ristretto.NewCache(&ristretto.Config[string, *CachedSearchResult]{
  121. NumCounters: 1e8, // 100M counters
  122. MaxCost: 1 << 30, // 1GB cache
  123. BufferItems: 64,
  124. })
  125. if err != nil {
  126. b.Fatalf("Failed to create cache: %v", err)
  127. }
  128. indexer := &LogIndexer{
  129. index: index,
  130. indexPath: indexPath,
  131. parser: parser,
  132. logPaths: make(map[string]*LogFileInfo),
  133. indexBatch: BenchmarkBatchSizeLarge,
  134. cache: cache,
  135. }
  136. // Parse and index the data directly (bypass safety checks for benchmarking)
  137. entries := make([]*AccessLogEntry, 0, entryCount)
  138. for _, line := range logData {
  139. if entry, err := parser.ParseLine(line); err == nil {
  140. entry.Raw = line
  141. entries = append(entries, entry)
  142. }
  143. }
  144. // Index entries directly
  145. batch := index.NewBatch()
  146. for i, entry := range entries {
  147. docID := fmt.Sprintf("doc_%d", i)
  148. doc := map[string]interface{}{
  149. "timestamp": entry.Timestamp.Format(time.RFC3339),
  150. "ip": entry.IP,
  151. "method": entry.Method,
  152. "path": entry.Path,
  153. "protocol": entry.Protocol,
  154. "status": entry.Status,
  155. "bytes_sent": entry.BytesSent,
  156. "request_time": entry.RequestTime,
  157. "referer": entry.Referer,
  158. "user_agent": entry.UserAgent,
  159. "browser": entry.Browser,
  160. "browser_version": entry.BrowserVer,
  161. "os": entry.OS,
  162. "os_version": entry.OSVersion,
  163. "device_type": entry.DeviceType,
  164. "raw": entry.Raw,
  165. }
  166. if entry.UpstreamTime != nil {
  167. doc["upstream_time"] = *entry.UpstreamTime
  168. }
  169. err = batch.Index(docID, doc)
  170. if err != nil {
  171. b.Fatalf("Failed to add document to batch: %v", err)
  172. }
  173. }
  174. err = index.Batch(batch)
  175. if err != nil {
  176. b.Fatalf("Failed to execute batch: %v", err)
  177. }
  178. // Wait for indexing to complete
  179. time.Sleep(500 * time.Millisecond)
  180. cleanup := func() {
  181. indexer.Close()
  182. os.RemoveAll(tempDir)
  183. }
  184. return indexer, logFile, cleanup
  185. }
  186. // Benchmark basic search operations
  187. func BenchmarkSearchLogs_Simple(b *testing.B) {
  188. sizes := []struct {
  189. name string
  190. count int
  191. }{
  192. {"10K", BenchmarkLogEntriesSmall},
  193. {"100K", BenchmarkLogEntriesMedium},
  194. {"1M", BenchmarkLogEntriesLarge},
  195. }
  196. for _, size := range sizes {
  197. b.Run(size.name, func(b *testing.B) {
  198. indexer, _, cleanup := setupBenchmarkIndexer(b, size.count)
  199. defer cleanup()
  200. req := &QueryRequest{
  201. Limit: 100,
  202. }
  203. b.ResetTimer()
  204. b.ReportAllocs()
  205. for i := 0; i < b.N; i++ {
  206. _, err := indexer.SearchLogs(context.Background(), req)
  207. if err != nil {
  208. b.Fatalf("Search failed: %v", err)
  209. }
  210. }
  211. })
  212. }
  213. }
  214. // Benchmark IP-based searches
  215. func BenchmarkSearchLogs_ByIP(b *testing.B) {
  216. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  217. defer cleanup()
  218. req := &QueryRequest{
  219. IP: "192.168.1.1",
  220. Limit: 100,
  221. }
  222. b.ResetTimer()
  223. b.ReportAllocs()
  224. for i := 0; i < b.N; i++ {
  225. _, err := indexer.SearchLogs(context.Background(), req)
  226. if err != nil {
  227. b.Fatalf("Search failed: %v", err)
  228. }
  229. }
  230. }
  231. // Benchmark method-based searches
  232. func BenchmarkSearchLogs_ByMethod(b *testing.B) {
  233. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  234. defer cleanup()
  235. req := &QueryRequest{
  236. Method: "GET",
  237. Limit: 100,
  238. }
  239. b.ResetTimer()
  240. b.ReportAllocs()
  241. for i := 0; i < b.N; i++ {
  242. _, err := indexer.SearchLogs(context.Background(), req)
  243. if err != nil {
  244. b.Fatalf("Search failed: %v", err)
  245. }
  246. }
  247. }
  248. // Benchmark status-based searches
  249. func BenchmarkSearchLogs_ByStatus(b *testing.B) {
  250. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  251. defer cleanup()
  252. req := &QueryRequest{
  253. Status: []int{200, 404, 500},
  254. Limit: 100,
  255. }
  256. b.ResetTimer()
  257. b.ReportAllocs()
  258. for i := 0; i < b.N; i++ {
  259. _, err := indexer.SearchLogs(context.Background(), req)
  260. if err != nil {
  261. b.Fatalf("Search failed: %v", err)
  262. }
  263. }
  264. }
  265. // Benchmark complex multi-field searches
  266. func BenchmarkSearchLogs_Complex(b *testing.B) {
  267. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  268. defer cleanup()
  269. req := &QueryRequest{
  270. Method: "GET",
  271. Status: []int{200, 404},
  272. Path: "/api",
  273. Limit: 100,
  274. }
  275. b.ResetTimer()
  276. b.ReportAllocs()
  277. for i := 0; i < b.N; i++ {
  278. _, err := indexer.SearchLogs(context.Background(), req)
  279. if err != nil {
  280. b.Fatalf("Search failed: %v", err)
  281. }
  282. }
  283. }
  284. // Benchmark time range searches
  285. func BenchmarkSearchLogs_TimeRange(b *testing.B) {
  286. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  287. defer cleanup()
  288. startTime := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)
  289. endTime := startTime.Add(24 * time.Hour)
  290. req := &QueryRequest{
  291. StartTime: startTime,
  292. EndTime: endTime,
  293. Limit: 100,
  294. }
  295. b.ResetTimer()
  296. b.ReportAllocs()
  297. for i := 0; i < b.N; i++ {
  298. _, err := indexer.SearchLogs(context.Background(), req)
  299. if err != nil {
  300. b.Fatalf("Search failed: %v", err)
  301. }
  302. }
  303. }
  304. // Benchmark pagination performance
  305. func BenchmarkSearchLogs_Pagination(b *testing.B) {
  306. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  307. defer cleanup()
  308. pageSize := 50
  309. b.ResetTimer()
  310. b.ReportAllocs()
  311. for i := 0; i < b.N; i++ {
  312. offset := (i % 100) * pageSize // Simulate different pages
  313. req := &QueryRequest{
  314. Limit: pageSize,
  315. Offset: offset,
  316. }
  317. _, err := indexer.SearchLogs(context.Background(), req)
  318. if err != nil {
  319. b.Fatalf("Search failed: %v", err)
  320. }
  321. }
  322. }
  323. // Benchmark sorting performance
  324. func BenchmarkSearchLogs_Sorting(b *testing.B) {
  325. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  326. defer cleanup()
  327. sortFields := []string{"timestamp", "ip", "method", "status", "bytes_sent"}
  328. for _, field := range sortFields {
  329. b.Run(field, func(b *testing.B) {
  330. req := &QueryRequest{
  331. Limit: 100,
  332. SortBy: field,
  333. SortOrder: "desc",
  334. }
  335. b.ResetTimer()
  336. b.ReportAllocs()
  337. for i := 0; i < b.N; i++ {
  338. _, err := indexer.SearchLogs(context.Background(), req)
  339. if err != nil {
  340. b.Fatalf("Search failed: %v", err)
  341. }
  342. }
  343. })
  344. }
  345. }
  346. // Benchmark cache performance
  347. func BenchmarkSearchLogs_Cache(b *testing.B) {
  348. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  349. defer cleanup()
  350. req := &QueryRequest{
  351. IP: "192.168.1.1",
  352. Limit: 100,
  353. }
  354. // Prime the cache
  355. _, err := indexer.SearchLogs(context.Background(), req)
  356. if err != nil {
  357. b.Fatalf("Failed to prime cache: %v", err)
  358. }
  359. b.ResetTimer()
  360. b.ReportAllocs()
  361. for i := 0; i < b.N; i++ {
  362. _, err := indexer.SearchLogs(context.Background(), req)
  363. if err != nil {
  364. b.Fatalf("Search failed: %v", err)
  365. }
  366. }
  367. }
  368. // Benchmark concurrent search performance
  369. func BenchmarkSearchLogs_Concurrent(b *testing.B) {
  370. concurrencies := []int{
  371. BenchmarkConcurrencyLow,
  372. BenchmarkConcurrencyMedium,
  373. BenchmarkConcurrencyHigh,
  374. runtime.NumCPU(),
  375. }
  376. for _, concurrency := range concurrencies {
  377. b.Run(fmt.Sprintf("Workers%d", concurrency), func(b *testing.B) {
  378. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  379. defer cleanup()
  380. // Create different search requests for each worker
  381. requests := make([]*QueryRequest, concurrency)
  382. for i := 0; i < concurrency; i++ {
  383. requests[i] = &QueryRequest{
  384. IP: testIPs[i%len(testIPs)],
  385. Limit: 100,
  386. }
  387. }
  388. b.ResetTimer()
  389. b.ReportAllocs()
  390. b.RunParallel(func(pb *testing.PB) {
  391. workerID := 0
  392. for pb.Next() {
  393. req := requests[workerID%concurrency]
  394. _, err := indexer.SearchLogs(context.Background(), req)
  395. if err != nil {
  396. b.Fatalf("Search failed: %v", err)
  397. }
  398. workerID++
  399. }
  400. })
  401. })
  402. }
  403. }
  404. // Benchmark large result set handling
  405. func BenchmarkSearchLogs_LargeResults(b *testing.B) {
  406. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesLarge)
  407. defer cleanup()
  408. resultSizes := []int{100, 1000, 10000}
  409. for _, size := range resultSizes {
  410. b.Run(fmt.Sprintf("Results%d", size), func(b *testing.B) {
  411. req := &QueryRequest{
  412. Limit: size,
  413. }
  414. b.ResetTimer()
  415. b.ReportAllocs()
  416. for i := 0; i < b.N; i++ {
  417. _, err := indexer.SearchLogs(context.Background(), req)
  418. if err != nil {
  419. b.Fatalf("Search failed: %v", err)
  420. }
  421. }
  422. })
  423. }
  424. }
  425. // Benchmark text search performance
  426. func BenchmarkSearchLogs_TextSearch(b *testing.B) {
  427. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesMedium)
  428. defer cleanup()
  429. queries := []string{
  430. "api",
  431. "GET",
  432. "200",
  433. "Mozilla",
  434. "/static",
  435. }
  436. for _, query := range queries {
  437. b.Run(query, func(b *testing.B) {
  438. req := &QueryRequest{
  439. Query: query,
  440. Limit: 100,
  441. }
  442. b.ResetTimer()
  443. b.ReportAllocs()
  444. for i := 0; i < b.N; i++ {
  445. _, err := indexer.SearchLogs(context.Background(), req)
  446. if err != nil {
  447. b.Fatalf("Search failed: %v", err)
  448. }
  449. }
  450. })
  451. }
  452. }
  453. // Benchmark memory usage during search
  454. func BenchmarkSearchLogs_Memory(b *testing.B) {
  455. indexer, _, cleanup := setupBenchmarkIndexer(b, BenchmarkLogEntriesLarge)
  456. defer cleanup()
  457. req := &QueryRequest{
  458. Limit: 1000,
  459. }
  460. b.ResetTimer()
  461. b.ReportAllocs()
  462. runtime.GC()
  463. var m1, m2 runtime.MemStats
  464. runtime.ReadMemStats(&m1)
  465. for i := 0; i < b.N; i++ {
  466. _, err := indexer.SearchLogs(context.Background(), req)
  467. if err != nil {
  468. b.Fatalf("Search failed: %v", err)
  469. }
  470. }
  471. runtime.GC()
  472. runtime.ReadMemStats(&m2)
  473. b.ReportMetric(float64(m2.TotalAlloc-m1.TotalAlloc)/float64(b.N), "bytes/search")
  474. }
  475. // Comprehensive performance comparison benchmark
  476. func BenchmarkSearchLogs_Comprehensive(b *testing.B) {
  477. // Test different data sizes with various search patterns
  478. scenarios := []struct {
  479. name string
  480. dataSize int
  481. req *QueryRequest
  482. }{
  483. {
  484. name: "Small_Simple",
  485. dataSize: BenchmarkLogEntriesSmall,
  486. req: &QueryRequest{Limit: 100},
  487. },
  488. {
  489. name: "Medium_IP",
  490. dataSize: BenchmarkLogEntriesMedium,
  491. req: &QueryRequest{IP: "192.168.1.1", Limit: 100},
  492. },
  493. {
  494. name: "Large_Complex",
  495. dataSize: BenchmarkLogEntriesLarge,
  496. req: &QueryRequest{Method: "GET", Status: []int{200}, Limit: 100},
  497. },
  498. }
  499. for _, scenario := range scenarios {
  500. b.Run(scenario.name, func(b *testing.B) {
  501. indexer, _, cleanup := setupBenchmarkIndexer(b, scenario.dataSize)
  502. defer cleanup()
  503. b.ResetTimer()
  504. b.ReportAllocs()
  505. for i := 0; i < b.N; i++ {
  506. result, err := indexer.SearchLogs(context.Background(), scenario.req)
  507. if err != nil {
  508. b.Fatalf("Search failed: %v", err)
  509. }
  510. // Report additional metrics
  511. if i == 0 {
  512. b.ReportMetric(float64(result.Total), "total_results")
  513. b.ReportMetric(float64(len(result.Entries)), "returned_results")
  514. b.ReportMetric(float64(result.Took.Nanoseconds()), "search_time_ns")
  515. }
  516. }
  517. })
  518. }
  519. }