// incremental_indexing.go — scheduled incremental indexing of nginx access logs.
  1. package cron
  2. import (
  3. "fmt"
  4. "os"
  5. "time"
  6. "github.com/0xJacky/Nginx-UI/internal/nginx_log"
  7. "github.com/0xJacky/Nginx-UI/internal/nginx_log/indexer"
  8. "github.com/go-co-op/gocron/v2"
  9. "github.com/uozi-tech/cosy/logger"
  10. )
  11. // setupIncrementalIndexingJob sets up the periodic incremental log indexing job
  12. func setupIncrementalIndexingJob(s gocron.Scheduler) (gocron.Job, error) {
  13. logger.Info("Setting up incremental log indexing job")
  14. // Run every 5 minutes to check for log file changes
  15. job, err := s.NewJob(
  16. gocron.DurationJob(5*time.Minute),
  17. gocron.NewTask(performIncrementalIndexing),
  18. gocron.WithName("incremental_log_indexing"),
  19. gocron.WithStartAt(gocron.WithStartImmediately()),
  20. )
  21. if err != nil {
  22. return nil, err
  23. }
  24. logger.Info("Incremental log indexing job scheduled to run every 5 minutes")
  25. return job, nil
  26. }
  27. // performIncrementalIndexing performs the actual incremental indexing check
  28. func performIncrementalIndexing() {
  29. logger.Debug("Starting incremental log indexing scan")
  30. // Get log file manager
  31. logFileManager := nginx_log.GetLogFileManager()
  32. if logFileManager == nil {
  33. logger.Warn("Log file manager not available for incremental indexing")
  34. return
  35. }
  36. // Get modern indexer
  37. modernIndexer := nginx_log.GetModernIndexer()
  38. if modernIndexer == nil {
  39. logger.Warn("Modern indexer not available for incremental indexing")
  40. return
  41. }
  42. // Check if indexer is healthy
  43. if !modernIndexer.IsHealthy() {
  44. logger.Warn("Modern indexer is not healthy, skipping incremental indexing")
  45. return
  46. }
  47. // Get all log groups to check for changes
  48. allLogs := nginx_log.GetAllLogsWithIndexGrouped(func(log *nginx_log.NginxLogWithIndex) bool {
  49. // Only process access logs (skip error logs as they are not indexed)
  50. return log.Type == "access"
  51. })
  52. changedCount := 0
  53. for _, log := range allLogs {
  54. // Check if file needs incremental indexing
  55. if needsIncrementalIndexing(log) {
  56. if err := queueIncrementalIndexing(log.Path, modernIndexer, logFileManager); err != nil {
  57. logger.Errorf("Failed to queue incremental indexing for %s: %v", log.Path, err)
  58. } else {
  59. changedCount++
  60. }
  61. }
  62. }
  63. if changedCount > 0 {
  64. logger.Infof("Queued %d log files for incremental indexing", changedCount)
  65. } else {
  66. logger.Debug("No log files need incremental indexing")
  67. }
  68. }
  69. // needsIncrementalIndexing checks if a log file needs incremental indexing
  70. func needsIncrementalIndexing(log *nginx_log.NginxLogWithIndex) bool {
  71. // Skip if already indexing or queued
  72. if log.IndexStatus == string(indexer.IndexStatusIndexing) ||
  73. log.IndexStatus == string(indexer.IndexStatusQueued) {
  74. return false
  75. }
  76. // Check file system status
  77. fileInfo, err := os.Stat(log.Path)
  78. if os.IsNotExist(err) {
  79. // File doesn't exist, but we have index data - this is fine for historical queries
  80. return false
  81. }
  82. if err != nil {
  83. logger.Warnf("Cannot stat file %s: %v", log.Path, err)
  84. return false
  85. }
  86. // Check if file has been modified since last index
  87. fileModTime := fileInfo.ModTime()
  88. fileSize := fileInfo.Size()
  89. lastModified := time.Unix(log.LastModified, 0)
  90. // File was modified after last index and size increased
  91. if fileModTime.After(lastModified) && fileSize > log.LastSize {
  92. logger.Debugf("File %s needs incremental indexing: mod_time=%s, size=%d",
  93. log.Path, fileModTime.Format("2006-01-02 15:04:05"), fileSize)
  94. return true
  95. }
  96. // File size decreased - might be file rotation
  97. if fileSize < log.LastSize {
  98. logger.Debugf("File %s needs full re-indexing due to size decrease: old_size=%d, new_size=%d",
  99. log.Path, log.LastSize, fileSize)
  100. return true
  101. }
  102. return false
  103. }
  104. // queueIncrementalIndexing queues a file for incremental indexing
  105. func queueIncrementalIndexing(logPath string, modernIndexer interface{}, logFileManager interface{}) error {
  106. // Set the file status to queued
  107. if err := setFileIndexStatus(logPath, string(indexer.IndexStatusQueued), logFileManager); err != nil {
  108. return err
  109. }
  110. // Queue the indexing job asynchronously
  111. go func() {
  112. defer func() {
  113. // Ensure status is always updated, even on panic
  114. if r := recover(); r != nil {
  115. logger.Errorf("Recovered from panic during incremental indexing for %s: %v", logPath, r)
  116. _ = setFileIndexStatus(logPath, string(indexer.IndexStatusError), logFileManager)
  117. }
  118. }()
  119. logger.Infof("Starting incremental indexing for file: %s", logPath)
  120. // Set status to indexing
  121. if err := setFileIndexStatus(logPath, string(indexer.IndexStatusIndexing), logFileManager); err != nil {
  122. logger.Errorf("Failed to set indexing status for %s: %v", logPath, err)
  123. return
  124. }
  125. // Perform incremental indexing
  126. startTime := time.Now()
  127. docsCountMap, minTime, maxTime, err := modernIndexer.(*indexer.ParallelIndexer).IndexSingleFileIncrementally(logPath, nil)
  128. if err != nil {
  129. logger.Errorf("Failed incremental indexing for %s: %v", logPath, err)
  130. // Set error status
  131. if statusErr := setFileIndexStatus(logPath, string(indexer.IndexStatusError), logFileManager); statusErr != nil {
  132. logger.Errorf("Failed to set error status for %s: %v", logPath, statusErr)
  133. }
  134. return
  135. }
  136. // Calculate total documents indexed
  137. var totalDocsIndexed uint64
  138. for _, docCount := range docsCountMap {
  139. totalDocsIndexed += docCount
  140. }
  141. // Save indexing metadata
  142. duration := time.Since(startTime)
  143. if lfm, ok := logFileManager.(*indexer.LogFileManager); ok {
  144. persistence := lfm.GetPersistence()
  145. var existingDocCount uint64
  146. existingIndex, err := persistence.GetLogIndex(logPath)
  147. if err != nil {
  148. logger.Warnf("Could not get existing log index for %s: %v", logPath, err)
  149. }
  150. // Determine if the file was rotated by checking if the current size is smaller than the last recorded size.
  151. // This is a strong indicator of log rotation.
  152. fileInfo, statErr := os.Stat(logPath)
  153. isRotated := false
  154. if statErr == nil && existingIndex != nil && fileInfo.Size() < existingIndex.LastSize {
  155. isRotated = true
  156. logger.Infof("Log rotation detected for %s: new size %d is smaller than last size %d. Resetting document count.",
  157. logPath, fileInfo.Size(), existingIndex.LastSize)
  158. }
  159. if existingIndex != nil && !isRotated {
  160. // If it's a normal incremental update (not a rotation), we build upon the existing count.
  161. existingDocCount = existingIndex.DocumentCount
  162. }
  163. // If the file was rotated, existingDocCount remains 0, effectively starting the count over for the new file.
  164. finalDocCount := existingDocCount + totalDocsIndexed
  165. if err := lfm.SaveIndexMetadata(logPath, finalDocCount, startTime, duration, minTime, maxTime); err != nil {
  166. logger.Errorf("Failed to save incremental index metadata for %s: %v", logPath, err)
  167. }
  168. }
  169. // Set status to indexed
  170. if err := setFileIndexStatus(logPath, string(indexer.IndexStatusIndexed), logFileManager); err != nil {
  171. logger.Errorf("Failed to set indexed status for %s: %v", logPath, err)
  172. }
  173. // Update searcher shards
  174. nginx_log.UpdateSearcherShards()
  175. logger.Infof("Successfully completed incremental indexing for %s, Documents: %d", logPath, totalDocsIndexed)
  176. }()
  177. return nil
  178. }
  179. // setFileIndexStatus updates the index status for a file in the database using enhanced status management
  180. func setFileIndexStatus(logPath, status string, logFileManager interface{}) error {
  181. if logFileManager == nil {
  182. return fmt.Errorf("log file manager not available")
  183. }
  184. // Get persistence manager
  185. lfm, ok := logFileManager.(*indexer.LogFileManager)
  186. if !ok {
  187. return fmt.Errorf("invalid log file manager type")
  188. }
  189. persistence := lfm.GetPersistence()
  190. if persistence == nil {
  191. return fmt.Errorf("persistence manager not available")
  192. }
  193. // Use enhanced SetIndexStatus method with queue position for queued status
  194. queuePosition := 0
  195. if status == string(indexer.IndexStatusQueued) {
  196. // For incremental indexing, we don't need specific queue positions
  197. // They will be processed as they come
  198. queuePosition = int(time.Now().Unix() % 1000) // Simple ordering by time
  199. }
  200. return persistence.SetIndexStatus(logPath, status, queuePosition, "")
  201. }