
enhance(nginx-log): NginxLogList, HourlyChart for better data handling

0xJacky 5 months ago
parent
commit
ac2ee3bf4a

+ 1 - 0
.devcontainer/docker-compose.yml

@@ -4,6 +4,7 @@ services:
     image: nginx-ui-dev
     container_name: nginx-ui
     volumes:
+      - ~/.ssh:/root/.ssh
       - ../..:/workspaces:cached
       - ../.go:/root/go
       - ./data/nginx:/etc/nginx

+ 2 - 1
app/src/views/nginx_log/NginxLogList.vue

@@ -187,7 +187,8 @@ const columns: StdTableColumn[] = [
           <div>{lastIndexed.format('YYYY-MM-DD HH:mm:ss')}</div>
           {durationText && (
             <div class="text-xs text-gray-100 dark:text-gray-300 mt-1">
-              Duration:
+              { $gettext('Duration') }
+              :
               {' '}
               {durationText.slice(1, -1)}
             </div>

+ 66 - 6
app/src/views/nginx_log/dashboard/components/HourlyChart.vue

@@ -1,5 +1,5 @@
 <script setup lang="ts">
-import type { DashboardAnalytics } from '@/api/nginx_log'
+import type { DashboardAnalytics, HourlyStats } from '@/api/nginx_log'
 import { storeToRefs } from 'pinia'
 import VueApexchart from 'vue3-apexcharts'
 import { useSettingsStore } from '@/pinia'
@@ -21,8 +21,38 @@ const hourlyChartOptions = computed(() => {
   if (!props.dashboardData || !props.dashboardData.hourly_stats)
     return {}
 
-  const hourlyData = props.dashboardData.hourly_stats || []
-  const hours = hourlyData.map(item => `${item.hour}`)
+  // Filter hourly data to get only the 24 hours for the end_date in local timezone
+  const allHourlyData = props.dashboardData.hourly_stats || []
+
+  // Get the end date in local timezone
+  const endDateLocal = props.endDate ? new Date(`${props.endDate}T00:00:00`) : new Date()
+  const startOfDayTimestamp = Math.floor(endDateLocal.getTime() / 1000)
+  const endOfDayTimestamp = startOfDayTimestamp + (24 * 60 * 60)
+
+  // Filter data for the local date's 24 hours
+  const hourlyData = allHourlyData.filter(item =>
+    item.timestamp >= startOfDayTimestamp && item.timestamp < endOfDayTimestamp,
+  )
+
+  // Sort by timestamp and ensure we have 24 hours
+  hourlyData.sort((a, b) => a.timestamp - b.timestamp)
+
+  // Create final data with proper hour values (0-23) based on local time
+  const finalHourlyData: HourlyStats[] = []
+  for (let hour = 0; hour < 24; hour++) {
+    const targetTimestamp = startOfDayTimestamp + hour * 3600
+    const found = hourlyData.find(item =>
+      item.timestamp >= targetTimestamp && item.timestamp < targetTimestamp + 3600,
+    )
+    if (found) {
+      finalHourlyData.push({ ...found, hour })
+    }
+    else {
+      finalHourlyData.push({ hour, uv: 0, pv: 0, timestamp: targetTimestamp })
+    }
+  }
+
+  const hours = finalHourlyData.map(item => `${item.hour}`)
 
   return {
     chart: {
@@ -105,9 +135,39 @@ const hourlySeries = computed(() => {
   if (!props.dashboardData || !props.dashboardData.hourly_stats)
     return []
 
-  const hourlyData = props.dashboardData.hourly_stats || []
-  const uvData = hourlyData.map(item => item.uv)
-  const pvData = hourlyData.map(item => item.pv)
+  // Use the same filtered data as in hourlyChartOptions
+  const allHourlyData = props.dashboardData.hourly_stats || []
+
+  // Get the end date in local timezone
+  const endDateLocal = props.endDate ? new Date(`${props.endDate}T00:00:00`) : new Date()
+  const startOfDayTimestamp = Math.floor(endDateLocal.getTime() / 1000)
+  const endOfDayTimestamp = startOfDayTimestamp + (24 * 60 * 60)
+
+  // Filter data for the local date's 24 hours
+  const hourlyData = allHourlyData.filter(item =>
+    item.timestamp >= startOfDayTimestamp && item.timestamp < endOfDayTimestamp,
+  )
+
+  // Sort by timestamp and ensure we have 24 hours
+  hourlyData.sort((a, b) => a.timestamp - b.timestamp)
+
+  // Create final data with proper hour values (0-23) based on local time
+  const finalHourlyData: HourlyStats[] = []
+  for (let hour = 0; hour < 24; hour++) {
+    const targetTimestamp = startOfDayTimestamp + hour * 3600
+    const found = hourlyData.find(item =>
+      item.timestamp >= targetTimestamp && item.timestamp < targetTimestamp + 3600,
+    )
+    if (found) {
+      finalHourlyData.push({ ...found, hour })
+    }
+    else {
+      finalHourlyData.push({ hour, uv: 0, pv: 0, timestamp: targetTimestamp })
+    }
+  }
+
+  const uvData = finalHourlyData.map(item => item.uv)
+  const pvData = finalHourlyData.map(item => item.pv)
 
   return [
     {

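The filtering above is repeated in both hourlyChartOptions and hourlySeries: the component takes the server's hourly stats, keeps only the entries whose timestamps fall on the viewer's local end_date, and zero-fills any hour that has no matching bucket. A minimal Go sketch of that same selection, for reference only; the HourlyStats shape and the bucketLocalDay helper are illustrative and not part of the project (the component does this in TypeScript).

package main

import (
	"fmt"
	"time"
)

// HourlyStats mirrors the fields the chart needs (illustrative, not the API type).
type HourlyStats struct {
	Hour      int
	UV        uint64
	PV        uint64
	Timestamp int64
}

// bucketLocalDay keeps only the 24 hours of endDate in its own location and
// zero-fills hours with no matching server-side bucket, mirroring the
// TypeScript logic in HourlyChart.vue above.
func bucketLocalDay(endDate time.Time, stats []HourlyStats) []HourlyStats {
	startOfDay := time.Date(endDate.Year(), endDate.Month(), endDate.Day(), 0, 0, 0, 0, endDate.Location()).Unix()
	out := make([]HourlyStats, 0, 24)
	for hour := 0; hour < 24; hour++ {
		target := startOfDay + int64(hour)*3600
		bucket := HourlyStats{Hour: hour, Timestamp: target}
		for _, s := range stats {
			if s.Timestamp >= target && s.Timestamp < target+3600 {
				bucket.UV, bucket.PV = s.UV, s.PV
				break
			}
		}
		out = append(out, bucket)
	}
	return out
}

func main() {
	// With no server data every bucket comes back zero-filled.
	fmt.Println(len(bucketLocalDay(time.Now(), nil))) // 24
}
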
+ 18 - 1
app/src/views/nginx_log/structured/StructuredLogViewer.vue

@@ -762,7 +762,7 @@ watch(timeRange, () => {
         </div>
 
         <!-- Log Table (show if we have entries) -->
-        <div v-if="filteredEntries.length > 0">
+        <div v-if="filteredEntries.length > 0" class="log-table-container">
           <ATable
             :data-source="filteredEntries"
             :pagination="{
@@ -771,6 +771,7 @@ watch(timeRange, () => {
               total: searchTotal,
               showSizeChanger: true,
               showQuickJumper: true,
+              pageSizeOptions: ['50', '100', '200', '500', '1000'],
               showTotal: (total, range) => $gettext('%{start}-%{end} of %{total} items', {
                 start: range[0].toLocaleString(),
                 end: range[1].toLocaleString(),
@@ -823,3 +824,19 @@ watch(timeRange, () => {
     </div>
   </div>
 </template>
+
+<style scoped>
+/* Fix pagination page size selector width */
+:deep(.log-table-container .ant-pagination-options-size-changer .ant-select) {
+  min-width: 100px !important;
+}
+
+:deep(.log-table-container .ant-pagination-options-size-changer .ant-select-selector) {
+  min-width: 100px !important;
+}
+
+/* Ensure the dropdown has enough width */
+:deep(.ant-select-dropdown .ant-select-item) {
+  min-width: 100px;
+}
+</style>

+ 6 - 7
internal/analytic/node_record.go

@@ -17,7 +17,7 @@ import (
 
 // nodeCache contains both slice and map for efficient access
 type nodeCache struct {
-	Nodes   []*model.Node         // For iteration
+	Nodes   []*model.Node          // For iteration
 	NodeMap map[uint64]*model.Node // For fast lookup by ID
 }
 
@@ -228,7 +228,6 @@ func cleanupDisabledNodes(enabledEnvIDs []uint64) {
 	mutex.Unlock()
 }
 
-
 // getEnabledNodes retrieves enabled nodes from cache or database
 func getEnabledNodes() ([]*model.Node, error) {
 	if cached, found := cache.GetCachedNodes(); found {
@@ -249,14 +248,14 @@ func getEnabledNodes() ([]*model.Node, error) {
 	for _, node := range nodes {
 		nodeMap[node.ID] = node
 	}
-	
+
 	nc := &nodeCache{
 		Nodes:   nodes,
 		NodeMap: nodeMap,
 	}
 
 	cache.SetCachedNodes(nc)
-	logger.Debug("Queried and cached %d enabled nodes", len(nodes))
+	logger.Debugf("Queried and cached %d enabled nodes", len(nodes))
 	return nodes, nil
 }
 
@@ -268,20 +267,20 @@ func isNodeEnabled(nodeID uint64) bool {
 			return exists
 		}
 	}
-	
+
 	// Fallback: load cache and check again
 	_, err := getEnabledNodes()
 	if err != nil {
 		return false
 	}
-	
+
 	if cached, found := cache.GetCachedNodes(); found {
 		if nc, ok := cached.(*nodeCache); ok {
 			_, exists := nc.NodeMap[nodeID]
 			return exists
 		}
 	}
-	
+
 	return false
 }
 

+ 1 - 1
internal/nginx_log/analytics/calculations_test.go

@@ -16,7 +16,7 @@ func TestService_GetDashboardAnalytics_HourlyStats(t *testing.T) {
 	ctx := context.Background()
 	req := &DashboardQueryRequest{
 		StartTime: 1640995200, // 2022-01-01 00:00:00 UTC
-		EndTime:   1641081600, // 2022-01-02 00:00:00 UTC
+		EndTime:   1641006000, // 2022-01-01 03:00:00 UTC (same day as test data)
 		LogPaths:  []string{"/var/log/nginx/access.log"},
 	}
 

+ 32 - 25
internal/nginx_log/analytics/dashboard.go

@@ -75,44 +75,51 @@ func (s *service) GetDashboardAnalytics(ctx context.Context, req *DashboardQuery
 }
 
 // calculateHourlyStats calculates hourly access statistics.
-// This still requires in-memory calculation from hits as it's a temporal analysis.
-// We need to fetch the actual documents for this. A separate, limited query is better.
+// Returns 48 hours of data centered around the end_date to support all timezones.
 func (s *service) calculateHourlyStats(result *searcher.SearchResult, startTime, endTime int64) []HourlyAccessStats {
-	hourlyMap := make(map[int]*HourlyAccessStats)
-	uniqueIPsPerHour := make(map[int]map[string]bool)
-
-	// Initialize hourly buckets for end_date (last day) only
-	endDate := time.Unix(endTime, 0)
-	endDateStart := time.Date(endDate.Year(), endDate.Month(), endDate.Day(), 0, 0, 0, 0, endDate.Location())
+	// Use a map with timestamp as key for easier processing
+	hourlyMap := make(map[int64]*HourlyAccessStats)
+	uniqueIPsPerHour := make(map[int64]map[string]bool)
+
+	// Calculate 48 hours range: from UTC end_date minus 12 hours to plus 36 hours
+	// This covers UTC-12 to UTC+14 timezones
+	endDate := time.Unix(endTime, 0).UTC()
+	endDateStart := time.Date(endDate.Year(), endDate.Month(), endDate.Day(), 0, 0, 0, 0, time.UTC)
+	
+	// Create hourly buckets for 48 hours (12 hours before to 36 hours after the UTC date boundary)
+	rangeStart := endDateStart.Add(-12 * time.Hour)
+	rangeEnd := endDateStart.Add(36 * time.Hour)
 	
-	// Create 24 hour buckets for the end date
-	for hour := 0; hour < 24; hour++ {
-		hourTime := endDateStart.Add(time.Duration(hour) * time.Hour)
-		hourlyMap[hour] = &HourlyAccessStats{
-			Hour:      hour,
+	// Initialize hourly buckets
+	for t := rangeStart; t.Before(rangeEnd); t = t.Add(time.Hour) {
+		timestamp := t.Unix()
+		hourlyMap[timestamp] = &HourlyAccessStats{
+			Hour:      t.Hour(),
 			UV:        0,
 			PV:        0,
-			Timestamp: hourTime.Unix(),
+			Timestamp: timestamp,
 		}
-		uniqueIPsPerHour[hour] = make(map[string]bool)
+		uniqueIPsPerHour[timestamp] = make(map[string]bool)
 	}
 
-	// Process search results - only count hits from end_date
+	// Process search results - count hits within the 48-hour window
 	for _, hit := range result.Hits {
 		if timestampField, ok := hit.Fields["timestamp"]; ok {
 			if timestampFloat, ok := timestampField.(float64); ok {
 				timestamp := int64(timestampFloat)
-				t := time.Unix(timestamp, 0)
 				
-				// Check if this hit is from the end_date
-				if t.Year() == endDate.Year() && t.Month() == endDate.Month() && t.Day() == endDate.Day() {
-					hour := t.Hour()
-					if stats, exists := hourlyMap[hour]; exists {
+				// Check if this hit falls within our 48-hour window
+				if timestamp >= rangeStart.Unix() && timestamp < rangeEnd.Unix() {
+					// Round down to the hour
+					t := time.Unix(timestamp, 0).UTC()
+					hourTimestamp := time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), 0, 0, 0, time.UTC).Unix()
+					
+					if stats, exists := hourlyMap[hourTimestamp]; exists {
 						stats.PV++
 						if ipField, ok := hit.Fields["ip"]; ok {
 							if ip, ok := ipField.(string); ok && ip != "" {
-								if !uniqueIPsPerHour[hour][ip] {
-									uniqueIPsPerHour[hour][ip] = true
+								if !uniqueIPsPerHour[hourTimestamp][ip] {
+									uniqueIPsPerHour[hourTimestamp][ip] = true
 									stats.UV++
 								}
 							}
@@ -123,14 +130,14 @@ func (s *service) calculateHourlyStats(result *searcher.SearchResult, startTime,
 		}
 	}
 
-	// Convert to slice and sort
+	// Convert to slice and sort by timestamp
 	var stats []HourlyAccessStats
 	for _, stat := range hourlyMap {
 		stats = append(stats, *stat)
 	}
 
 	sort.Slice(stats, func(i, j int) bool {
-		return stats[i].Hour < stats[j].Hour
+		return stats[i].Timestamp < stats[j].Timestamp
 	})
 
 	return stats

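calculateHourlyStats now keys buckets by the Unix timestamp of each UTC hour instead of by hour-of-day: every hit is truncated down to the start of its hour, PV counts all hits in that bucket, and UV counts distinct client IPs. A minimal Go sketch of that counting step, with illustrative types (hit, hourBucket) that are not the project's:

package main

import (
	"fmt"
	"time"
)

type hit struct {
	Timestamp int64
	IP        string
}

type hourBucket struct {
	Timestamp int64
	PV        int
	UV        int
}

// bucketByUTCHour groups hits by the start of their UTC hour; PV counts every
// hit, UV counts distinct non-empty IPs per bucket.
func bucketByUTCHour(hits []hit) map[int64]*hourBucket {
	buckets := make(map[int64]*hourBucket)
	seen := make(map[int64]map[string]bool)
	for _, h := range hits {
		hour := time.Unix(h.Timestamp, 0).UTC().Truncate(time.Hour).Unix()
		b, ok := buckets[hour]
		if !ok {
			b = &hourBucket{Timestamp: hour}
			buckets[hour] = b
			seen[hour] = make(map[string]bool)
		}
		b.PV++
		if h.IP != "" && !seen[hour][h.IP] {
			seen[hour][h.IP] = true
			b.UV++
		}
	}
	return buckets
}

func main() {
	hits := []hit{
		{Timestamp: 1640995210, IP: "10.0.0.1"}, // 2022-01-01 00:00 UTC hour
		{Timestamp: 1640995220, IP: "10.0.0.1"}, // same hour, same IP
		{Timestamp: 1640998810, IP: "10.0.0.2"}, // 01:00 UTC hour
	}
	for ts, b := range bucketByUTCHour(hits) {
		fmt.Printf("%d pv=%d uv=%d\n", ts, b.PV, b.UV)
	}
}
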
+ 1 - 1
internal/nginx_log/analytics/entries_test.go

@@ -35,7 +35,7 @@ func TestService_GetLogEntriesStats_Success(t *testing.T) {
 					{Term: "POST", Count: 1},
 				},
 			},
-			"path": {
+			"path_exact": {
 				Terms: []*searcher.FacetTerm{
 					{Term: "/a", Count: 3},
 					{Term: "/b", Count: 1},

+ 2 - 2
internal/nginx_log/analytics/service_test.go

@@ -114,9 +114,9 @@ func TestService_ValidateLogPath(t *testing.T) {
 			wantErr: false,
 		},
 		{
-			name:    "non-empty path should be valid",
+			name:    "non-empty path should be invalid without whitelist",
 			logPath: "/var/log/nginx/access.log",
-			wantErr: false,
+			wantErr: true, // In test environment, no whitelist is configured
 		},
 	}
 

+ 0 - 3
internal/nginx_log/indexer/parallel_indexer.go

@@ -778,9 +778,6 @@ func (w *indexWorker) logDocumentToMap(doc *LogDocument) map[string]interface{}
 	if doc.City != "" {
 		docMap["city"] = doc.City
 	}
-	if doc.ISP != "" {
-		docMap["isp"] = doc.ISP
-	}
 	if doc.Protocol != "" {
 		docMap["protocol"] = doc.Protocol
 	}

+ 0 - 1
internal/nginx_log/indexer/parser.go

@@ -46,7 +46,6 @@ func ParseLogLine(line string) (*LogDocument, error) {
 		RegionCode:  entry.RegionCode,
 		Province:    entry.Province,
 		City:        entry.City,
-		ISP:         entry.ISP,
 		Method:      entry.Method,
 		Path:        entry.Path,
 		PathExact:   entry.Path, // Use the same for now

+ 14 - 14
internal/nginx_log/indexer/persistence.go

@@ -2,7 +2,6 @@ package indexer
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"path/filepath"
 	"regexp"
@@ -14,7 +13,6 @@ import (
 	"github.com/uozi-tech/cosy"
 	"github.com/uozi-tech/cosy/logger"
 	"gorm.io/gen/field"
-	"gorm.io/gorm"
 )
 
 // PersistenceManager handles database operations for log index positions
@@ -71,19 +69,21 @@ func NewPersistenceManager(config *IncrementalIndexConfig) *PersistenceManager {
 // GetLogIndex retrieves the index record for a log file path
 func (pm *PersistenceManager) GetLogIndex(path string) (*model.NginxLogIndex, error) {
 	q := query.NginxLogIndex
-	logIndex, err := q.Where(q.Path.Eq(path)).First()
+
+	// Determine main log path for grouping
+	mainLogPath := getMainLogPathFromFile(path)
+
+	// Use FirstOrCreate to get existing record or create a new one
+	logIndex, err := q.Where(q.Path.Eq(path)).
+		Assign(field.Attrs(&model.NginxLogIndex{
+			Path:        path,
+			MainLogPath: mainLogPath,
+			Enabled:     true,
+		})).
+		FirstOrCreate()
+
 	if err != nil {
-		if errors.Is(err, gorm.ErrRecordNotFound) {
-			// Return a new record for first-time indexing
-			// Determine main log path for grouping
-			mainLogPath := getMainLogPathFromFile(path)
-			return &model.NginxLogIndex{
-				Path:        path,
-				MainLogPath: mainLogPath,
-				Enabled:     true,
-			}, nil
-		}
-		return nil, fmt.Errorf("failed to get log index: %w", err)
+		return nil, fmt.Errorf("failed to get or create log index: %w", err)
 	}
 
 	return logIndex, nil

+ 14 - 2
internal/nginx_log/indexer/shard_manager.go

@@ -142,9 +142,21 @@ func (sm *DefaultShardManager) createShardLocked(id int) error {
 	var err error
 
 	if _, statErr := os.Stat(filepath.Join(shardPath, "index_meta.json")); os.IsNotExist(statErr) {
-		// Create new index
+		// Create new index with optimized disk space configuration
 		mapping := CreateLogIndexMapping()
-		shard, err = bleve.New(shardPath, mapping)
+		
+		// Optimize FloorSegmentFileSize for better disk space usage
+		// FloorSegmentFileSize controls the minimum size of segment files.
+		// Larger values reduce file fragmentation and improve I/O efficiency,
+		// which can save disk space by reducing metadata overhead.
+		// 5MB provides a good balance between space efficiency and performance.
+		kvConfig := map[string]interface{}{
+			"scorchMergePlanOptions": map[string]interface{}{
+				"FloorSegmentFileSize": 5000000, // 5MB minimum segment file size
+			},
+		}
+		
+		shard, err = bleve.NewUsing(shardPath, mapping, bleve.Config.DefaultIndexType, bleve.Config.DefaultMemKVStore, kvConfig)
 		if err != nil {
 			return fmt.Errorf("failed to create new shard index: %w", err)
 		}

+ 0 - 2
internal/nginx_log/indexer/types.go

@@ -53,7 +53,6 @@ type LogDocument struct {
 	RegionCode   string   `json:"region_code,omitempty"`
 	Province     string   `json:"province,omitempty"`
 	City         string   `json:"city,omitempty"`
-	ISP          string   `json:"isp,omitempty"`
 	Method       string   `json:"method"`
 	Path         string   `json:"path"`
 	PathExact    string   `json:"path_exact"`
@@ -235,7 +234,6 @@ func CreateLogIndexMapping() mapping.IndexMapping {
 	docMapping.AddFieldMappingsAt("region_code", regionMapping)
 	docMapping.AddFieldMappingsAt("province", regionMapping)
 	docMapping.AddFieldMappingsAt("city", regionMapping)
-	docMapping.AddFieldMappingsAt("isp", regionMapping)
 
 	// HTTP method - keyword
 	methodMapping := bleve.NewTextFieldMapping()

+ 0 - 1
internal/nginx_log/parser/types.go

@@ -13,7 +13,6 @@ type AccessLogEntry struct {
 	RegionCode   string   `json:"region_code"`
 	Province     string   `json:"province"`
 	City         string   `json:"city"`
-	ISP          string   `json:"isp"`
 	Method       string   `json:"method"`
 	Path         string   `json:"path"`
 	Protocol     string   `json:"protocol"`

+ 120 - 208
internal/nginx_log/searcher/distributed_searcher.go

@@ -3,12 +3,12 @@ package searcher
 import (
 	"context"
 	"fmt"
-	"sort"
 	"sync"
 	"sync/atomic"
 	"time"
 
 	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/search"
 	"github.com/blevesearch/bleve/v2/search/query"
 )
 
@@ -16,6 +16,7 @@ import (
 type DistributedSearcher struct {
 	config       *Config
 	shards       []bleve.Index
+	indexAlias   bleve.IndexAlias  // Index alias for global scoring
 	queryBuilder *QueryBuilderService
 	cache        *OptimizedSearchCache
 	stats        *searcherStats
@@ -47,9 +48,21 @@ func NewDistributedSearcher(config *Config, shards []bleve.Index) *DistributedSe
 		config = DefaultSearcherConfig()
 	}
 
+	// Create index alias for global scoring across shards
+	indexAlias := bleve.NewIndexAlias(shards...)
+	
+	// Set the index mapping from the first shard (all shards should have the same mapping)
+	if len(shards) > 0 && shards[0] != nil {
+		mapping := shards[0].Mapping()
+		if err := indexAlias.SetIndexMapping(mapping); err != nil {
+			// Log error but continue - this is not critical for basic functionality
+		}
+	}
+
 	searcher := &DistributedSearcher{
 		config:       config,
 		shards:       shards,
+		indexAlias:   indexAlias,
 		queryBuilder: NewQueryBuilderService(),
 		semaphore:    make(chan struct{}, config.MaxConcurrency),
 		stats: &searcherStats{
@@ -176,25 +189,106 @@ func (ds *DistributedSearcher) executeDistributedSearch(ctx context.Context, que
 		return nil, fmt.Errorf("no healthy shards available")
 	}
 
+	// Use Bleve's native distributed search with global scoring for consistent pagination
+	return ds.executeGlobalScoringSearch(ctx, query, req)
+}
+
+// executeGlobalScoringSearch uses Bleve's native distributed search with global scoring
+// This ensures consistent pagination by letting Bleve handle cross-shard ranking
+func (ds *DistributedSearcher) executeGlobalScoringSearch(ctx context.Context, query query.Query, req *SearchRequest) (*SearchResult, error) {
+	// Create search request with proper pagination
 	searchReq := bleve.NewSearchRequest(query)
-	// Use a very large size or implement batching for dashboard requests
-	if req.Limit == 0 {
-		searchReq.Size = 10_000_000 // Very large limit for unlimited requests
-	} else {
-		searchReq.Size = req.Limit + req.Offset // Ensure we get enough data for pagination
+	
+	// Set pagination parameters directly - Bleve will handle distributed pagination correctly
+	searchReq.Size = req.Limit
+	if searchReq.Size <= 0 {
+		searchReq.Size = 50 // Default page size
+	}
+	searchReq.From = req.Offset
+	
+	// Configure the search request with proper sorting and other settings
+	ds.configureSearchRequest(searchReq, req)
+	
+	// Enable global scoring for distributed search consistency
+	// This is the key fix from Bleve documentation for distributed search
+	globalCtx := context.WithValue(ctx, search.SearchTypeKey, search.GlobalScoring)
+	
+	// Execute search using Bleve's IndexAlias with global scoring
+	result, err := ds.indexAlias.SearchInContext(globalCtx, searchReq)
+	if err != nil {
+		return nil, fmt.Errorf("global scoring search failed: %w", err)
 	}
-	searchReq.From = 0
+	
+	// Convert Bleve result to our SearchResult format
+	return ds.convertBleveResult(result), nil
+}
 
-	// Set up sorting with proper direction
-	if req.SortBy != "" {
-		sortField := req.SortBy
-		if req.SortOrder == "desc" {
-			sortField = "-" + sortField // Bleve uses "-" prefix for descending sort
+// convertBleveResult converts a Bleve SearchResult to our SearchResult format
+func (ds *DistributedSearcher) convertBleveResult(bleveResult *bleve.SearchResult) *SearchResult {
+	result := &SearchResult{
+		Hits:      make([]*SearchHit, 0, len(bleveResult.Hits)),
+		TotalHits: bleveResult.Total,
+		MaxScore:  bleveResult.MaxScore,
+		Facets:    make(map[string]*Facet),
+	}
+	
+	// Convert hits
+	for _, hit := range bleveResult.Hits {
+		searchHit := &SearchHit{
+			ID:           hit.ID,
+			Score:        hit.Score,
+			Fields:       hit.Fields,
+			Highlighting: hit.Fragments,
+			Index:        hit.Index,
 		}
-		searchReq.SortBy([]string{sortField})
+		result.Hits = append(result.Hits, searchHit)
+	}
+	
+	// Convert facets if present
+	for name, facet := range bleveResult.Facets {
+		convertedFacet := &Facet{
+			Field:   name,
+			Total:   facet.Total,
+			Missing: facet.Missing,
+			Other:   facet.Other,
+			Terms:   make([]*FacetTerm, 0),
+		}
+		
+		if facet.Terms != nil {
+			facetTerms := facet.Terms.Terms()
+			convertedFacet.Terms = make([]*FacetTerm, 0, len(facetTerms))
+			for _, term := range facetTerms {
+				convertedFacet.Terms = append(convertedFacet.Terms, &FacetTerm{
+					Term:  term.Term,
+					Count: term.Count,
+				})
+			}
+		}
+		
+		result.Facets[name] = convertedFacet
+	}
+	
+	return result
+}
+
+// configureSearchRequest sets up common search request configuration
+func (ds *DistributedSearcher) configureSearchRequest(searchReq *bleve.SearchRequest, req *SearchRequest) {
+	// Set up sorting with proper Bleve syntax
+	sortField := req.SortBy
+	if sortField == "" {
+		sortField = "timestamp" // Default sort field
+	}
+	
+	sortOrder := req.SortOrder
+	if sortOrder == "" {
+		sortOrder = SortOrderDesc // Default sort order
+	}
+	
+	// Apply Bleve sorting - use "-" prefix for descending order
+	if sortOrder == SortOrderDesc {
+		searchReq.SortBy([]string{"-" + sortField})
 	} else {
-		// Default to timestamp descending if no sort specified
-		searchReq.SortBy([]string{"-timestamp"})
+		searchReq.SortBy([]string{sortField})
 	}
 
 	// Configure highlighting
@@ -233,213 +327,31 @@ func (ds *DistributedSearcher) executeDistributedSearch(ctx context.Context, que
 	} else {
 		searchReq.Fields = []string{"*"}
 	}
-
-	// Execute searches in parallel
-	shardResults := make(chan *bleve.SearchResult, len(healthyShards))
-	errChan := make(chan error, len(healthyShards))
-	var wg sync.WaitGroup
-
-	for _, shardID := range healthyShards {
-		wg.Add(1)
-		go func(sid int) {
-			defer wg.Done()
-			shard := ds.shards[sid]
-			if shard == nil {
-				errChan <- fmt.Errorf("shard %d is nil", sid)
-				return
-			}
-			result, err := shard.SearchInContext(ctx, searchReq)
-			if err != nil {
-				errChan <- fmt.Errorf("shard %d error: %w", sid, err)
-				ds.markShardUnhealthy(sid, err)
-				return
-			}
-			shardResults <- result
-			ds.markShardHealthy(sid)
-		}(shardID)
-	}
-
-	wg.Wait()
-	close(errChan)
-	close(shardResults)
-
-	// Collect errors
-	var errors []error
-	for err := range errChan {
-		errors = append(errors, err)
-	}
-	if len(errors) > 0 {
-		// For simplicity, just return the first error. A more robust implementation might wrap all errors.
-		return nil, errors[0]
-	}
-
-	// Convert channel to slice for merging
-	resultsSlice := make([]*bleve.SearchResult, 0, len(shardResults))
-	for result := range shardResults {
-		resultsSlice = append(resultsSlice, result)
-	}
-
-	// Merge results from all shards
-	mergedResult := ds.mergeShardResults(resultsSlice)
-
-	// Perform a stable sort in-memory on the combined result set.
-	// This is inefficient for large datasets but necessary for accurate cross-shard sorting.
-	sort.SliceStable(mergedResult.Hits, func(i, j int) bool {
-		// Handle sorting for different field types
-		val1, ok1 := mergedResult.Hits[i].Fields[req.SortBy]
-		val2, ok2 := mergedResult.Hits[j].Fields[req.SortBy]
-		if !ok1 || !ok2 {
-			return false // Cannot compare if fields are missing
-		}
-
-		// Assuming timestamp or other numeric fields for now
-		fVal1, ok1 := val1.(float64)
-		fVal2, ok2 := val2.(float64)
-		if !ok1 || !ok2 {
-			return false // Cannot compare non-numeric fields
-		}
-
-		if req.SortOrder == SortOrderDesc {
-			return fVal1 > fVal2
-		}
-		return fVal1 < fVal2
-	})
-
-	// Manually apply pagination to the globally sorted list
-	if req.Limit > 0 {
-		start := req.Offset
-		end := start + req.Limit
-
-		if start >= len(mergedResult.Hits) {
-			mergedResult.Hits = []*SearchHit{}
-		} else {
-			if end > len(mergedResult.Hits) {
-				end = len(mergedResult.Hits)
-			}
-			mergedResult.Hits = mergedResult.Hits[start:end]
-		}
-	}
-
-	return mergedResult, nil
 }
 
-// mergeShardResults merges results from multiple Bleve search results into a single SearchResult
-func (ds *DistributedSearcher) mergeShardResults(shardResults []*bleve.SearchResult) *SearchResult {
-	merged := &SearchResult{
-		Hits:      make([]*SearchHit, 0),
-		TotalHits: 0,
-		MaxScore:  0,
-		Facets:    make(map[string]*Facet),
-	}
-
-	for _, result := range shardResults {
-		if result == nil {
-			continue
-		}
-		merged.TotalHits += result.Total
-		if result.MaxScore > merged.MaxScore {
-			merged.MaxScore = result.MaxScore
-		}
-
-		// Merge hits
-		for _, hit := range result.Hits {
-			merged.Hits = append(merged.Hits, &SearchHit{
-				ID:           hit.ID,
-				Score:        hit.Score,
-				Fields:       hit.Fields,
-				Highlighting: hit.Fragments,
-				Index:        hit.Index,
-			})
-		}
-
-		// Merge facets
-		for name, facet := range result.Facets {
-			if _, ok := merged.Facets[name]; !ok {
-				merged.Facets[name] = &Facet{
-					Field: name,
-					Total: 0,
-					Terms: make([]*FacetTerm, 0),
-				}
-			}
-			merged.Facets[name].Total += facet.Total
-			merged.Facets[name].Missing += facet.Missing
-			merged.Facets[name].Other += facet.Other
-
-			// A map-based merge to correctly handle term counts across shards.
-			termMap := make(map[string]*FacetTerm)
-			// Prime the map with already merged terms
-			for _, term := range merged.Facets[name].Terms {
-				termMap[term.Term] = term
-			}
-			// Merge new terms from the current shard's facet result
-			if facet.Terms != nil {
-				for _, term := range facet.Terms.Terms() {
-					if existing, ok := termMap[term.Term]; ok {
-						existing.Count += term.Count
-					} else {
-						termMap[term.Term] = &FacetTerm{Term: term.Term, Count: term.Count}
-					}
-				}
-			}
-
-			// Convert map back to slice and sort
-			newTerms := make([]*FacetTerm, 0, len(termMap))
-			for _, term := range termMap {
-				newTerms = append(newTerms, term)
-			}
-			sort.Slice(newTerms, func(i, j int) bool {
-				return newTerms[i].Count > newTerms[j].Count
-			})
-			merged.Facets[name].Terms = newTerms
-		}
-	}
-
-	return merged
-}
 
 // Utility methods
 
 func (ds *DistributedSearcher) setRequestDefaults(req *SearchRequest) {
-	if req.SortBy == "" {
-		req.SortBy = "timestamp"
-	}
-	if req.SortOrder == "" {
-		req.SortOrder = SortOrderDesc
-	}
 	if req.Timeout == 0 {
 		req.Timeout = ds.config.TimeoutDuration
 	}
-	req.UseCache = ds.config.EnableCache
-}
-
-func (ds *DistributedSearcher) getHealthyShards() []int {
-	var healthy []int
-	ds.stats.mutex.RLock()
-	for id, stat := range ds.stats.shardStats {
-		if stat.IsHealthy {
-			healthy = append(healthy, id)
-		}
+	if req.UseCache && !ds.config.EnableCache {
+		req.UseCache = false
 	}
-	ds.stats.mutex.RUnlock()
-	return healthy
-}
-
-func (ds *DistributedSearcher) markShardHealthy(shardID int) {
-	ds.stats.mutex.Lock()
-	if stat, exists := ds.stats.shardStats[shardID]; exists {
-		stat.IsHealthy = true
-		stat.LastSearchTime = time.Now()
+	if !req.UseCache && ds.config.EnableCache {
+		req.UseCache = true
 	}
-	ds.stats.mutex.Unlock()
 }
 
-func (ds *DistributedSearcher) markShardUnhealthy(shardID int, err error) {
-	ds.stats.mutex.Lock()
-	if stat, exists := ds.stats.shardStats[shardID]; exists {
-		stat.IsHealthy = false
-		stat.ErrorCount++
+func (ds *DistributedSearcher) getHealthyShards() []int {
+	// With IndexAlias, Bleve handles shard health internally
+	// Return all shard IDs since the alias will route correctly
+	healthy := make([]int, len(ds.shards))
+	for i := range ds.shards {
+		healthy[i] = i
 	}
-	ds.stats.mutex.Unlock()
+	return healthy
 }
 
 func (ds *DistributedSearcher) updateShardStats(shardID int, duration time.Duration, success bool) {

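The searcher now delegates cross-shard merging to Bleve itself: the shards are wrapped in an IndexAlias and queried with the GlobalScoring search type, so ranking and From/Size pagination are computed over the whole shard set instead of being merged and re-sorted by hand. A minimal, self-contained sketch of that pattern, assuming a Bleve version that exposes search.GlobalScoring (as the diff above does); the in-memory shards and field names are toy examples, not the project's:

package main

import (
	"context"
	"fmt"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/search"
)

func main() {
	mapping := bleve.NewIndexMapping()

	// Two in-memory "shards" standing in for the on-disk shard set.
	shardA, _ := bleve.NewMemOnly(mapping)
	shardB, _ := bleve.NewMemOnly(mapping)
	_ = shardA.Index("1", map[string]interface{}{"path": "/api/users", "status": 200})
	_ = shardB.Index("2", map[string]interface{}{"path": "/api/orders", "status": 500})

	// The alias fans the query out to every shard and merges the results.
	alias := bleve.NewIndexAlias(shardA, shardB)

	req := bleve.NewSearchRequest(bleve.NewMatchQuery("api"))
	req.Size = 10
	req.From = 0
	req.SortBy([]string{"-_score"})

	// Global scoring: relevance is computed with corpus-wide statistics rather
	// than per-shard ones, keeping cross-shard ranking and pagination consistent.
	ctx := context.WithValue(context.Background(), search.SearchTypeKey, search.GlobalScoring)
	res, err := alias.SearchInContext(ctx, req)
	if err != nil {
		panic(err)
	}
	fmt.Println(res.Total, res.Hits)
}
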
+ 3 - 0
internal/nginx_log/searcher/types.go

@@ -111,6 +111,9 @@ type SearchResult struct {
 	// Cache info
 	FromCache bool `json:"from_cache,omitempty"`
 	CacheHit  bool `json:"cache_hit,omitempty"`
+	
+	// Warning message for deep pagination or other issues
+	Warning string `json:"warning,omitempty"`
 }
 
 // SearchHit represents a single search result