facet_aggregator.go

package searcher

import (
	"context"
	"sort"
	"strings"
)

// mergeSingleFacet merges two facets for the same field.
func (ds *Searcher) mergeSingleFacet(existing, incoming *Facet) {
	// Note: do NOT sum Total values - Total is the number of unique terms,
	// not a document count. It is recalculated below from the merged term set.
	existing.Missing += incoming.Missing
	existing.Other += incoming.Other

	// Merge terms, summing counts for terms that appear in both facets.
	termCounts := make(map[string]int)
	for _, term := range existing.Terms {
		termCounts[term.Term] = term.Count
	}
	for _, term := range incoming.Terms {
		termCounts[term.Term] += term.Count
	}

	// Convert back to a slice.
	terms := make([]*FacetTerm, 0, len(termCounts))
	for term, count := range termCounts {
		terms = append(terms, &FacetTerm{
			Term:  term,
			Count: count,
		})
	}

	// Sort by count (descending), then by term (ascending) for stable ordering.
	sort.Slice(terms, func(i, j int) bool {
		if terms[i].Count == terms[j].Count {
			return terms[i].Term < terms[j].Term
		}
		return terms[i].Count > terms[j].Count
	})

	// Limit to the top terms; everything past the cutoff is folded into Other.
	if len(terms) > DefaultFacetSize {
		otherCount := 0
		for _, term := range terms[DefaultFacetSize:] {
			otherCount += term.Count
		}
		existing.Other += otherCount
		terms = terms[:DefaultFacetSize]
	}

	existing.Terms = terms

	// Set Total to the actual number of unique terms (not a sum of totals).
	existing.Total = len(termCounts)
}
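
// Illustrative sketch (not part of the original file): one plausible way a
// caller could combine per-shard facet results with mergeSingleFacet. The
// map layout (field name -> *Facet per shard) and the method name are
// assumptions made for the example; note that the first shard's facet is
// stored by reference rather than copied.
func (ds *Searcher) mergeFacetMapsExample(shardFacets []map[string]*Facet) map[string]*Facet {
	merged := make(map[string]*Facet)
	for _, facets := range shardFacets {
		for field, facet := range facets {
			if existing, ok := merged[field]; ok {
				// Same field seen on an earlier shard: fold the counts together.
				ds.mergeSingleFacet(existing, facet)
			} else {
				merged[field] = facet
			}
		}
	}
	return merged
}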
// Aggregate performs aggregations on search results.
func (ds *Searcher) Aggregate(ctx context.Context, req *AggregationRequest) (*AggregationResult, error) {
	// This is a simplified implementation. A full implementation would execute
	// the aggregation across all shards and merge the results, similar to how
	// facets are handled above.
	result := &AggregationResult{
		Field: req.Field,
		Type:  req.Type,
	}

	// For now, return an empty placeholder result for each aggregation type.
	switch req.Type {
	case AggregationTerms:
		result.Data = map[string]interface{}{
			"buckets": []map[string]interface{}{},
		}
	case AggregationStats:
		result.Data = map[string]interface{}{
			"count": 0,
			"min":   0,
			"max":   0,
			"avg":   0,
			"sum":   0,
		}
	case AggregationHistogram:
		result.Data = map[string]interface{}{
			"buckets": []map[string]interface{}{},
		}
	case AggregationDateHistogram:
		result.Data = map[string]interface{}{
			"buckets": []map[string]interface{}{},
		}
	case AggregationCardinality:
		result.Data = map[string]interface{}{
			"value": 0,
		}
	}
	return result, nil
}
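
// Illustrative sketch (not part of the original file): merging per-shard
// results for a stats aggregation, the cross-shard step the comment above
// alludes to. The shardStatsExample shape is an assumption for the example;
// the real shard result type may differ.
type shardStatsExample struct {
	Count int64
	Min   float64
	Max   float64
	Sum   float64
}

func mergeStatsExample(shards []shardStatsExample) map[string]interface{} {
	merged := shardStatsExample{}
	for _, s := range shards {
		if s.Count == 0 {
			continue // an empty shard contributes nothing
		}
		if merged.Count == 0 || s.Min < merged.Min {
			merged.Min = s.Min
		}
		if merged.Count == 0 || s.Max > merged.Max {
			merged.Max = s.Max
		}
		merged.Count += s.Count
		merged.Sum += s.Sum
	}
	avg := 0.0
	if merged.Count > 0 {
		avg = merged.Sum / float64(merged.Count)
	}
	return map[string]interface{}{
		"count": merged.Count,
		"min":   merged.Min,
		"max":   merged.Max,
		"avg":   avg,
		"sum":   merged.Sum,
	}
}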
// Suggest provides search suggestions for the given text in the given field.
func (ds *Searcher) Suggest(ctx context.Context, text string, field string, size int) ([]*Suggestion, error) {
	if size <= 0 || size > 100 {
		size = 10
	}

	// Build a search request; fetch more hits than needed so there are enough
	// candidate terms to choose from.
	req := &SearchRequest{
		Query:     text,
		Fields:    []string{field},
		Limit:     size * 2,
		SortBy:    "_score",
		SortOrder: SortOrderDesc,
	}

	result, err := ds.Search(ctx, req)
	if err != nil {
		return nil, err
	}

	// Convert hits to suggestions, deduplicating terms along the way.
	suggestions := make([]*Suggestion, 0, size)
	seen := make(map[string]bool)
	for _, hit := range result.Hits {
		if len(suggestions) >= size {
			break
		}
		// Extract text from the specified field.
		fieldValue, exists := hit.Fields[field]
		if !exists {
			continue
		}
		textValue, ok := fieldValue.(string)
		if !ok {
			continue
		}
		// Simple suggestion extraction - this could be made more sophisticated.
		for _, term := range ds.extractSuggestionTerms(textValue, text) {
			if len(suggestions) >= size {
				break
			}
			if !seen[term] && strings.Contains(strings.ToLower(term), strings.ToLower(text)) {
				suggestions = append(suggestions, &Suggestion{
					Text:  term,
					Score: hit.Score,
					Freq:  1, // would need to be calculated from the corpus
				})
				seen[term] = true
			}
		}
	}

	// Sort suggestions by score (descending).
	sort.Slice(suggestions, func(i, j int) bool {
		return suggestions[i].Score > suggestions[j].Score
	})
	return suggestions, nil
}
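
// Illustrative sketch (not part of the original file): a typical call to
// Suggest, e.g. from an autocomplete handler. The query string and the
// "title" field name are assumptions made for the example.
func suggestUsageExample(ctx context.Context, ds *Searcher) ([]string, error) {
	suggestions, err := ds.Suggest(ctx, "sear", "title", 5)
	if err != nil {
		return nil, err
	}
	texts := make([]string, 0, len(suggestions))
	for _, s := range suggestions {
		texts = append(texts, s.Text)
	}
	return texts, nil
}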
// extractSuggestionTerms extracts potential suggestion terms from text.
// The query parameter is accepted but not currently used.
func (ds *Searcher) extractSuggestionTerms(text string, query string) []string {
	// Simple whitespace-based term extraction - this could be enhanced with NLP.
	terms := strings.Fields(text)

	// Filter out short terms and stop words.
	var suggestions []string
	for _, term := range terms {
		term = strings.TrimSpace(term)
		if len(term) > 2 && !isCommonWord(term) {
			suggestions = append(suggestions, term)
		}
	}
	return suggestions
}
// isCommonWord checks if a word is too common to be a good suggestion.
func isCommonWord(word string) bool {
	commonWords := map[string]bool{
		"the": true, "and": true, "or": true, "but": true,
		"in": true, "on": true, "at": true, "to": true,
		"for": true, "of": true, "with": true, "by": true,
		"a": true, "an": true, "as": true, "is": true,
		"are": true, "was": true, "were": true, "be": true,
		"been": true, "have": true, "has": true, "had": true,
		"do": true, "does": true, "did": true, "will": true,
		"would": true, "could": true, "should": true, "may": true,
		"might": true, "must": true, "can": true, "shall": true,
	}
	return commonWords[strings.ToLower(word)]
}
// Analyze analyzes text using the named analyzer.
func (ds *Searcher) Analyze(ctx context.Context, text string, analyzer string) ([]string, error) {
	// This would typically use Bleve's analysis capabilities; for now, provide a
	// simple implementation. The analyzer name is defaulted but not yet used to
	// select a real analyzer.
	if analyzer == "" {
		analyzer = "standard"
	}

	// Simple lowercase, whitespace tokenization - this should use proper analyzers.
	terms := strings.Fields(strings.ToLower(text))

	// Strip surrounding punctuation and drop short terms.
	var analyzed []string
	for _, term := range terms {
		term = strings.Trim(term, ".,!?;:\"'()[]{}/-_")
		if len(term) > 2 {
			analyzed = append(analyzed, term)
		}
	}
	return analyzed, nil
}
// Cache operations.

// getFromCache returns a cached result for the request, or nil on a miss or
// when caching is disabled.
func (ds *Searcher) getFromCache(req *SearchRequest) *SearchResult {
	if ds.cache == nil {
		return nil
	}
	return ds.cache.Get(req)
}

// cacheResult stores a search result in the cache with the default TTL.
// It is a no-op when caching is disabled.
func (ds *Searcher) cacheResult(req *SearchRequest, result *SearchResult) {
	if ds.cache == nil {
		return
	}
	ds.cache.Put(req, result, DefaultCacheTTL)
}
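
// Illustrative sketch (not part of the original file): the read-through
// pattern these helpers are presumably meant to support. Search may already
// consult the cache internally; this only shows how the helpers compose, and
// the method name is an assumption.
func (ds *Searcher) searchWithCacheExample(ctx context.Context, req *SearchRequest) (*SearchResult, error) {
	// Cache hit: return the stored result without executing the query.
	if cached := ds.getFromCache(req); cached != nil {
		return cached, nil
	}
	// Cache miss: execute the search and populate the cache for identical requests.
	result, err := ds.Search(ctx, req)
	if err != nil {
		return nil, err
	}
	ds.cacheResult(req, result)
	return result, nil
}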
// ClearCache clears the search cache. It is a no-op when caching is disabled.
func (ds *Searcher) ClearCache() error {
	if ds.cache != nil {
		ds.cache.Clear()
	}
	return nil
}

// GetCacheStats returns cache statistics, or nil when caching is disabled.
func (ds *Searcher) GetCacheStats() *CacheStats {
	if ds.cache != nil {
		return ds.cache.GetStats()
	}
	return nil
}