123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318 |
- package searcher
- import (
- "context"
- "sort"
- "strings"
- "github.com/blevesearch/bleve/v2/search"
- )
- // convertFacets converts Bleve facets to our facet format
- func (ds *DistributedSearcher) convertFacets(bleveFacets search.FacetResults) map[string]*Facet {
- facets := make(map[string]*Facet)
- for name, result := range bleveFacets {
- facet := &Facet{
- Field: name,
- Total: result.Total,
- Missing: result.Missing,
- Other: result.Other,
- Terms: make([]*FacetTerm, 0),
- }
- // Handle Terms facet
- if result.Terms != nil {
- for _, term := range result.Terms.Terms() {
- facet.Terms = append(facet.Terms, &FacetTerm{
- Term: term.Term,
- Count: term.Count,
- })
- }
- }
- facets[name] = facet
- }
- return facets
- }
- // mergeFacets merges facet results from multiple shards
- func (ds *DistributedSearcher) mergeFacets(combined, incoming map[string]*Facet) {
- for field, incomingFacet := range incoming {
- if existingFacet, exists := combined[field]; exists {
- ds.mergeSingleFacet(existingFacet, incomingFacet)
- } else {
- combined[field] = ds.copyFacet(incomingFacet)
- }
- }
- }
- // mergeSingleFacet merges two facets for the same field
- func (ds *DistributedSearcher) mergeSingleFacet(existing, incoming *Facet) {
- // Note: Do NOT sum Total values - it represents unique terms count, not document count
- // The Total should be recalculated based on the actual number of unique terms after merging
- existing.Missing += incoming.Missing
- existing.Other += incoming.Other
- // Merge terms
- termCounts := make(map[string]int)
- // Add existing terms
- for _, term := range existing.Terms {
- termCounts[term.Term] = term.Count
- }
- // Add incoming terms
- for _, term := range incoming.Terms {
- termCounts[term.Term] += term.Count
- }
- // Convert back to slice and sort by count
- terms := make([]*FacetTerm, 0, len(termCounts))
- for term, count := range termCounts {
- terms = append(terms, &FacetTerm{
- Term: term,
- Count: count,
- })
- }
- // Sort by count (descending) then by term (ascending)
- sort.Slice(terms, func(i, j int) bool {
- if terms[i].Count == terms[j].Count {
- return terms[i].Term < terms[j].Term
- }
- return terms[i].Count > terms[j].Count
- })
- // Limit to top terms
- if len(terms) > DefaultFacetSize {
- // Calculate "other" count
- otherCount := 0
- for _, term := range terms[DefaultFacetSize:] {
- otherCount += term.Count
- }
- existing.Other += otherCount
- terms = terms[:DefaultFacetSize]
- }
- existing.Terms = terms
- // Set Total to the actual number of unique terms (not sum of totals)
- existing.Total = len(termCounts)
- }
- // copyFacet creates a deep copy of a facet
- func (ds *DistributedSearcher) copyFacet(original *Facet) *Facet {
- facet := &Facet{
- Field: original.Field,
- Total: original.Total,
- Missing: original.Missing,
- Other: original.Other,
- Terms: make([]*FacetTerm, len(original.Terms)),
- }
- for i, term := range original.Terms {
- facet.Terms[i] = &FacetTerm{
- Term: term.Term,
- Count: term.Count,
- }
- }
- return facet
- }
- // Aggregate performs aggregations on search results
- func (ds *DistributedSearcher) Aggregate(ctx context.Context, req *AggregationRequest) (*AggregationResult, error) {
- // This is a simplified implementation
- // In a full implementation, you would execute the aggregation across all shards
- // and merge the results similar to how facets are handled
- result := &AggregationResult{
- Field: req.Field,
- Type: req.Type,
- }
- // For now, return a placeholder result
- // This would need to be implemented based on specific requirements
- switch req.Type {
- case AggregationTerms:
- result.Data = map[string]interface{}{
- "buckets": []map[string]interface{}{},
- }
- case AggregationStats:
- result.Data = map[string]interface{}{
- "count": 0,
- "min": 0,
- "max": 0,
- "avg": 0,
- "sum": 0,
- }
- case AggregationHistogram:
- result.Data = map[string]interface{}{
- "buckets": []map[string]interface{}{},
- }
- case AggregationDateHistogram:
- result.Data = map[string]interface{}{
- "buckets": []map[string]interface{}{},
- }
- case AggregationCardinality:
- result.Data = map[string]interface{}{
- "value": 0,
- }
- }
- return result, nil
- }
- // Suggest provides search suggestions
- func (ds *DistributedSearcher) Suggest(ctx context.Context, text string, field string, size int) ([]*Suggestion, error) {
- if size <= 0 || size > 100 {
- size = 10
- }
- // Create search request
- req := &SearchRequest{
- Query: text,
- Fields: []string{field},
- Limit: size * 2, // Get more results to have better suggestions
- SortBy: "_score",
- SortOrder: SortOrderDesc,
- }
- // Execute search
- result, err := ds.Search(ctx, req)
- if err != nil {
- return nil, err
- }
- // Convert results to suggestions
- suggestions := make([]*Suggestion, 0, size)
- seen := make(map[string]bool)
- for _, hit := range result.Hits {
- if len(suggestions) >= size {
- break
- }
- // Extract text from the specified field
- if fieldValue, exists := hit.Fields[field]; exists {
- if textValue, ok := fieldValue.(string); ok {
- // Simple suggestion extraction - this could be made more sophisticated
- terms := ds.extractSuggestionTerms(textValue, text)
- for _, term := range terms {
- if len(suggestions) >= size {
- break
- }
- if !seen[term] && strings.Contains(strings.ToLower(term), strings.ToLower(text)) {
- suggestions = append(suggestions, &Suggestion{
- Text: term,
- Score: hit.Score,
- Freq: 1, // Would need to be calculated from corpus
- })
- seen[term] = true
- }
- }
- }
- }
- }
- // Sort suggestions by score
- sort.Slice(suggestions, func(i, j int) bool {
- return suggestions[i].Score > suggestions[j].Score
- })
- return suggestions, nil
- }
- // extractSuggestionTerms extracts potential suggestion terms from text
- func (ds *DistributedSearcher) extractSuggestionTerms(text string, query string) []string {
- // Simple term extraction - this could be enhanced with NLP
- terms := strings.Fields(text)
- // Filter and clean terms
- var suggestions []string
- for _, term := range terms {
- term = strings.TrimSpace(term)
- if len(term) > 2 && !isCommonWord(term) {
- suggestions = append(suggestions, term)
- }
- }
- return suggestions
- }
// isCommonWord reports whether word, compared case-insensitively, is one of a
// fixed list of very common English words (articles, conjunctions,
// prepositions and auxiliary verbs) that make poor search suggestions.
//
// The list is matched with a constant switch so no lookup table has to be
// allocated and filled on every call.
func isCommonWord(word string) bool {
	switch strings.ToLower(word) {
	case "the", "and", "or", "but",
		"in", "on", "at", "to",
		"for", "of", "with", "by",
		"a", "an", "as", "is",
		"are", "was", "were", "be",
		"been", "have", "has", "had",
		"do", "does", "did", "will",
		"would", "could", "should", "may",
		"might", "must", "can", "shall":
		return true
	}
	return false
}
- // Analyze analyzes text using a specified analyzer
- func (ds *DistributedSearcher) Analyze(ctx context.Context, text string, analyzer string) ([]string, error) {
- // This would typically use Bleve's analysis capabilities
- // For now, provide a simple implementation
- if analyzer == "" {
- analyzer = "standard"
- }
- // Simple tokenization - this should use proper analyzers
- terms := strings.Fields(strings.ToLower(text))
- // Remove punctuation and short terms
- var analyzed []string
- for _, term := range terms {
- term = strings.Trim(term, ".,!?;:\"'()[]{}/-_")
- if len(term) > 2 {
- analyzed = append(analyzed, term)
- }
- }
- return analyzed, nil
- }
- // Cache operations
- func (ds *DistributedSearcher) getFromCache(req *SearchRequest) *SearchResult {
- if ds.cache == nil {
- return nil
- }
- return ds.cache.Get(req)
- }
- func (ds *DistributedSearcher) cacheResult(req *SearchRequest, result *SearchResult) {
- if ds.cache == nil {
- return
- }
- ds.cache.Put(req, result, DefaultCacheTTL)
- }
- // ClearCache clears the search cache
- func (ds *DistributedSearcher) ClearCache() error {
- if ds.cache != nil {
- ds.cache.Clear()
- }
- return nil
- }
- // GetCacheStats returns cache statistics
- func (ds *DistributedSearcher) GetCacheStats() *CacheStats {
- if ds.cache != nil {
- return ds.cache.GetStats()
- }
- return nil
- }
|