[[cohere-es]]
=== Tutorial: Using Cohere with {es}
++++
<titleabbrev>Using Cohere with {es}</titleabbrev>
++++

This tutorial shows you how to compute embeddings with Cohere using the
{infer} API and store them for efficient vector or hybrid search in {es}. It
uses the Python {es} client to perform the operations.

You'll learn how to:

* create an {infer} endpoint for text embedding using the Cohere service,
* create the necessary index mapping for the {es} index,
* build an {infer} pipeline to ingest documents into the index together with
the embeddings,
* perform hybrid search on the data,
* rerank search results by using Cohere's rerank model,
* design a RAG system with Cohere's Chat API.

The tutorial uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data
set.

Refer to https://docs.cohere.com/docs/elasticsearch-and-cohere[Cohere's tutorial]
for an example using a different data set.


[discrete]
[[cohere-es-req]]
==== Requirements

* A https://cohere.com/[Cohere account],
* an https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html[Elastic Cloud]
account,
* Python 3.7 or higher.


[discrete]
[[cohere-es-packages]]
==== Install required packages

Install {es} and Cohere:

[source,py]
------------------------------------------------------------
!pip install elasticsearch
!pip install cohere
------------------------------------------------------------

Import the required packages:

[source,py]
------------------------------------------------------------
from elasticsearch import Elasticsearch, helpers
import cohere
import json
import requests
------------------------------------------------------------

[discrete]
[[cohere-es-client]]
==== Create the {es} client

To create your {es} client, you need:

* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id[your Cloud ID],
* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key[an encoded API key].

[source,py]
------------------------------------------------------------
ELASTIC_CLOUD_ID = "elastic_cloud_id"
ELASTIC_API_KEY = "elastic_api_key"

client = Elasticsearch(
    cloud_id=ELASTIC_CLOUD_ID,
    api_key=ELASTIC_API_KEY
)

# Confirm the client has connected
print(client.info())
------------------------------------------------------------


[discrete]
[[cohere-es-infer-endpoint]]
==== Create the {infer} endpoint

<<put-inference-api,Create the {infer} endpoint>> first. In this example, the
{infer} endpoint uses Cohere's `embed-english-v3.0` model and the
`embedding_type` is set to `byte`.

[source,py]
------------------------------------------------------------
COHERE_API_KEY = "cohere_api_key"

client.inference.put_model(
    task_type="text_embedding",
    inference_id="cohere_embeddings",
    body={
        "service": "cohere",
        "service_settings": {
            "api_key": COHERE_API_KEY,
            "model_id": "embed-english-v3.0",
            "embedding_type": "byte"
        }
    },
)
------------------------------------------------------------

You can find your API keys in your Cohere dashboard under the
https://dashboard.cohere.com/api-keys[API keys section].


[discrete]
[[cohere-es-index-mapping]]
==== Create the index mapping

Create the index mapping for the index that will contain the embeddings. The
`dims` value matches the 1024-dimensional vectors produced by
`embed-english-v3.0`, and the `byte` element type corresponds to the
`embedding_type` configured on the {infer} endpoint.

[source,py]
------------------------------------------------------------
client.indices.create(
    index="cohere-embeddings",
    settings={"index": {"default_pipeline": "cohere_embeddings"}},
    mappings={
        "properties": {
            "text_embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "element_type": "byte",
            },
            "text": {"type": "text"},
            "id": {"type": "integer"},
            "title": {"type": "text"}
        }
    },
)
------------------------------------------------------------


[discrete]
[[cohere-es-infer-pipeline]]
==== Create the {infer} pipeline

Now you have an {infer} endpoint and an index ready to store embeddings. The
next step is to create an <<ingest,ingest pipeline>> with an
<<inference-processor,{infer} processor>> that creates the embeddings using
the {infer} endpoint and stores them in the index.

[source,py]
--------------------------------------------------
client.ingest.put_pipeline(
    id="cohere_embeddings",
    description="Ingest pipeline for Cohere inference.",
    processors=[
        {
            "inference": {
                "model_id": "cohere_embeddings",
                "input_output": {
                    "input_field": "text",
                    "output_field": "text_embedding",
                },
            }
        }
    ],
)
--------------------------------------------------
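
If you want to verify the pipeline before ingesting the full data set, you can
run it against a single document with the simulate API. This is an optional,
minimal sketch; the sample text is made up, and the call invokes the Cohere
{infer} endpoint once:

[source,py]
--------------------------------------------------
# Optional check: run the pipeline on one hypothetical document
verification = client.ingest.simulate(
    id="cohere_embeddings",
    docs=[{"_source": {"text": "A short sample sentence to embed."}}],
)
print(verification)
--------------------------------------------------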


[discrete]
[[cohere-es-insert-documents]]
==== Prepare data and insert documents

This example uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data
set that you can find on HuggingFace.

[source,py]
--------------------------------------------------
url = 'https://huggingface.co/datasets/mteb/scifact/raw/main/corpus.jsonl'

# Fetch the JSONL data from the URL
response = requests.get(url)
response.raise_for_status()  # Raise an error for bad responses

# Split the content by new lines and parse each line as JSON
data = [json.loads(line) for line in response.text.strip().split('\n') if line]
# Now data is a list of dictionaries

# Change the `_id` key to `id` as `_id` is a reserved key in Elasticsearch.
for item in data:
    if '_id' in item:
        item['id'] = item.pop('_id')

# Prepare the documents to be indexed
documents = []
for data_dict in data:
    documents.append({
        "_index": "cohere-embeddings",
        "_source": data_dict,
    })

# Use the bulk endpoint to index
helpers.bulk(client, documents)

print("Data ingestion completed, text embeddings generated!")
--------------------------------------------------

Your index is populated with the SciFact data and text embeddings for the text
field.
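
A quick way to confirm that the documents were indexed is to refresh the index
and check the document count; this sanity check is optional:

[source,py]
--------------------------------------------------
# Optional sanity check: the count should match the number of corpus lines
client.indices.refresh(index="cohere-embeddings")
print(client.count(index="cohere-embeddings"))
--------------------------------------------------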


[discrete]
[[cohere-es-hybrid-search]]
==== Hybrid search

Let's start querying the index!

The code below performs a hybrid search. The `knn` query computes the relevance
of search results based on vector similarity using the `text_embedding` field,
while the lexical search query uses BM25 retrieval to compute keyword similarity
on the `title` and `text` fields.

[source,py]
--------------------------------------------------
query = "What is biosimilarity?"

response = client.search(
    index="cohere-embeddings",
    size=100,
    knn={
        "field": "text_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "cohere_embeddings",
                "model_text": query,
            }
        },
        "k": 10,
        "num_candidates": 50,
    },
    query={
        "multi_match": {
            "query": query,
            "fields": ["text", "title"]
        }
    }
)

raw_documents = response["hits"]["hits"]

# Display the first 10 results
for document in raw_documents[0:10]:
    print(f'Title: {document["_source"]["title"]}\nText: {document["_source"]["text"]}\n')

# Format the documents for ranking
documents = []
for hit in response["hits"]["hits"]:
    documents.append(hit["_source"]["text"])
--------------------------------------------------


[discrete]
[[cohere-es-rerank-results]]
===== Rerank search results

To combine the results more effectively, use
https://docs.cohere.com/docs/rerank-2[Cohere's Rerank v3] model through the
{infer} API to provide a more precise semantic reranking of the results.

Create an {infer} endpoint with your Cohere API key and the name of the model
you want to use as the `model_id` (`rerank-english-v3.0` in this example).

[source,py]
--------------------------------------------------
client.inference.put_model(
    task_type="rerank",
    inference_id="cohere_rerank",
    body={
        "service": "cohere",
        "service_settings": {
            "api_key": COHERE_API_KEY,
            "model_id": "rerank-english-v3.0"
        },
        "task_settings": {
            "top_n": 10,
        },
    }
)
--------------------------------------------------

Rerank the results using the new {infer} endpoint.

[source,py]
--------------------------------------------------
# Pass the query and the search results to the service
response = client.inference.inference(
    inference_id="cohere_rerank",
    body={
        "query": query,
        "input": documents,
        "task_settings": {
            "return_documents": False
        }
    }
)

# Reconstruct the input documents based on the index provided in the rerank response
ranked_documents = []
for document in response.body["rerank"]:
    ranked_documents.append({
        "title": raw_documents[int(document["index"])]["_source"]["title"],
        "text": raw_documents[int(document["index"])]["_source"]["text"]
    })

# Print the top 10 results
for document in ranked_documents[0:10]:
    print(f"Title: {document['title']}\nText: {document['text']}\n")
--------------------------------------------------

The response is a list of documents in descending order of relevance. Each
document has a corresponding index that reflects the order of the documents when
they were sent to the {infer} endpoint.
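
For orientation, each entry in `response.body["rerank"]` looks roughly like the
snippet below. The `index` field is what the reconstruction loop above relies
on; the `relevance_score` field name and the score value shown here are
illustrative assumptions, not output copied from a real run:

[source,py]
--------------------------------------------------
# Hypothetical shape of one reranked entry:
# {"index": 3, "relevance_score": 0.98}
--------------------------------------------------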


[discrete]
[[cohere-es-rag]]
==== Retrieval Augmented Generation (RAG) with Cohere and {es}

RAG is a method for generating text using additional information fetched from an
external data source. With the ranked results, you can build a RAG system on top
of what you previously created by using
https://docs.cohere.com/docs/chat-api[Cohere's Chat API].

Create a Cohere client, then pass the query and the reranked documents to the
Chat API to receive a grounded response from Cohere's generative model
https://docs.cohere.com/docs/command-r-plus[Command R+], and print out the
response together with its sources.

[source,py]
--------------------------------------------------
# Create the Cohere client with the same API key used for the inference endpoints
co = cohere.Client(COHERE_API_KEY)

response = co.chat(message=query, documents=ranked_documents, model='command-r-plus')

source_documents = []
for citation in response.citations:
    for document_id in citation.document_ids:
        if document_id not in source_documents:
            source_documents.append(document_id)

print(f"Query: {query}")
print(f"Response: {response.text}")
print("Sources:")
for document in response.documents:
    if document['id'] in source_documents:
        print(f"{document['title']}: {document['text']}")
--------------------------------------------------

The response will look similar to this:

[source,console-result]
--------------------------------------------------
Query: What is biosimilarity?
Response: Biosimilarity is based on the comparability concept, which has been used successfully for several decades to ensure close similarity of a biological product before and after a manufacturing change. Over the last 10 years, experience with biosimilars has shown that even complex biotechnology-derived proteins can be copied successfully.
Sources:
Interchangeability of Biosimilars: A European Perspective: (...)
--------------------------------------------------
// NOTCONSOLE