[[cohere-es]]
=== Tutorial: Using Cohere with {es}
++++
<titleabbrev>Using Cohere with {es}</titleabbrev>
++++

This tutorial shows you how to compute embeddings with Cohere using the
{infer} API and store them for efficient vector or hybrid search in {es}. It
uses the Python {es} client to perform the operations.

You'll learn how to:

* create an {infer} endpoint for text embedding using the Cohere service,
* create the necessary index mapping for the {es} index,
* build an {infer} pipeline to ingest documents into the index together with
the embeddings,
* perform hybrid search on the data,
* rerank search results by using Cohere's rerank model,
* design a RAG system with Cohere's Chat API.

The tutorial uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data
set.

Refer to https://docs.cohere.com/docs/elasticsearch-and-cohere[Cohere's tutorial]
for an example using a different data set.


[discrete]
[[cohere-es-req]]
==== Requirements

* A https://cohere.com/[Cohere account],
* an https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html[Elastic Cloud]
account,
* Python 3.7 or higher.


[discrete]
[[cohere-es-packages]]
==== Install required packages

Install {es} and Cohere:

[source,py]
------------------------------------------------------------
!pip install elasticsearch
!pip install cohere
------------------------------------------------------------

Import the required packages:

[source,py]
------------------------------------------------------------
from elasticsearch import Elasticsearch, helpers
import cohere
import json
import requests
------------------------------------------------------------

[discrete]
[[cohere-es-client]]
==== Create the {es} client

To create your {es} client, you need:

* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id[your Cloud ID],
* https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key[an encoded API key].

[source,py]
------------------------------------------------------------
ELASTIC_CLOUD_ID = "elastic_cloud_id"
ELASTIC_API_KEY = "elastic_api_key"

client = Elasticsearch(
    cloud_id=ELASTIC_CLOUD_ID,
    api_key=ELASTIC_API_KEY
)

# Confirm the client has connected
print(client.info())
------------------------------------------------------------


[discrete]
[[cohere-es-infer-endpoint]]
==== Create the {infer} endpoint

<<put-inference-api,Create the {infer} endpoint>> first. In this example, the
{infer} endpoint uses Cohere's `embed-english-v3.0` model and the
`embedding_type` is set to `byte`.

[source,py]
------------------------------------------------------------
COHERE_API_KEY = "cohere_api_key"

client.inference.put_model(
    task_type="text_embedding",
    inference_id="cohere_embeddings",
    body={
        "service": "cohere",
        "service_settings": {
            "api_key": COHERE_API_KEY,
            "model_id": "embed-english-v3.0",
            "embedding_type": "byte"
        }
    },
)
------------------------------------------------------------

You can find your API keys in your Cohere dashboard under the
https://dashboard.cohere.com/api-keys[API keys section].


[discrete]
[[cohere-es-index-mapping]]
==== Create the index mapping

Create the index mapping for the index that will contain the embeddings. The
`dims` value matches the 1024-dimensional vectors produced by
`embed-english-v3.0`, and the `byte` element type corresponds to the
`embedding_type` configured on the {infer} endpoint.

[source,py]
------------------------------------------------------------
client.indices.create(
    index="cohere-embeddings",
    settings={"index": {"default_pipeline": "cohere_embeddings"}},
    mappings={
        "properties": {
            "text_embedding": {
                "type": "dense_vector",
                "dims": 1024,
                "element_type": "byte",
            },
            "text": {"type": "text"},
            "id": {"type": "integer"},
            "title": {"type": "text"}
        }
    },
)
------------------------------------------------------------


[discrete]
[[cohere-es-infer-pipeline]]
==== Create the {infer} pipeline

Now you have an {infer} endpoint and an index ready to store embeddings. The
next step is to create an <<ingest,ingest pipeline>> with an
<<inference-processor,{infer} processor>> that creates the embeddings using
the {infer} endpoint and stores them in the index.

[source,py]
--------------------------------------------------
client.ingest.put_pipeline(
    id="cohere_embeddings",
    description="Ingest pipeline for Cohere inference.",
    processors=[
        {
            "inference": {
                "model_id": "cohere_embeddings",
                "input_output": {
                    "input_field": "text",
                    "output_field": "text_embedding",
                },
            }
        }
    ],
)
--------------------------------------------------
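
If you want to verify the pipeline before ingesting the full data set, you can
run it against a single document with the simulate API. This is an optional,
minimal sketch; the sample text is made up, and the call invokes the Cohere
{infer} endpoint once:

[source,py]
--------------------------------------------------
# Optional check: run the pipeline on one hypothetical document
verification = client.ingest.simulate(
    id="cohere_embeddings",
    docs=[{"_source": {"text": "A short sample sentence to embed."}}],
)
print(verification)
--------------------------------------------------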


[discrete]
[[cohere-es-insert-documents]]
==== Prepare data and insert documents

This example uses the https://huggingface.co/datasets/mteb/scifact[SciFact] data
set that you can find on HuggingFace.

[source,py]
--------------------------------------------------
url = 'https://huggingface.co/datasets/mteb/scifact/raw/main/corpus.jsonl'

# Fetch the JSONL data from the URL
response = requests.get(url)
response.raise_for_status()  # Raise an error for bad responses

# Split the content by new lines and parse each line as JSON
data = [json.loads(line) for line in response.text.strip().split('\n') if line]
# Now data is a list of dictionaries

# Change the `_id` key to `id` as `_id` is a reserved key in Elasticsearch.
for item in data:
    if '_id' in item:
        item['id'] = item.pop('_id')

# Prepare the documents to be indexed
documents = []
for data_dict in data:
    documents.append({
        "_index": "cohere-embeddings",
        "_source": data_dict,
    })

# Use the bulk endpoint to index
helpers.bulk(client, documents)

print("Data ingestion completed, text embeddings generated!")
--------------------------------------------------

Your index is populated with the SciFact data and text embeddings for the text
field.
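
A quick way to confirm that the documents were indexed is to refresh the index
and check the document count; this sanity check is optional:

[source,py]
--------------------------------------------------
# Optional sanity check: the count should match the number of corpus lines
client.indices.refresh(index="cohere-embeddings")
print(client.count(index="cohere-embeddings"))
--------------------------------------------------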


[discrete]
[[cohere-es-hybrid-search]]
==== Hybrid search

Let's start querying the index!

The code below performs a hybrid search. The `knn` query computes the relevance
of search results based on vector similarity using the `text_embedding` field,
while the lexical search query uses BM25 retrieval to compute keyword similarity
on the `title` and `text` fields.

[source,py]
--------------------------------------------------
query = "What is biosimilarity?"

response = client.search(
    index="cohere-embeddings",
    size=100,
    knn={
        "field": "text_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "cohere_embeddings",
                "model_text": query,
            }
        },
        "k": 10,
        "num_candidates": 50,
    },
    query={
        "multi_match": {
            "query": query,
            "fields": ["text", "title"]
        }
    }
)

raw_documents = response["hits"]["hits"]

# Display the first 10 results
for document in raw_documents[0:10]:
    print(f'Title: {document["_source"]["title"]}\nText: {document["_source"]["text"]}\n')

# Format the documents for ranking
documents = []
for hit in response["hits"]["hits"]:
    documents.append(hit["_source"]["text"])
--------------------------------------------------


[discrete]
[[cohere-es-rerank-results]]
===== Rerank search results

To combine the results more effectively, use
https://docs.cohere.com/docs/rerank-2[Cohere's Rerank v3] model through the
{infer} API to provide a more precise semantic reranking of the results.

Create an {infer} endpoint with your Cohere API key and the name of the model
you want to use as the `model_id` (`rerank-english-v3.0` in this example).

[source,py]
--------------------------------------------------
client.inference.put_model(
    task_type="rerank",
    inference_id="cohere_rerank",
    body={
        "service": "cohere",
        "service_settings": {
            "api_key": COHERE_API_KEY,
            "model_id": "rerank-english-v3.0"
        },
        "task_settings": {
            "top_n": 10,
        },
    }
)
--------------------------------------------------

Rerank the results using the new {infer} endpoint.

[source,py]
--------------------------------------------------
# Pass the query and the search results to the service
response = client.inference.inference(
    inference_id="cohere_rerank",
    body={
        "query": query,
        "input": documents,
        "task_settings": {
            "return_documents": False
        }
    }
)

# Reconstruct the input documents based on the index provided in the rerank response
ranked_documents = []
for document in response.body["rerank"]:
    ranked_documents.append({
        "title": raw_documents[int(document["index"])]["_source"]["title"],
        "text": raw_documents[int(document["index"])]["_source"]["text"]
    })

# Print the top 10 results
for document in ranked_documents[0:10]:
    print(f"Title: {document['title']}\nText: {document['text']}\n")
--------------------------------------------------

The response is a list of documents in descending order of relevance. Each
document has a corresponding index that reflects the order of the documents when
they were sent to the {infer} endpoint.
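
For orientation, each entry in `response.body["rerank"]` looks roughly like the
snippet below. The `index` field is what the reconstruction loop above relies
on; the `relevance_score` field name and the score value shown here are
illustrative assumptions, not output copied from a real run:

[source,py]
--------------------------------------------------
# Hypothetical shape of one reranked entry:
# {"index": 3, "relevance_score": 0.98}
--------------------------------------------------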


[discrete]
[[cohere-es-rag]]
==== Retrieval Augmented Generation (RAG) with Cohere and {es}

RAG is a method for generating text using additional information fetched from an
external data source. With the ranked results, you can build a RAG system on top
of what you previously created by using
https://docs.cohere.com/docs/chat-api[Cohere's Chat API].

Create a Cohere client, then pass the query and the reranked documents to the
Chat API to receive a grounded response from Cohere's generative model
https://docs.cohere.com/docs/command-r-plus[Command R+], and print out the
response together with its sources.

[source,py]
--------------------------------------------------
# Create the Cohere client with the same API key used for the inference endpoints
co = cohere.Client(COHERE_API_KEY)

response = co.chat(message=query, documents=ranked_documents, model='command-r-plus')

source_documents = []
for citation in response.citations:
    for document_id in citation.document_ids:
        if document_id not in source_documents:
            source_documents.append(document_id)

print(f"Query: {query}")
print(f"Response: {response.text}")
print("Sources:")
for document in response.documents:
    if document['id'] in source_documents:
        print(f"{document['title']}: {document['text']}")
--------------------------------------------------

The response will look similar to this:

[source,console-result]
--------------------------------------------------
Query: What is biosimilarity?
Response: Biosimilarity is based on the comparability concept, which has been used successfully for several decades to ensure close similarity of a biological product before and after a manufacturing change. Over the last 10 years, experience with biosimilars has shown that even complex biotechnology-derived proteins can be copied successfully.
Sources:
Interchangeability of Biosimilars: A European Perspective: (...)
--------------------------------------------------
// NOTCONSOLE