Select Effective Examples
We can select effective in-context examples by choosing those that are semantically closest to the query, using k-nearest neighbors (KNN).
In the implementation below, which uses instructor, we follow these steps:
- Embed the query examples
- Embed the query that we want to answer
- Find the k query examples closest to the query
- Use the chosen examples and their answers as the context for the LLM
import heapq
import math
from textwrap import dedent

import instructor
from openai import OpenAI
from pydantic import BaseModel
# Schema for one in-context example: a question paired with its answer.
# NOTE(review): nothing in the visible code instantiates this model; the
# examples in the script are plain dicts with the same two keys.
class Example(BaseModel):
    question: str  # text of the example question
    answer: str  # answer that belongs to that question
# Structured output requested from the LLM; passed as `response_model` in
# generate_response.
# NOTE(review): documented with comments rather than a docstring because
# pydantic is understood to copy class docstrings into the JSON schema, which
# would change what is sent to the model -- confirm before adding one.
class Response(BaseModel):
    answer: str  # the model's answer to the query
# Plain OpenAI client; used directly for the embeddings call in embed_queries.
oai = OpenAI()
# instructor-wrapped client; used in generate_response, where the chat
# completion call is given a `response_model`.
client = instructor.from_openai(oai)
def distance(a: list[float], b: list[float]) -> float:
    """Return the cosine distance ``1 - cos(a, b)`` between two vectors.

    The result is 0 for vectors pointing the same way, 1 for orthogonal
    vectors and 2 for opposite vectors.

    Args:
        a: First vector.
        b: Second vector; must have as many components as ``a``.

    Raises:
        ValueError: If the vectors differ in length, or if either has zero
            magnitude (which includes empty vectors), because the cosine is
            undefined in that case.
    """
    # zip() would silently drop the extra components of the longer vector and
    # produce a plausible-looking but wrong distance.
    if len(a) != len(b):
        raise ValueError(
            f"vectors must have the same length, got {len(a)} and {len(b)}"
        )

    dot = sum(ai * bi for ai, bi in zip(a, b))
    norm_a = math.sqrt(sum(ai**2 for ai in a))
    norm_b = math.sqrt(sum(bi**2 for bi in b))

    # Fail with a descriptive error instead of a bare ZeroDivisionError.
    if norm_a == 0 or norm_b == 0:
        raise ValueError("cosine distance is undefined for a zero-magnitude vector")

    return 1 - dot / (norm_a * norm_b)
def embed_queries(queries: list[str]) -> list[tuple[list[float], str]]:
    """Embed every string in ``queries`` with a single embeddings request.

    Args:
        queries: Texts to embed.

    Returns:
        One ``(embedding, text)`` tuple per input, in the same order as
        ``queries``. An empty input yields an empty list without contacting
        the API.
    """
    if not queries:
        # Nothing to embed, so skip the network round-trip entirely.
        return []

    response = oai.embeddings.create(input=queries, model="text-embedding-3-large")
    # Every returned item records the index of the input it belongs to; order
    # by that field instead of assuming the response mirrors the request order.
    ordered_items = sorted(response.data, key=lambda item: item.index)
    return [(item.embedding, query) for item, query in zip(ordered_items, queries)]
def knn(
    embedded_examples: list[tuple[list[float], str]],
    query_embedding: list[float],
    k: int,
) -> list[tuple[float, str]]:
    """Return the ``k`` examples whose embeddings are closest to the query.

    Args:
        embedded_examples: ``(embedding, example)`` tuples to search.
        query_embedding: Embedding of the query.
        k: Maximum number of neighbours to return.

    Returns:
        Up to ``k`` ``(distance, example)`` tuples, nearest first. Examples at
        equal distance keep their input order.

    Raises:
        ValueError: If ``k`` is negative.
    """
    # A negative k used as a slice bound would mean "all but the last |k|",
    # which is never what a caller asking for k neighbours wants.
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    scored = [
        (distance(embedding, query_embedding), example)
        for embedding, example in embedded_examples
    ]
    # Equivalent to sorted(scored, key=...)[:k] without ordering the entries
    # that will be discarded anyway.
    return heapq.nsmallest(k, scored, key=lambda pair: pair[0])
def generate_response(examples: list[str], query: str):
    """Ask the model to answer ``query`` using ``examples`` as context.

    Args:
        examples: Pre-formatted example snippets; they are joined with
            newlines and placed inside the ``<examples>`` section.
        query: The question to answer; placed inside the ``<query>`` section.

    Returns:
        The result of the instructor chat-completion call made with
        ``response_model=Response``.
    """
    # Dedent the fixed template *before* filling it in. Dedenting after
    # interpolation lets the inserted multi-line examples (whose lines start
    # at column 0) decide the common margin, which leaves the prompt indented.
    prompt_template = dedent(
        """
        Respond to the following query with the most accurate
        and concise answer possible.
        <examples>
        {formatted_examples}
        </examples>
        <query>
        {query}
        </query>
        """
    )
    prompt = prompt_template.format(
        formatted_examples="\n".join(examples),
        query=query,
    )
    return client.chat.completions.create(
        model="gpt-4o",
        response_model=Response,
        messages=[{"role": "user", "content": prompt}],
    )
def generate_question_and_answer_pair(
    questions: list[str], question_and_answers: list[dict[str, str]]
) -> list[str]:
    """Render the selected questions as ``<example>`` snippets.

    Args:
        questions: Questions to render, in output order.
        question_and_answers: Dicts with ``"question"`` and ``"answer"`` keys
            used to look up the answer for each question. If a question
            occurs more than once, the last entry wins.

    Returns:
        One snippet per entry in ``questions``, each starting and ending with
        a newline.

    Raises:
        KeyError: If a question has no entry in ``question_and_answers``.
    """
    answer_by_question = {
        pair["question"]: pair["answer"] for pair in question_and_answers
    }
    # Dedent the fixed template first and fill it in afterwards, so that a
    # multi-line question or answer cannot change how the margin is stripped.
    template = dedent(
        """
        <example>
        <question>{question}</question>
        <answer>{answer}</answer>
        </example>
        """
    )
    return [
        template.format(question=question, answer=answer_by_question[question])
        for question in questions
    ]
if __name__ == "__main__":
    examples = [
        {"question": "What is the capital of France?", "answer": "Paris"},
        {"question": "Who wrote Romeo and Juliet", "answer": "Shakespeare"},
        {"question": "What is the capital of Germany?", "answer": "Berlin"},
    ]
    query = "What is the capital of Italy?"

    # Embed the example questions and the query in one batch; the query goes
    # last so it can be split off from the examples again.
    example_questions = [item["question"] for item in examples]
    *embedded_examples, embedded_query = embed_queries([*example_questions, query])
    query_vector = embedded_query[0]

    # Pick the two example questions closest to the query.
    nearest = knn(embedded_examples, query_vector, 2)
    for neighbour in nearest:
        print(neighbour)
        #> (0.4015450506411443, 'What is the capital of France?')
        #> (0.4472610680568724, 'What is the capital of Germany?')

    # Render the chosen questions with their answers and use them as the
    # in-context examples for the final request.
    chosen_questions = [question for _, question in nearest]
    formatted_examples = generate_question_and_answer_pair(chosen_questions, examples)
    response = generate_response(formatted_examples, query)
    print(response.answer)
    #> Rome
References
1: What Makes Good In-Context Examples for GPT-3?
*: The Prompt Report: A Systematic Survey of Prompting Techniques