Automate Example Selection
How can we improve the performance of few-shot CoT?
While few-shot CoT reasoning is effective, it relies on manually crafted examples. Further, choosing diverse examples has been shown to reduce reasoning errors in CoT.

Here, we automate example selection for CoT so that it picks diverse examples. Given a list of potential examples:
- Cluster: Cluster the potential examples
- Sample: For each cluster,
    - Sort examples by distance from the cluster center
    - Select the first example that meets a predefined selection criterion
- Prompt: Incorporate the chosen questions from each cluster as examples in the LLM prompt
Info: A sample selection criterion could be limiting the number of reasoning steps to a maximum of 5, to encourage sampling examples with simpler rationales.
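As a minimal sketch of such a criterion (the helper name and signature below are illustrative, not part of any library), it can be expressed as a simple predicate over a candidate's generated reasoning steps:

```python
def meets_selection_criterion(reasoning_steps: list[str], max_steps: int = 5) -> bool:
    # Accept a candidate only if its generated rationale stays short and simple.
    return len(reasoning_steps) <= max_steps
```

The sampling loop in the code below applies the same check inline via `len(response.reasoning_steps) <= 5`.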
```python
import instructor
import numpy as np
from openai import OpenAI
from pydantic import BaseModel
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer

client = instructor.patch(OpenAI())

NUM_CLUSTERS = 2


class Example(BaseModel):
    question: str
    reasoning_steps: list[str]


class FinalAnswer(BaseModel):
    reasoning_steps: list[str]
    answer: int


def cluster_and_sort(questions, n_clusters=NUM_CLUSTERS):
    # Cluster: embed the questions and group them with k-means
    embeddings = SentenceTransformer("all-MiniLM-L6-v2").encode(questions)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit(embeddings)

    # Sort: within each cluster, order questions by distance from the cluster center
    sorted_clusters = [[] for _ in range(kmeans.n_clusters)]
    for question, embedding, label in zip(questions, embeddings, kmeans.labels_):
        center = kmeans.cluster_centers_[label]
        distance = np.linalg.norm(embedding - center)
        sorted_clusters[label].append((distance, question))
    for cluster in sorted_clusters:
        cluster.sort()  # Sort by distance
    return sorted_clusters


def sample(cluster):
    # Walk the cluster from its center outwards and return the first question
    # whose generated rationale satisfies the selection criterion
    for _, question in cluster:
        response = client.chat.completions.create(
            model="gpt-4o",
            response_model=Example,
            messages=[
                {
                    "role": "system",
                    "content": "You are an AI assistant that generates step-by-step reasoning for mathematical questions.",
                },
                {
                    "role": "user",
                    "content": f"Q: {question}\nA: Let's think step by step.",
                },
            ],
        )
        if (
            len(response.reasoning_steps) <= 5
        ):  # If we satisfy the selection criterion, we've found our question for this cluster
            return response


if __name__ == "__main__":
    questions = [
        "How many apples are left if you have 10 apples and eat 3?",
        "What's the sum of 5 and 7?",
        "If you have 15 candies and give 6 to your friend, how many do you have left?",
        "What's 8 plus 4?",
        "You start with 20 stickers and use 8. How many stickers remain?",
        "Calculate 6 added to 9.",
    ]

    # Cluster and sort the questions
    sorted_clusters = cluster_and_sort(questions)

    # Sample questions that match the selection criterion for each cluster
    selected_examples = [sample(cluster) for cluster in sorted_clusters]
    print(selected_examples)
    """
    [
        Example(
            question='If you have 15 candies and give 6 to your friend, how many do you have left?',
            reasoning_steps=[
                'Start with the total number of candies you have, which is 15.',
                'Subtract the number of candies you give to your friend, which is 6, from the total candies.',
                '15 - 6 = 9, so you are left with 9 candies.',
            ],
        ),
        Example(
            question="What's the sum of 5 and 7?",
            reasoning_steps=[
                'Identify the numbers to be added: 5 and 7.',
                'Perform the addition: 5 + 7.',
                'The sum is 12.',
            ],
        ),
    ]
    """

    # Use the selected questions as examples for the LLM
    response = client.chat.completions.create(
        model="gpt-4o",
        response_model=FinalAnswer,
        messages=[
            {
                "role": "user",
                "content": f"""
                {selected_examples}

                If there are 10 books in my bag and I read 8 of them, how many books do I have left? Let's think step by step.
                """,
            }
        ],
    )
    print(response.reasoning_steps)
    """
    [
        'Start with the total number of books in the bag, which is 10.',
        "Subtract the number of books you've read, which is 8, from the total books.",
        '10 - 8 = 2, so you have 2 books left.',
    ]
    """
    print(response.answer)
    #> 2
```
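Note that the number of clusters determines how many few-shot examples reach the final prompt: each cluster contributes at most one question, so `NUM_CLUSTERS = 2` selects at most two examples.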
References
1: Automatic Chain of Thought Prompting in Large Language Models
*: The Prompt Report: A Systematic Survey of Prompting Techniques