Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lm_eval/tasks/mmlu_pro/_default_template_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ fewshot_split: validation
fewshot_config:
sampler: first_n
doc_to_text: !function utils.fewshot_to_text
doc_to_target: ""
doc_to_target: !function utils.fewshot_to_target
output_type: generate_until
doc_to_text: !function utils.doc_to_text
doc_to_target: answer
Expand Down
8 changes: 7 additions & 1 deletion lm_eval/tasks/mmlu_pro/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,14 @@ def format_cot_example(example, including_answer=True):
return prompt


def format_cot_target(example, including_answer=True):
    """Return the chain-of-thought text of *example* without its lead-in.

    Reads ``example["cot_content"]`` and removes every occurrence of the
    literal lead-in ``"A: Let's think step by step. "`` (trailing space
    included), returning whatever remains.

    ``including_answer`` is accepted but never read by this function.
    """
    lead_in = "A: Let's think step by step. "
    reasoning = example["cot_content"]
    return reasoning.replace(lead_in, "")


# format_cot_example bound with including_answer=False.
doc_to_text = partial(format_cot_example, including_answer=False)
# NOTE(review): fewshot_to_text is bound twice in a row, so only the second
# binding (including_answer=False) takes effect. This looks like the removed
# and added sides of a diff shown together -- confirm against the real file.
fewshot_to_text = partial(format_cot_example, including_answer=True)
fewshot_to_text = partial(format_cot_example, including_answer=False)
# format_cot_target bound with including_answer=True.
fewshot_to_target = partial(format_cot_target, including_answer=True)


def process_docs(dataset, subject):
Expand Down
24 changes: 21 additions & 3 deletions tests/test_fewshot_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,9 +566,9 @@ def test_with_choices(self, mock_configurable_task):
mock_configurable_task.doc_to_text = Mock(side_effect=lambda d, *args: d["q"])
mock_configurable_task.doc_to_target = Mock(side_effect=lambda d, *args: d["a"])
mock_configurable_task.doc_to_choice = Mock(
side_effect=lambda d, *args: ["A", "B"]
if d == fs_doc
else ["Apple", "Banana"]
side_effect=lambda d, *args: (
["A", "B"] if d == fs_doc else ["Apple", "Banana"]
)
)

result = ConfigurableTask.fewshot_context(
Expand Down Expand Up @@ -755,3 +755,21 @@ def test_singleturn_collapse_for_chat(self):
assert "Q1" in result[1]["content"]
assert "A1" in result[1]["content"]
assert "Q2" in result[1]["content"]


def test_mmlu_pro_fewshot_chat_template_split():
    """Check that the few-shot prompt and target split the worked example.

    The text from ``fewshot_to_text`` must stop at the step-by-step lead-in
    and contain none of the reasoning; ``fewshot_to_target`` must return
    exactly the reasoning.
    """
    from lm_eval.tasks.mmlu_pro.utils import fewshot_to_target, fewshot_to_text

    sample = dict(
        question="What is 2+2?",
        options=["3", "4", "5", "6"],
        cot_content="A: Let's think step by step. Basic arithmetic gives 4.",
        answer="B",
    )

    prompt_side = fewshot_to_text(sample)
    target_side = fewshot_to_target(sample)

    # The prompt side ends at the lead-in and leaks no reasoning.
    assert prompt_side.endswith("Answer: Let's think step by step.")
    assert "Basic arithmetic" not in prompt_side
    # The target side is the reasoning alone.
    assert target_side == "Basic arithmetic gives 4."