Skip to content

Commit

Permalink
simplify run_mmlu return value
Browse files (browse the repository at this point in the history)
Signed-off-by: Roni Friedman-Melamed <Roni.friedman-melamed@il.ibm.com>
  • Loading branch information
Roni-Friedman committed Nov 7, 2024
1 parent a4612e7 commit 905c81e
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 8 deletions.
10 changes: 3 additions & 7 deletions src/instructlab/eval/mmlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def run(self, server_url: str | None = None) -> tuple:
agg_score: float = 0.0

results = self._run_mmlu(server_url)
for task, result in results.items():
for task, result in results['results'].items():
agg_score += float(result["acc,none"])
individual_scores[task] = {
"score": float(result["acc,none"]),
Expand All @@ -154,7 +154,7 @@ def run(self, server_url: str | None = None) -> tuple:
return overall_score, individual_scores

def _run_mmlu(
self, server_url: str | None = None, return_all_results: bool = False
self, server_url: str | None = None
) -> dict:
if server_url is not None:
# Requires lm_eval >= 0.4.4
Expand All @@ -179,11 +179,7 @@ def _run_mmlu(
device=self.device,
task_manager=tm,
)
if return_all_results:
results = mmlu_output
else:
results = mmlu_output["results"]
return results
return mmlu_output

# This method converts generic errors raised by simple_evaluate
# into an error that is easier for the user to understand
Expand Down
2 changes: 1 addition & 1 deletion src/instructlab/eval/unitxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def run(self, server_url: str | None = None) -> tuple:
self.prepare_unitxt_files()
logger.debug(locals())
os.environ["TOKENIZERS_PARALLELISM"] = "true"
results = self._run_mmlu(server_url=server_url, return_all_results=True)
results = self._run_mmlu(server_url=server_url)
taskname = self.tasks[0]
global_scores = results["results"][taskname]
global_scores.pop("alias")
Expand Down

0 comments on commit 905c81e

Please sign in to comment.