diff --git a/src/helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py b/src/helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py
index 4a0f849d334..86420cf4705 100644
--- a/src/helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py
+++ b/src/helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py
@@ -2,56 +2,40 @@
 Available subjects: "clinical_knowledge", "college_medicine", "virology"
 Available langs: "af", "zu", "xh", "am", "bm", "ig", "nso", "sn", "st", "tn", "ts"
 (see lang_map below for language code mapping to language name, or here for ISO code reference: https://huggingface.co/languages)
-"""
+"""  # noqa: E501
 
-from helm.benchmark.adaptation.adapter_spec import (
-    ADAPT_GENERATION,
-    ADAPT_MULTIPLE_CHOICE_JOINT,
-    AdapterSpec,
-)
-from helm.benchmark.adaptation.common_adapter_specs import (
-    get_generation_adapter_spec,
-    get_machine_translation_adapter_spec,
-    get_multiple_choice_adapter_spec,
-)
-from helm.benchmark.metrics.common_metric_specs import (
-    get_basic_generation_metric_specs,
-    get_basic_metric_specs,
-    get_exact_match_metric_specs,
-    get_f1_metric_specs,
-    get_generative_harms_metric_specs,
-    get_generic_metric_specs,
-    get_open_ended_generation_metric_specs,
-)
+from helm.benchmark.adaptation.adapter_spec import ADAPT_MULTIPLE_CHOICE_JOINT
+from helm.benchmark.adaptation.common_adapter_specs import get_multiple_choice_adapter_spec
+from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs
 from helm.benchmark.run_spec import RunSpec, run_spec_function
-from helm.benchmark.runner import get_benchmark_output_path
-from helm.benchmark.scenarios.scenario import ScenarioSpec, get_scenario_cache_path
+from helm.benchmark.scenarios.scenario import ScenarioSpec
 
 
 @run_spec_function("mmlu_clinical_afr")
 def get_mmlu_clinical_afr_spec(subject: str, lang: str, method: str = ADAPT_MULTIPLE_CHOICE_JOINT) -> RunSpec:
     scenario_spec = ScenarioSpec(
-        class_name="helm.benchmark.scenarios.mmlu_clinical_afr_scenario.MMLU_Clinical_Afr_Scenario", args={"subject": subject, "lang": lang}
+        class_name="helm.benchmark.scenarios.mmlu_clinical_afr_scenario.MMLU_Clinical_Afr_Scenario",
+        args={"subject": subject, "lang": lang},
     )
 
     lang_map = {
-        'af': 'Afrikaans',
-        'zu': 'Zulu',
-        'xh': 'Xhosa',
-        'am': 'Amharic',
-        'bm': 'Bambara',
-        'ig': 'Igbo',
-        'nso': 'Sepedi',
-        'sn': 'Shona',
-        'st': 'Sesotho',
-        'tn': 'Setswana',
-        'ts': 'Tsonga',
+        "af": "Afrikaans",
+        "zu": "Zulu",
+        "xh": "Xhosa",
+        "am": "Amharic",
+        "bm": "Bambara",
+        "ig": "Igbo",
+        "nso": "Sepedi",
+        "sn": "Shona",
+        "st": "Sesotho",
+        "tn": "Setswana",
+        "ts": "Tsonga",
     }
 
     adapter_spec = get_multiple_choice_adapter_spec(
         method=method,
         instructions=f"The following are multiple choice questions (with answers) about {subject.replace('_', ' ')} "
-                     f"in {lang_map[lang]}.",
+        f"in {lang_map[lang]}.",
         input_noun="Question",
         output_noun="Answer",
     )
diff --git a/src/helm/benchmark/run_specs/winogrande_afr_run_specs.py b/src/helm/benchmark/run_specs/winogrande_afr_run_specs.py
index cccbb61a82d..58e1adca2a3 100644
--- a/src/helm/benchmark/run_specs/winogrande_afr_run_specs.py
+++ b/src/helm/benchmark/run_specs/winogrande_afr_run_specs.py
@@ -1,30 +1,13 @@
 """Run spec functions for Winogrande human-translated into 11 African languages
 Available langs: "af", "zu", "xh", "am", "bm", "ig", "nso", "sn", "st", "tn", "ts"
 (see lang_map below for language code mapping to language name, or here for ISO code reference: https://huggingface.co/languages)
-"""
+"""  # noqa: E501
 
-from helm.benchmark.adaptation.adapter_spec import (
-    ADAPT_GENERATION,
-    ADAPT_MULTIPLE_CHOICE_JOINT,
-    AdapterSpec,
-)
-from helm.benchmark.adaptation.common_adapter_specs import (
-    get_generation_adapter_spec,
-    get_machine_translation_adapter_spec,
-    get_multiple_choice_adapter_spec,
-)
-from helm.benchmark.metrics.common_metric_specs import (
-    get_basic_generation_metric_specs,
-    get_basic_metric_specs,
-    get_exact_match_metric_specs,
-    get_f1_metric_specs,
-    get_generative_harms_metric_specs,
-    get_generic_metric_specs,
-    get_open_ended_generation_metric_specs,
-)
+from helm.benchmark.adaptation.adapter_spec import ADAPT_MULTIPLE_CHOICE_JOINT
+from helm.benchmark.adaptation.common_adapter_specs import get_multiple_choice_adapter_spec
+from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs
 from helm.benchmark.run_spec import RunSpec, run_spec_function
-from helm.benchmark.runner import get_benchmark_output_path
-from helm.benchmark.scenarios.scenario import ScenarioSpec, get_scenario_cache_path
+from helm.benchmark.scenarios.scenario import ScenarioSpec
 
 
 @run_spec_function("winogrande_afr")
@@ -34,23 +17,23 @@ def get_winogrande_afr_spec(lang: str, method: str = ADAPT_MULTIPLE_CHOICE_JOINT
     )
 
     lang_map = {
-        'af': 'Afrikaans',
-        'zu': 'Zulu',
-        'xh': 'Xhosa',
-        'am': 'Amharic',
-        'bm': 'Bambara',
-        'ig': 'Igbo',
-        'nso': 'Sepedi',
-        'sn': 'Shona',
-        'st': 'Sesotho',
-        'tn': 'Setswana',
-        'ts': 'Tsonga',
+        "af": "Afrikaans",
+        "zu": "Zulu",
+        "xh": "Xhosa",
+        "am": "Amharic",
+        "bm": "Bambara",
+        "ig": "Igbo",
+        "nso": "Sepedi",
+        "sn": "Shona",
+        "st": "Sesotho",
+        "tn": "Setswana",
+        "ts": "Tsonga",
     }
 
     adapter_spec = get_multiple_choice_adapter_spec(
         method=method,
-        instructions=f"The following are binary choice fill-in-the-blank sentences (with answers), requiring common sense reasoning "
-                     f"in {lang_map[lang]}.",
+        instructions=f"The following are binary choice fill-in-the-blank sentences (with answers), "
+        f"requiring common sense reasoning in {lang_map[lang]}.",
         input_noun="Question",
         output_noun="Answer",
     )
diff --git a/src/helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py b/src/helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py
index a2601940570..82e0ba8e80d 100644
--- a/src/helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py
+++ b/src/helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py
@@ -23,10 +23,10 @@ def __init__(self, subject: str = "clinical_knowledge", lang: str = "af"):
 
     def download_mmlu_clinical_afr(self, path: str):
         ensure_file_downloaded(
-            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",
+            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",  # noqa: E501
             target_path=path,
             unpack=True,
-            unpack_type='unzip'
+            unpack_type="unzip",
         )
 
     def process_csv(self, csv_path: str, split: str) -> List[Instance]:
@@ -53,7 +53,7 @@ def answer_to_reference(answer: str) -> Reference:
 
     def get_instances(self, output_path: str) -> List[Instance]:
         # Download the raw data
-        desired_dir = 'mmlu_cm_ck_vir'
+        desired_dir = "mmlu_cm_ck_vir"
         data_path: str = os.path.join(output_path, desired_dir)
         self.download_mmlu_clinical_afr(data_path)
 
diff --git a/src/helm/benchmark/scenarios/winogrande_afr_scenario.py b/src/helm/benchmark/scenarios/winogrande_afr_scenario.py
index b5dbdf571b1..b89bba5dc53 100644
--- a/src/helm/benchmark/scenarios/winogrande_afr_scenario.py
+++ b/src/helm/benchmark/scenarios/winogrande_afr_scenario.py
@@ -22,16 +22,16 @@ def __init__(self, lang: str = "af"):
 
     def download_winogrande_afr(self, path: str):
         ensure_file_downloaded(
-            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",
+            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",  # noqa: E501
             target_path=path,
             unpack=True,
-            unpack_type='unzip'
+            unpack_type="unzip",
         )
 
     def process_csv(self, csv_path: str, split: str, pseudo_split: str) -> List[Instance]:
         # Match naming in Winogrande
-        if pseudo_split == 'val':
-            pseudo_split = 'train_s'
+        if pseudo_split == "val":
+            pseudo_split = "train_s"
         instances: List[Instance] = []
         hlog(f"Reading {csv_path}")
         with open(csv_path) as f:
@@ -57,7 +57,7 @@ def answer_to_reference(answer: str) -> Reference:
 
     def get_instances(self, output_path: str) -> List[Instance]:
         # Download the raw data
-        desired_dir = 'winogrande_s'
+        desired_dir = "winogrande_s"
         data_path: str = os.path.join(output_path, desired_dir)
         self.download_winogrande_afr(data_path)
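As a quick local sanity check, the two new run spec functions can be called directly rather than through a full benchmark run. This is a minimal sketch, not part of the diff: it assumes a HELM development install where the modules above are importable, the subject/lang values come from the module docstrings, and scenario_spec / adapter_spec are standard RunSpec fields.

# Sketch only: exercises the new @run_spec_function entry points, assuming a
# HELM dev install. "zu" (Zulu) is one of the 11 language codes in lang_map.
from helm.benchmark.run_specs.mmlu_clinical_afr_run_specs import get_mmlu_clinical_afr_spec
from helm.benchmark.run_specs.winogrande_afr_run_specs import get_winogrande_afr_spec

mmlu_spec = get_mmlu_clinical_afr_spec(subject="clinical_knowledge", lang="zu")
wino_spec = get_winogrande_afr_spec(lang="zu")

# Each RunSpec bundles the scenario to load, the adapter (prompt format), and metrics.
print(mmlu_spec.scenario_spec.class_name)   # ...mmlu_clinical_afr_scenario.MMLU_Clinical_Afr_Scenario
print(mmlu_spec.adapter_spec.instructions)  # "...about clinical knowledge in Zulu."
print(wino_spec.adapter_spec.instructions)  # "...common sense reasoning in Zulu."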