From 23265b09be909458dedf5b057914bd88b90be4e9 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Mon, 12 Feb 2024 10:17:00 +0100
Subject: [PATCH] fix training report aggregation

---
 .../backends/onnxruntime/backend.py      | 38 +++++++++----------
 .../benchmarks/training/benchmark.py     |  1 +
 .../benchmarks/training/report.py        | 33 +++++++++++++++-
 3 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/optimum_benchmark/backends/onnxruntime/backend.py b/optimum_benchmark/backends/onnxruntime/backend.py
index 0801b000..9983ead2 100644
--- a/optimum_benchmark/backends/onnxruntime/backend.py
+++ b/optimum_benchmark/backends/onnxruntime/backend.py
@@ -1,18 +1,29 @@
 import gc
 import os
 from logging import getLogger
+from collections import OrderedDict
 from tempfile import TemporaryDirectory
 from typing import Any, Callable, Dict, List
+from ...generators.dataset_generator import DatasetGenerator
+from ...task_utils import TEXT_GENERATION_TASKS
+from .config import ORTConfig
+from ..base import Backend
+from .utils import (
+    format_calibration_config,
+    format_quantization_config,
+    TASKS_TO_ORTMODELS,
+    TASKS_TO_ORTSD,
+)
 
 import torch
 from datasets import Dataset
 from hydra.utils import get_class
 from onnxruntime import SessionOptions
 from safetensors.torch import save_file
-from transformers.utils import ModelOutput
 from transformers import TrainerCallback, TrainerState
 from transformers.modeling_utils import no_init_weights
 from transformers.utils.logging import set_verbosity_error
+from optimum.onnxruntime import ONNX_DECODER_WITH_PAST_NAME, ONNX_DECODER_NAME, ORTOptimizer, ORTQuantizer
 from optimum.onnxruntime.configuration import (
     AutoOptimizationConfig,
     AutoQuantizationConfig,
@@ -21,23 +32,7 @@
     QuantizationConfig,
     CalibrationConfig,
 )
-from optimum.onnxruntime import (
-    ONNX_DECODER_WITH_PAST_NAME,
-    ONNX_DECODER_NAME,
-    ORTOptimizer,
-    ORTQuantizer,
-)
-from ...generators.dataset_generator import DatasetGenerator
-from ...task_utils import TEXT_GENERATION_TASKS
-from .config import ORTConfig
-from ..base import Backend
-from .utils import (
-    format_calibration_config,
-    format_quantization_config,
-    TASKS_TO_ORTMODELS,
-    TASKS_TO_ORTSD,
-)
 
 
 # disable transformers logging
 set_verbosity_error()
@@ -353,12 +348,15 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
 
         return inputs
 
-    def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
-        return self.pretrained_model(**inputs, **kwargs)
+    def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
+        return self.pretrained_model.forward(**inputs, **kwargs)
 
-    def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
+    def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
         return self.pretrained_model.generate(**inputs, **kwargs)
 
+    def call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
+        return self.pretrained_model(**inputs, **kwargs)
+
     def train(
         self,
         training_dataset: Dataset,
diff --git a/optimum_benchmark/benchmarks/training/benchmark.py b/optimum_benchmark/benchmarks/training/benchmark.py
index 5ae70a44..6f27b5f4 100644
--- a/optimum_benchmark/benchmarks/training/benchmark.py
+++ b/optimum_benchmark/benchmarks/training/benchmark.py
@@ -34,6 +34,7 @@ def run(self, backend: Backend[BackendConfigT]) -> None:
 
         LOGGER.info("\t+ Initializing training report")
         self.report = TrainingReport(
+            num_processes=1,  # this process
             max_steps=self.config.max_steps,
             warmup_steps=self.config.warmup_steps,
             per_process_batch_size=self.config.training_arguments["per_device_train_batch_size"],
diff --git a/optimum_benchmark/benchmarks/training/report.py b/optimum_benchmark/benchmarks/training/report.py
index a4bc65b2..6f5e6f11 100644
--- a/optimum_benchmark/benchmarks/training/report.py
+++ b/optimum_benchmark/benchmarks/training/report.py
@@ -12,6 +12,7 @@ class TrainingReport(BenchmarkReport):
 
     max_steps: int
     warmup_steps: int
+    num_processes: int
     per_process_batch_size: int
     gradient_accumulation_steps: int
 
@@ -82,13 +83,43 @@ def __add__(self, other: "TrainingReport") -> "TrainingReport":
             self.gradient_accumulation_steps == other.gradient_accumulation_steps
         ), "Both reports must have the same gradient_accumulation_steps"
 
-        TrainingReport(
+        agg_report = TrainingReport(
             max_steps=self.max_steps,
             warmup_steps=self.warmup_steps,
+            num_processes=self.num_processes + other.num_processes,
             gradient_accumulation_steps=self.gradient_accumulation_steps,
             per_process_batch_size=self.per_process_batch_size + other.per_process_batch_size,
         )
 
+        if "latency" in self.training and "latency" in other.training:
+            agg_training_latencies_list = [
+                max(lat_1, lat_2)
+                for lat_1, lat_2 in zip(self.training["latency"]["list[s]"], other.training["latency"]["list[s]"])
+            ]
+            agg_report.populate_latency(agg_training_latencies_list)
+
+        if "memory" in self.training and "memory" in other.training:
+            agg_training_memories_dict = {}
+            for key in self.training["memory"]:
+                if "vram" in key:
+                    # our vram measures are not process-specific
+                    agg_training_memories_dict[key] = max(self.training["memory"][key], other.training["memory"][key])
+                else:
+                    # ram and pytorch measures are process-specific
+                    agg_training_memories_dict[key] = self.training["memory"][key] + other.training["memory"][key]
+
+            agg_report.populate_memory(agg_training_memories_dict)
+
+        if "energy" in self.training and "energy" in other.training:
+            agg_training_energies_dict = {}
+            for key in self.training["energy"]:
+                # theoretically, the energies measured by codecarbon are process-specific (it's not clear from the code)
+                agg_training_energies_dict[key] = self.training["energy"][key] + other.training["energy"][key]
+
+            agg_report.populate_energy(agg_training_energies_dict)
+
+        return agg_report
+
 
 def compute_mean(values: List[float]) -> float:
     return mean(values) if len(values) > 0 else 0.0
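
Note (not part of the patch): the following is a minimal, standalone sketch of the aggregation rule the patched TrainingReport.__add__ implements, kept separate so the intent is easy to check. MiniReport and its field names are hypothetical stand-ins, not optimum_benchmark's actual classes; the sketch only mirrors the rules visible in the diff: sum process counts, take the element-wise max of step latencies, take the max of VRAM measures while summing RAM/PyTorch memory, and sum energy (assumed per-process).

# Illustrative sketch only (hypothetical MiniReport, not optimum_benchmark's TrainingReport):
# mirrors the aggregation rules of the patched __add__.
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class MiniReport:
    num_processes: int
    latencies_s: List[float]   # per-step latencies measured by this process
    memory: Dict[str, float]   # e.g. {"max_ram_MB": ..., "max_vram_MB": ...}
    energy: Dict[str, float]   # e.g. {"gpu_energy_kWh": ...}

    def __add__(self, other: "MiniReport") -> "MiniReport":
        return MiniReport(
            # process counts accumulate
            num_processes=self.num_processes + other.num_processes,
            # a step is only finished once the slowest process finishes it
            latencies_s=[max(a, b) for a, b in zip(self.latencies_s, other.latencies_s)],
            # vram is device-wide (max); ram/pytorch measures are per-process (sum)
            memory={
                key: max(val, other.memory[key]) if "vram" in key else val + other.memory[key]
                for key, val in self.memory.items()
            },
            # energy is treated as per-process (sum)
            energy={key: val + other.energy[key] for key, val in self.energy.items()},
        )


if __name__ == "__main__":
    rank0 = MiniReport(1, [0.10, 0.12], {"max_ram_MB": 800.0, "max_vram_MB": 4000.0}, {"gpu_energy_kWh": 0.01})
    rank1 = MiniReport(1, [0.11, 0.09], {"max_ram_MB": 750.0, "max_vram_MB": 4100.0}, {"gpu_energy_kWh": 0.02})
    merged = rank0 + rank1
    print(merged.num_processes)  # 2
    print(merged.latencies_s)    # [0.11, 0.12]
    print(merged.memory)         # {'max_ram_MB': 1550.0, 'max_vram_MB': 4100.0}
    print(merged.energy)         # ~0.03 kWh total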