backends
IlyasMoutawwakil committed Feb 12, 2024
1 parent f1b9134 commit 8ba0a57
Showing 18 changed files with 300 additions and 390 deletions.
104 changes: 37 additions & 67 deletions optimum_benchmark/backends/base.py
@@ -1,90 +1,64 @@
import gc
import os
import random
import shutil
from abc import ABC
from logging import getLogger
from typing import (
Optional,
ClassVar,
Generic,
Dict,
Any,
)

import numpy as np
from transformers.utils import ModelOutput
from transformers import (
GenerationConfig,
PretrainedConfig,
PreTrainedModel,
TrainerState,
AutoModel,
)
from collections import OrderedDict
from typing import Optional, ClassVar, Generic, Dict, Any

from .config import BackendConfigT
from ..task_utils import get_automodel_class_for_task
from .diffusers_utils import (
extract_diffusers_shapes_from_config,
get_diffusers_pretrained_config,
)

from .diffusers_utils import extract_diffusers_shapes_from_config, get_diffusers_pretrained_config
from .timm_utils import extract_timm_shapes_from_config, get_timm_pretrained_config, get_timm_pre_processor
from .transformers_utils import (
extract_transformers_shapes_from_artifacts,
get_transformers_pretrained_processor,
get_transformers_generation_config,
get_transformers_pretrained_config,
get_transformers_cache_dir,
get_transformers_pre_processor,
PretrainedProcessor,
)
from .timm_utils import (
extract_timm_shapes_from_config,
get_timm_pretrained_processor,
get_timm_pretrained_config,
)

import numpy as np
from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel, TrainerState

LOGGER = getLogger("backend")


class Backend(Generic[BackendConfigT], ABC):
NAME: ClassVar[str]

config: BackendConfigT
automodel_class: AutoModel
pretrained_model: PreTrainedModel
model_type: str
model_shapes: Dict[str, int]

pretrained_model: PreTrainedModel
pretrained_config: Optional[PretrainedConfig]
pretrained_processor: Optional[PretrainedProcessor]
pretrained_generation_config: Optional[GenerationConfig]
generation_config: Optional[GenerationConfig]
pre_processor: Optional[PretrainedProcessor]

def __init__(self, config: BackendConfigT):
LOGGER.info(f"Allocating {self.NAME} backend")
self.config = config
self.seed()

if self.config.library == "diffusers":
self.pretrained_processor = None
self.pretrained_generation_config = None
self.pretrained_config = get_diffusers_pretrained_config(model=self.config.model, **self.config.hub_kwargs)
self.model_shapes = extract_diffusers_shapes_from_config(model=self.config.model, **self.config.hub_kwargs)
self.pretrained_config = get_diffusers_pretrained_config(self.config.model, **self.config.hub_kwargs)
self.model_shapes = extract_diffusers_shapes_from_config(self.config.model, **self.config.hub_kwargs)
self.model_type = self.config.task
self.generation_config = None
self.pre_processor = None

elif self.config.library == "timm":
self.pretrained_processor = get_timm_pretrained_processor(self.config.model)
self.pre_processor = get_timm_pre_processor(self.config.model)
self.pretrained_config = get_timm_pretrained_config(self.config.model)
self.model_shapes = extract_timm_shapes_from_config(config=self.pretrained_config)
self.model_type = self.pretrained_config.architecture
self.pretrained_generation_config = None
self.generation_config = None

else:
self.pre_processor = get_transformers_pre_processor(self.config.model, **self.config.hub_kwargs)
self.generation_config = get_transformers_generation_config(self.config.model, **self.config.hub_kwargs)
self.pretrained_config = get_transformers_pretrained_config(self.config.model, **self.config.hub_kwargs)
self.pretrained_generation_config = get_transformers_generation_config(
self.config.model, **self.config.hub_kwargs
)
self.pretrained_processor = get_transformers_pretrained_processor(
self.config.model, **self.config.hub_kwargs
)
self.model_shapes = extract_transformers_shapes_from_artifacts(
config=self.pretrained_config,
processor=self.pretrained_processor,
)
self.model_shapes = extract_transformers_shapes_from_artifacts(self.pretrained_config, self.pre_processor)
self.model_type = self.pretrained_config.model_type

self.automodel_class = get_automodel_class_for_task(
@@ -95,6 +69,7 @@ def __init__(self, config: BackendConfigT):
)

def seed(self) -> None:
LOGGER.info(f"\t+ Setting random seed to {self.config.seed}")
random.seed(self.config.seed)
np.random.seed(self.config.seed)

@@ -112,40 +87,35 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
"""
return inputs

def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
"""
This method is used to perform the forward pass of the model.
"""
raise NotImplementedError("Backend must implement forward method")

def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
"""
This method is used to perform the generation pass of the model.
"""
raise NotImplementedError("Backend must implement generate method")

def call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
"""
This method is used to call a whole pipeline.
"""
raise NotImplementedError("Backend must implement call method")

def train(self, **kwargs) -> TrainerState:
"""
This method is used to train the model.
"""
raise NotImplementedError("Backend must implement train method")

def delete_hf_model_cache(self) -> None:
LOGGER.info("\t+ Deleting model cache")
transformers_cache_path = get_transformers_cache_dir()
model_cache_folder = f"models/{self.config.model}".replace("/", "--")
model_cache_path = os.path.join(transformers_cache_path, model_cache_folder)
shutil.rmtree(model_cache_path, ignore_errors=True)

def delete_pretrained_model(self) -> None:
LOGGER.info("\t+ Deleting pretrained model")
del self.pretrained_model
gc.collect()
if hasattr(self, "pretrained_model"):
del self.pretrained_model

def clean(self) -> None:
LOGGER.info(f"Cleaning {self.NAME} backend")

if hasattr(self, "pretrained_model"):
self.delete_pretrained_model()

self.delete_pretrained_model()
gc.collect()
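
To make the refactored interface above concrete, here is a minimal sketch of a hypothetical subclass; the DummyBackend name, its use of BackendConfig directly as the type parameter, and the from_pretrained loading step are illustrative assumptions, not part of this commit:

from collections import OrderedDict
from typing import Any, Dict

from optimum_benchmark.backends.base import Backend
from optimum_benchmark.backends.config import BackendConfig


class DummyBackend(Backend[BackendConfig]):  # hypothetical backend, for illustration only
    NAME = "dummy"

    def __init__(self, config: BackendConfig):
        # the base __init__ resolves library, model_type, model_shapes and automodel_class
        super().__init__(config)
        # a real backend would also handle device placement, dtype, quantization, etc.
        self.pretrained_model = self.automodel_class.from_pretrained(self.config.model)

    def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
        # single forward pass, matching the new OrderedDict return annotation
        return self.pretrained_model(**inputs, **kwargs)

    def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
        # auto-regressive decoding
        return self.pretrained_model.generate(**inputs, **kwargs)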
61 changes: 22 additions & 39 deletions optimum_benchmark/backends/config.py
@@ -4,20 +4,20 @@
from dataclasses import dataclass, field
from typing import Optional, TypeVar, Dict, Any

from psutil import cpu_count
from ..import_utils import is_psutil_available
from ..env_utils import get_cuda_device_ids, is_nvidia_system, is_rocm_system
from ..task_utils import infer_library_from_model_name_or_path, infer_task_from_model_name_or_path

from ..env_utils import get_gpus, is_nvidia_system, is_rocm_system
from ..task_utils import (
infer_library_from_model_name_or_path,
infer_task_from_model_name_or_path,
)
if is_psutil_available():
from psutil import cpu_count

LOGGER = getLogger("backend")

HUB_KWARGS = {
"revision": "main",
"force_download": False,
"local_files_only": False,
"trust_remote_code": False,
}


@@ -31,6 +31,10 @@ class BackendConfig(ABC):

model: Optional[str] = None
device: Optional[str] = None
# yes we use a string here instead of a list
# it's easier to pass in a yaml or from cli
# also it's consistent with CUDA_VISIBLE_DEVICES
device_ids: Optional[str] = None

task: Optional[str] = None
library: Optional[str] = None
@@ -48,41 +52,20 @@ def __post_init__(self):
self.device = "cuda" if is_nvidia_system() or is_rocm_system() else "cpu"

if ":" in self.device:
raise ValueError(
f"Device was specified as {self.device} with a target index."
"We recommend using the main cuda device (e.g. `cuda`) and "
"specifying the target index in `CUDA_VISIBLE_DEVICES`."
)
# using device index
self.device = self.device.split(":")[0]
self.device_ids = self.device.split(":")[1]

if self.device == "cuda":
if self.device_ids is None:
self.device_ids = get_cuda_device_ids()

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = self.device_ids
# TODO: add rocm specific environment variables ?

if self.device not in ["cuda", "cpu", "mps", "xla"]:
raise ValueError("`device` must be either `cuda`, `cpu`, `mps` or `xla`.")

if self.device == "cuda" and len(get_gpus()) > 1:
if os.environ.get("CUDA_VISIBLE_DEVICES", None) is None:
LOGGER.warning(
"Multiple GPUs detected but CUDA_VISIBLE_DEVICES is not set. "
"This means that code might allocate resources from the wrong GPUs. "
"For example, with `auto_device='auto'. `We recommend setting CUDA_VISIBLE_DEVICES "
"to isolate the GPUs that will be used for this experiment. `CUDA_VISIBLE_DEVICES` will "
"be set to `0` to ensure that only the first GPU is used. If you want to use multiple "
"GPUs, please set `CUDA_VISIBLE_DEVICES` to the desired GPU indices."
)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

if os.environ.get("CUDA_DEVICE_ORDER", None) != "PCI_BUS_ID":
LOGGER.warning(
"Multiple GPUs detected but CUDA_DEVICE_ORDER is not set to `PCI_BUS_ID`. "
"This means that code might allocate resources from the wrong GPUs even if "
"`CUDA_VISIBLE_DEVICES` is set. For example pytorch uses the `FASTEST_FIRST` "
"order by default, which is not guaranteed to be the same as nvidia-smi. `CUDA_DEVICE_ORDER` "
"will be set to `PCI_BUS_ID` to ensure that the GPUs are allocated in the same order as nvidia-smi. "
)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

elif self.device == "cuda" and len(get_gpus()) == 1:
if os.environ.get("CUDA_VISIBLE_DEVICES", None) is None:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
raise ValueError(f"`device` must be either `cuda`, `cpu`, `mps` or `xla`, but got {self.device}")

if self.task is None:
self.task = infer_task_from_model_name_or_path(self.model)
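
As a usage sketch of the device handling above, covering both the comma-separated device_ids string and the new `cuda:N` parsing: this is a standalone re-implementation for illustration, not the library's code verbatim, and the fallback value for device_ids is a placeholder for what get_cuda_device_ids() would return.

import os

device = "cuda:1"   # e.g. passed from the CLI or a YAML config
device_ids = None   # comma-separated string, same format as CUDA_VISIBLE_DEVICES

if ":" in device:
    # a device with an index such as "cuda:1" is split into the main device and its index
    device, device_ids = device.split(":")

if device == "cuda":
    if device_ids is None:
        device_ids = "0,1"  # placeholder: all detected CUDA devices
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = device_ids

print(device, device_ids)  # cuda 1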
14 changes: 5 additions & 9 deletions optimum_benchmark/backends/diffusers_utils.py
@@ -4,31 +4,27 @@

from ..import_utils import is_diffusers_available


if is_diffusers_available():
import diffusers


def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]:
assert is_diffusers_available(), "Diffusers is not available"
return diffusers.DiffusionPipeline.load_config(model, **kwargs)


def extract_diffusers_shapes_from_config(model: str, **kwargs) -> Dict[str, int]:
assert is_diffusers_available(), "Diffusers is not available"
config = diffusers.DiffusionPipeline.load_config(model, **kwargs)

shapes = {}
pip_config = diffusers.DiffusionPipeline.load_config(model, **kwargs)

if "vae" in pip_config:
vae_import_path = pip_config["vae"]
if "vae" in config:
vae_import_path = config["vae"]
vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}")
vae_config = vae_class.load_config(model, subfolder="vae", **kwargs)
shapes["num_channels"] = vae_config["out_channels"]
shapes["height"] = vae_config["sample_size"]
shapes["width"] = vae_config["sample_size"]
elif "vae_encoder" in pip_config:
vae_import_path = pip_config["vae_encoder"]
elif "vae_encoder" in config:
vae_import_path = config["vae_encoder"]
vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}")
vae_config = vae_class.load_config(model, subfolder="vae", **kwargs)
shapes["num_channels"] = vae_config["out_channels"]
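
A hedged usage sketch for the diffusers helpers above (requires diffusers and hub access; the model id and the printed values are illustrative of a pipeline config that exposes a "vae" entry):

from optimum_benchmark.backends.diffusers_utils import (
    extract_diffusers_shapes_from_config,
    get_diffusers_pretrained_config,
)

model_id = "runwayml/stable-diffusion-v1-5"  # illustrative checkpoint
pipeline_config = get_diffusers_pretrained_config(model_id)
shapes = extract_diffusers_shapes_from_config(model_id)
print(shapes)  # e.g. {"num_channels": 3, "height": 512, "width": 512, ...}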
11 changes: 7 additions & 4 deletions optimum_benchmark/backends/openvino/backend.py
@@ -3,6 +3,7 @@
import inspect
from typing import Any, Dict
from logging import getLogger
from collections import OrderedDict
from tempfile import TemporaryDirectory

import torch
@@ -11,7 +12,6 @@
from safetensors.torch import save_file
from optimum.intel.openvino import OVQuantizer
from transformers.modeling_utils import no_init_weights
from transformers.utils import ModelOutput
from transformers.utils.logging import set_verbosity_error
from optimum.intel.openvino import OVConfig as OVQuantizationConfig # naming conflict

@@ -196,12 +196,15 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:

return inputs

def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
return self.pretrained_model(**inputs, **kwargs)
def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
return self.pretrained_model.forward(**inputs, **kwargs)

def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
return self.pretrained_model.generate(**inputs, **kwargs)

def call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
return self.pretrained_model(**inputs, **kwargs)

def clean(self) -> None:
super().clean()

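
For context, a sketch of how a benchmark runner might dispatch across the three entry points shown above (forward, generate and the new call); the run_inference helper and its mode argument are illustrative assumptions, not part of this commit:

from collections import OrderedDict
from typing import Any, Dict


def run_inference(backend, inputs: Dict[str, Any], kwargs: Dict[str, Any], mode: str = "forward") -> OrderedDict:
    # forward: a single model forward pass
    # generate: auto-regressive decoding via model.generate
    # call: a whole pipeline call (e.g. a diffusion pipeline)
    inputs = backend.prepare_inputs(inputs)
    if mode == "generate":
        return backend.generate(inputs, kwargs)
    if mode == "call":
        return backend.call(inputs, kwargs)
    return backend.forward(inputs, kwargs)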
1 change: 0 additions & 1 deletion optimum_benchmark/backends/peft_utils.py
@@ -13,7 +13,6 @@
PromptLearningConfig,
)


PEFT_TASKS_TYPES = [
"SEQ_CLS",
"SEQ_2_SEQ_LM",