From bacfc987e65f12951f6e57cbe17162a8489660e1 Mon Sep 17 00:00:00 2001
From: baptiste colle <collebaptiste@gmail.com>
Date: Thu, 1 Aug 2024 11:32:30 +0000
Subject: [PATCH 01/44] build autoawq and auto-gptq from source

---
 CONTRIBUTING.md        |  6 +++---
 Makefile               |  3 ++-
 docker/cuda/Dockerfile |  2 +-
 setup.py               | 10 ++++++----
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4da9ba85..899e9139 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -55,9 +55,9 @@ If you would like to work on any of the open Issues:
 	For a better development experience, we recommend using isolated docker containers to run tests:
 	
 	```bash
-	make build_docker_cpu
-	make run_docker_cpu
-	make install_cli_cpu_pytorch_extras
+	make build_cpu_image
+	make run_cpu_container
+	make install_cli_cpu_pytorch
 	make test_cli_cpu_pytorch
 	```
 
diff --git a/Makefile b/Makefile
index 9ef27918..e30c1062 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@
 PWD := $(shell pwd)
 USER_ID := $(shell id -u)
 GROUP_ID := $(shell id -g)
+TORCH_VERSION := 2.3.1
 
 quality:
 	ruff check .
@@ -23,7 +24,7 @@ build_cpu_image:
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot
 
 build_cuda_image:
-	docker build -t optimum-benchmark:latest-cuda docker/cuda
+	docker build --build-arg TORCH_VERSION=$(TORCH_VERSION) -t optimum-benchmark:latest-cuda docker/cuda
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot
 
 build_cuda_ort_image:
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index 6ec05d28..decbb345 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -25,7 +25,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     python3.10 python3-pip python3.10-dev && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
-    pip install --no-cache-dir --upgrade pip setuptools wheel 
+    pip install --no-cache-dir --upgrade pip setuptools wheel requests
 
 # Install PyTorch
 ARG TORCH_CUDA=cu121
diff --git a/setup.py b/setup.py
index b024a738..ffc15cc5 100644
--- a/setup.py
+++ b/setup.py
@@ -62,8 +62,10 @@
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
 else:
-    AUTOAWQ = "autoawq==0.2.1"
-    AUTOGPTQ = "auto-gptq==0.7.1"
+    AUTOAWQ = "autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"
+    AUTOAWQ_KERNELS = "autoawq-kernels@git+https://github.com/casper-hansen/AutoAWQ_kernels.git"
+
+    AUTOGPTQ = "auto-gptq@git+https://github.com/PanQiWei/AutoGPTQ.git"
 
 EXTRAS_REQUIRE = {
     "quality": ["ruff"],
@@ -79,7 +81,7 @@
     "py-txi": ["py-txi"],
     "vllm": ["vllm"],
     # optional dependencies
-    "autoawq": [AUTOAWQ],
+    "autoawq": [AUTOAWQ_KERNELS, AUTOAWQ],
     "auto-gptq": ["optimum", AUTOGPTQ],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
@@ -112,7 +114,7 @@
         "License :: OSI Approved :: Apache Software License",
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
     ],
-    keywords="benchmaek, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
+    keywords="benchmark, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
     "habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, auto-gptq, autoawq, "
     "sentence-transformers, bitsandbytes, codecarbon, flash-attn, deepspeed, diffusers, timm, peft",
     long_description=open("README.md", "r", encoding="utf-8").read(),

From e996b896372055ddb647ad34ea38c1140b11f9c0 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Thu, 8 Aug 2024 12:15:18 +0000
Subject: [PATCH 02/44] install from source with unpinned torch

---
 .gitignore |  2 ++
 Makefile   |  7 ++++---
 setup.py   | 12 ++++++++----
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index b30407e0..3315d216 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,5 @@ work-in-progress/
 experiments/
 amdsmi/
 amd-*
+
+external_repos/
\ No newline at end of file
diff --git a/Makefile b/Makefile
index e30c1062..06062867 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ build_cpu_image:
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot
 
 build_cuda_image:
-	docker build --build-arg TORCH_VERSION=$(TORCH_VERSION) -t optimum-benchmark:latest-cuda docker/cuda
+	docker build -t optimum-benchmark:latest-cuda docker/cuda
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot
 
 build_cuda_ort_image:
@@ -109,7 +109,8 @@ install_cli_cpu_neural_compressor:
 	pip install -e .[testing,peft,timm,diffusers,neural-compressor]
 
 install_cli_cuda_pytorch:
-	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
+	python scripts/install_autoawq.py
+	pip install -e .[testing,timm,diffusers,peft,auto-gptq,bitsandbytes,deepspeed]
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
@@ -159,7 +160,7 @@ test_cli_cuda_pytorch_multi_gpu:
 	pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not awq"
 
 test_cli_cuda_pytorch_single_gpu:
-	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not awq"
+	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not awq" --ignore=external_repos
 
 test_cli_cuda_torch_ort_multi_gpu:
 	pytest -s -k "cli and cuda and torch-ort and (dp or ddp or device_map or deepspeed) and not peft"
diff --git a/setup.py b/setup.py
index ffc15cc5..8782ba7d 100644
--- a/setup.py
+++ b/setup.py
@@ -58,13 +58,14 @@
             "Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature."
         )
 
+
+
 if USE_ROCM:
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
-else:
-    AUTOAWQ = "autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"
-    AUTOAWQ_KERNELS = "autoawq-kernels@git+https://github.com/casper-hansen/AutoAWQ_kernels.git"
 
+else:
+    # AUTOAWQ will be installed from source via scripts/install_autoawq.py script
     AUTOGPTQ = "auto-gptq@git+https://github.com/PanQiWei/AutoGPTQ.git"
 
 EXTRAS_REQUIRE = {
@@ -81,7 +82,7 @@
     "py-txi": ["py-txi"],
     "vllm": ["vllm"],
     # optional dependencies
-    "autoawq": [AUTOAWQ_KERNELS, AUTOAWQ],
+    "autoawq": [],
     "auto-gptq": ["optimum", AUTOGPTQ],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
@@ -93,6 +94,9 @@
     "peft": ["peft"],
 }
 
+if USE_ROCM:
+    EXTRAS_REQUIRE["autoawq"] = [AUTOAWQ]
+
 
 setup(
     packages=find_packages(),

From 0696ba4f3465b5ebfc1be0ef96ba01a856783efb Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Thu, 15 Aug 2024 12:06:57 +0000
Subject: [PATCH 03/44] try loading the kernels directly and provide a utility
 to install autoawq and autogptq from source

---
 .gitignore                                    |  3 +-
 Makefile                                      |  6 +-
 optimum_benchmark/backends/pytorch/backend.py | 20 +++++
 optimum_benchmark/cli.py                      | 12 +++
 optimum_benchmark/install_utils.py            | 90 +++++++++++++++++++
 setup.py                                      |  9 +-
 6 files changed, 129 insertions(+), 11 deletions(-)
 create mode 100644 optimum_benchmark/install_utils.py

diff --git a/.gitignore b/.gitignore
index 3315d216..99ad7db2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,4 +173,5 @@ experiments/
 amdsmi/
 amd-*
 
-external_repos/
\ No newline at end of file
+external_repos/
+outputs/
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 06062867..8cc13472 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,6 @@
 PWD := $(shell pwd)
 USER_ID := $(shell id -u)
 GROUP_ID := $(shell id -g)
-TORCH_VERSION := 2.3.1
 
 quality:
 	ruff check .
@@ -109,8 +108,9 @@ install_cli_cpu_neural_compressor:
 	pip install -e .[testing,peft,timm,diffusers,neural-compressor]
 
 install_cli_cuda_pytorch:
-	python scripts/install_autoawq.py
-	pip install -e .[testing,timm,diffusers,peft,auto-gptq,bitsandbytes,deepspeed]
+	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
+	optimum-benchmark +install_auto_awq_from_source=True
+	optimum-benchmark +install_auto_gptq_from_source=True
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 33914164..2c570b40 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -286,11 +286,31 @@ def create_no_weights_model(self) -> None:
     def process_quantization_config(self) -> None:
         if self.is_gptq_quantized:
             self.logger.info("\t+ Processing GPTQ config")
+
+            try:
+                import exllamav2_kernels  # noqa: F401
+            except ImportError:
+                raise ImportError(
+                    "GPTQ quantization requires the AutoGPTQ package. "
+                    "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
+                    "Or run optimum-benchmark +install_auto_gptq=True"
+                )
+
             self.quantization_config = GPTQConfig(
                 **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
             )
         elif self.is_awq_quantized:
             self.logger.info("\t+ Processing AWQ config")
+
+            try:
+                import awq_ext  # noqa: F401
+            except ImportError:
+                raise ImportError(
+                    "AWQ quantization requires the AutoAWQ package. "
+                    "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
+                    "Or run optimum-benchmark +install_auto_awq=True"
+                )
+
             self.quantization_config = AwqConfig(
                 **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
             )
diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 7b44f621..b91e9f35 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -6,6 +6,8 @@
 from hydra.core.config_store import ConfigStore
 from omegaconf import DictConfig, OmegaConf
 
+from optimum_benchmark.install_utils import InstallConfig, install_autoawq, install_autogptq
+
 from . import (
     Benchmark,
     BenchmarkConfig,
@@ -53,10 +55,20 @@
 cs.store(group="launcher", name=ProcessConfig.name, node=ProcessConfig)
 cs.store(group="launcher", name=TorchrunConfig.name, node=TorchrunConfig)
 
+cs.store(name="install_config", node=InstallConfig)
+
 
 # optimum-benchmark
 @hydra.main(version_base=None)
 def main(config: DictConfig) -> None:
+    if "install_auto_awq_from_source" in config and config.install_auto_awq_from_source:
+        install_autoawq()
+        return
+
+    if "install_auto_gptq_from_source" in config and config.install_auto_gptq_from_source:
+        install_autogptq()
+        return
+
     log_level = os.environ.get("LOG_LEVEL", "INFO")
     log_to_file = os.environ.get("LOG_TO_FILE", "1") == "1"
     override_benchmarks = os.environ.get("OVERRIDE_BENCHMARKS", "0") == "1"
diff --git a/optimum_benchmark/install_utils.py b/optimum_benchmark/install_utils.py
new file mode 100644
index 00000000..cf004ac2
--- /dev/null
+++ b/optimum_benchmark/install_utils.py
@@ -0,0 +1,90 @@
+import os
+import re
+import subprocess
+import sys
+from dataclasses import dataclass
+
+EXTERNAL_REPOS_DIR = "external_repos"
+
+
+@dataclass
+class InstallConfig:
+    install_auto_awq_from_source: bool = False
+    install_auto_gptq_from_source: bool = False
+
+
+def remove_torch_from_setup(setup_file_path):
+    """Remove any torch requirement from the setup.py file."""
+    with open(setup_file_path, "r") as file:
+        setup_content = file.read()
+
+    # Use a regular expression to remove any line containing "torch=="
+    setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content)
+
+    # Write the modified content back to setup.py
+    with open(setup_file_path, "w") as file:
+        file.write(setup_content)
+
+
+def clone_or_pull_repo(repo_url, repo_location_path):
+    """Clone the repo if it doesn't exist; otherwise, pull the latest changes."""
+    if os.path.exists(repo_location_path):
+        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
+        subprocess.run(f"cd {repo_location_path} && git pull", shell=True, check=True)
+    else:
+        repo_name = repo_location_path.split("/")[-1]
+        print(f"Cloning {repo_name} into {repo_location_path}")
+        subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True)
+
+
+def install_autogptq():
+    """Install the AutoGPTQ package from GitHub."""
+
+    print("Installing AutoGPTQ package.")
+
+    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
+
+    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
+
+    subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
+
+    subprocess.run(
+        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation -e .",
+        shell=True,
+        check=True,
+    )
+
+    print("AutoGPTQ package installed.")
+
+
+def install_autoawq():
+    """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
+
+    print("Installing AutoAWQ and AutoAWQ_kernels packages.")
+
+    autoawq_kernels_repo_name = "AutoAWQ_kernels"
+    autoawq_repo_name = "AutoAWQ"
+
+    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
+    autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name)
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path)
+
+    kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
+    remove_torch_from_setup(kernels_setup_file_path)
+    subprocess.run(
+        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
+        shell=True,
+        check=True,
+        env=os.environ,
+    )
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path)
+
+    autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
+    remove_torch_from_setup(autoawq_setup_file_path)
+    subprocess.run(
+        f"cd {autoawq_repo_path} && {sys.executable} -m pip install -e .", shell=True, check=True, env=os.environ
+    )
+
+    print("AutoAWQ and AutoAWQ_kernels packages installed.")
diff --git a/setup.py b/setup.py
index 8782ba7d..d04b61d7 100644
--- a/setup.py
+++ b/setup.py
@@ -59,15 +59,10 @@
         )
 
 
-
 if USE_ROCM:
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
 
-else:
-    # AUTOAWQ will be installed from source via scripts/install_autoawq.py script
-    AUTOGPTQ = "auto-gptq@git+https://github.com/PanQiWei/AutoGPTQ.git"
-
 EXTRAS_REQUIRE = {
     "quality": ["ruff"],
     "testing": ["pytest", "hydra-joblib-launcher"],
@@ -83,7 +78,7 @@
     "vllm": ["vllm"],
     # optional dependencies
     "autoawq": [],
-    "auto-gptq": ["optimum", AUTOGPTQ],
+    "auto-gptq": ["optimum"],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
     "codecarbon": ["codecarbon"],
@@ -96,7 +91,7 @@
 
 if USE_ROCM:
     EXTRAS_REQUIRE["autoawq"] = [AUTOAWQ]
-
+    EXTRAS_REQUIRE["auto-gptq"].append(AUTOGPTQ)
 
 setup(
     packages=find_packages(),

From f3058f10e36077c2736db9e38affaa6cb2f083a7 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Thu, 15 Aug 2024 12:56:47 +0000
Subject: [PATCH 04/44] update cli argument

---
 optimum_benchmark/backends/pytorch/backend.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 830aa30c..1992720f 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or run optimum-benchmark +install_auto_gptq=True"
+                    "Or run optimum-benchmark +install_auto_gptq_from_source=True"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or run optimum-benchmark +install_auto_awq=True"
+                    "Or run optimum-benchmark +install_auto_awq_from_source=True"
                 )
 
             self.quantization_config = AwqConfig(

From 342ddf1b0096a355ab8a8ecc82d299f571db42c1 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 07:37:42 +0000
Subject: [PATCH 05/44] add quantization lib to an install script

---
 Makefile                                      |  3 +-
 optimum_benchmark/backends/pytorch/backend.py |  4 +-
 optimum_benchmark/cli.py                      | 12 ---
 optimum_benchmark/install_utils.py            | 90 -------------------
 4 files changed, 3 insertions(+), 106 deletions(-)
 delete mode 100644 optimum_benchmark/install_utils.py

diff --git a/Makefile b/Makefile
index 61a2edad..6bc83f80 100644
--- a/Makefile
+++ b/Makefile
@@ -109,8 +109,7 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	optimum-benchmark +install_auto_awq_from_source=True
-	optimum-benchmark +install_auto_gptq_from_source=True
+	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq --install-auto-gptq
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 1992720f..22259e09 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or run optimum-benchmark +install_auto_gptq_from_source=True"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-auto-gptq`"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or run optimum-benchmark +install_auto_awq_from_source=True"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq`"
                 )
 
             self.quantization_config = AwqConfig(
diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 014698f2..57c6b054 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -6,8 +6,6 @@
 from hydra.core.config_store import ConfigStore
 from omegaconf import DictConfig, OmegaConf
 
-from optimum_benchmark.install_utils import InstallConfig, install_autoawq, install_autogptq
-
 from . import (
     Benchmark,
     BenchmarkConfig,
@@ -57,20 +55,10 @@
 cs.store(group="launcher", name=ProcessConfig.name, node=ProcessConfig)
 cs.store(group="launcher", name=TorchrunConfig.name, node=TorchrunConfig)
 
-cs.store(name="install_config", node=InstallConfig)
-
 
 # optimum-benchmark
 @hydra.main(version_base=None)
 def main(config: DictConfig) -> None:
-    if "install_auto_awq_from_source" in config and config.install_auto_awq_from_source:
-        install_autoawq()
-        return
-
-    if "install_auto_gptq_from_source" in config and config.install_auto_gptq_from_source:
-        install_autogptq()
-        return
-
     log_level = os.environ.get("LOG_LEVEL", "INFO")
     log_to_file = os.environ.get("LOG_TO_FILE", "1") == "1"
     override_benchmarks = os.environ.get("OVERRIDE_BENCHMARKS", "0") == "1"
diff --git a/optimum_benchmark/install_utils.py b/optimum_benchmark/install_utils.py
deleted file mode 100644
index cf004ac2..00000000
--- a/optimum_benchmark/install_utils.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import os
-import re
-import subprocess
-import sys
-from dataclasses import dataclass
-
-EXTERNAL_REPOS_DIR = "external_repos"
-
-
-@dataclass
-class InstallConfig:
-    install_auto_awq_from_source: bool = False
-    install_auto_gptq_from_source: bool = False
-
-
-def remove_torch_from_setup(setup_file_path):
-    """Remove any torch requirement from the setup.py file."""
-    with open(setup_file_path, "r") as file:
-        setup_content = file.read()
-
-    # Use a regular expression to remove any line containing "torch=="
-    setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content)
-
-    # Write the modified content back to setup.py
-    with open(setup_file_path, "w") as file:
-        file.write(setup_content)
-
-
-def clone_or_pull_repo(repo_url, repo_location_path):
-    """Clone the repo if it doesn't exist; otherwise, pull the latest changes."""
-    if os.path.exists(repo_location_path):
-        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
-        subprocess.run(f"cd {repo_location_path} && git pull", shell=True, check=True)
-    else:
-        repo_name = repo_location_path.split("/")[-1]
-        print(f"Cloning {repo_name} into {repo_location_path}")
-        subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True)
-
-
-def install_autogptq():
-    """Install the AutoGPTQ package from GitHub."""
-
-    print("Installing AutoGPTQ package.")
-
-    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
-
-    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
-
-    subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
-
-    subprocess.run(
-        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation -e .",
-        shell=True,
-        check=True,
-    )
-
-    print("AutoGPTQ package installed.")
-
-
-def install_autoawq():
-    """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
-
-    print("Installing AutoAWQ and AutoAWQ_kernels packages.")
-
-    autoawq_kernels_repo_name = "AutoAWQ_kernels"
-    autoawq_repo_name = "AutoAWQ"
-
-    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
-    autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name)
-
-    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path)
-
-    kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
-    remove_torch_from_setup(kernels_setup_file_path)
-    subprocess.run(
-        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
-        shell=True,
-        check=True,
-        env=os.environ,
-    )
-
-    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path)
-
-    autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
-    remove_torch_from_setup(autoawq_setup_file_path)
-    subprocess.run(
-        f"cd {autoawq_repo_path} && {sys.executable} -m pip install -e .", shell=True, check=True, env=os.environ
-    )
-
-    print("AutoAWQ and AutoAWQ_kernels packages installed.")

From 433670f244bbf2119696b6b71c52546fad08375b Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 07:39:23 +0000
Subject: [PATCH 06/44] fix style

---
 .gitignore | 1 -
 setup.py   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5d2155a5..3ecf8df4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,7 +173,6 @@ experiments/
 amdsmi/
 amd-*
 
-
 external_repos/
 outputs/
 
diff --git a/setup.py b/setup.py
index 2d4ed9ca..2a603550 100644
--- a/setup.py
+++ b/setup.py
@@ -58,7 +58,6 @@
             "Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature."
         )
 
-
 if USE_ROCM:
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"

From 37f43f1b2b1eeab5b3ae2ef0e697a2a067699c04 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 07:42:10 +0000
Subject: [PATCH 07/44] fix typos

---
 Makefile                                      | 2 +-
 optimum_benchmark/backends/pytorch/backend.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 6bc83f80..53fa057b 100644
--- a/Makefile
+++ b/Makefile
@@ -109,7 +109,7 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq --install-auto-gptq
+	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs.py | python - --install-autoawq-from-source --install-autogptq-from-source
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 22259e09..b1a6c74d 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-auto-gptq`"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autogptq-from-source`"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq`"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autoawq-from-source`"
                 )
 
             self.quantization_config = AwqConfig(

From db950d4a513cf320d6525da7b55ca6452e27b6e4 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 08:35:06 +0000
Subject: [PATCH 08/44] add install script

---
 scripts/install_quantization_libs.py | 110 +++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 scripts/install_quantization_libs.py

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
new file mode 100644
index 00000000..e46eff83
--- /dev/null
+++ b/scripts/install_quantization_libs.py
@@ -0,0 +1,110 @@
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+EXTERNAL_REPOS_DIR = "external_repos"
+
+
+def remove_torch_from_setup(setup_file_path):
+    """Remove any torch requirement from the setup.py file."""
+    with open(setup_file_path, "r") as file:
+        setup_content = file.read()
+
+    # Use a regular expression to remove any line containing "torch=="
+    setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content)
+
+    # Write the modified content back to setup.py
+    with open(setup_file_path, "w") as file:
+        file.write(setup_content)
+
+
+def clone_or_pull_repo(repo_url, repo_location_path):
+    """Clone the repo if it doesn't exist; otherwise, pull the latest changes."""
+    if os.path.exists(repo_location_path):
+        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
+        subprocess.run(f"cd {repo_location_path} && git pull", shell=True, check=True)
+    else:
+        repo_name = repo_location_path.split("/")[-1]
+        print(f"Cloning {repo_name} into {repo_location_path}")
+        subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True)
+
+
+def install_autogptq_from_source():
+    """Install the AutoGPTQ package from GitHub."""
+
+    print("Installing AutoGPTQ package.")
+
+    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
+
+    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
+
+    subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
+
+    subprocess.run(
+        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation -e .",
+        shell=True,
+        check=True,
+    )
+
+    print("AutoGPTQ package installed.")
+
+
+def install_autoawq_from_source():
+    """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
+
+    print("Installing AutoAWQ and AutoAWQ_kernels packages.")
+
+    autoawq_kernels_repo_name = "AutoAWQ_kernels"
+    autoawq_repo_name = "AutoAWQ"
+
+    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
+    autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name)
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path)
+
+    kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
+    remove_torch_from_setup(kernels_setup_file_path)
+    subprocess.run(
+        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
+        shell=True,
+        check=True,
+        env=os.environ,
+    )
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path)
+
+    autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
+    remove_torch_from_setup(autoawq_setup_file_path)
+    subprocess.run(
+        f"cd {autoawq_repo_path} && {sys.executable} -m pip install -e .", shell=True, check=True, env=os.environ
+    )
+
+    print("AutoAWQ and AutoAWQ_kernels packages installed.")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Install AutoAWQ or AutoGPTQ from source.")
+    parser.add_argument(
+        "--install-autoawq-from-source",
+        action="store_true",
+        help="Install AutoAWQ and AutoAWQ_kernels packages from source.",
+    )
+    parser.add_argument(
+        "--install-autogptq-from-source", action="store_true", help="Install AutoGPTQ package from source."
+    )
+
+    args = parser.parse_args()
+
+    if args.install_autoawq_from_source:
+        install_autoawq_from_source()
+    elif args.install_autogptq_from_source:
+        install_autogptq_from_source()
+    else:
+        print("Please specify an installation option. Use --install-autoawq or --install-autogptq.")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

From 23bf7da73541bc114d4869c7c09aa7acf34025d5 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 08:38:34 +0000
Subject: [PATCH 09/44] fix typo

---
 scripts/install_quantization_libs.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index e46eff83..39358fa6 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -33,9 +33,7 @@ def clone_or_pull_repo(repo_url, repo_location_path):
 
 def install_autogptq_from_source():
     """Install the AutoGPTQ package from GitHub."""
-
     print("Installing AutoGPTQ package.")
-
     autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
 
     clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
@@ -53,7 +51,6 @@ def install_autogptq_from_source():
 
 def install_autoawq_from_source():
     """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
-
     print("Installing AutoAWQ and AutoAWQ_kernels packages.")
 
     autoawq_kernels_repo_name = "AutoAWQ_kernels"
@@ -102,7 +99,9 @@ def main():
     elif args.install_autogptq_from_source:
         install_autogptq_from_source()
     else:
-        print("Please specify an installation option. Use --install-autoawq or --install-autogptq.")
+        print(
+            "Please specify an installation option. Use --install-autoawq-from-source or --install-autogptq-from-source."
+        )
         sys.exit(1)
 
 

From 62099879d83ccdc31a439fe4eec73aab79ea64e6 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 09:02:29 +0000
Subject: [PATCH 10/44] update installation script

---
 Makefile                                      | 2 +-
 optimum_benchmark/backends/pytorch/backend.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 53fa057b..6e48f2e1 100644
--- a/Makefile
+++ b/Makefile
@@ -109,7 +109,7 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs.py | python - --install-autoawq-from-source --install-autogptq-from-source
+	python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index b1a6c74d..dd0523f9 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autogptq-from-source`"
+                    "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` from our repository"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autoawq-from-source`"
+                    "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` from our repository"
                 )
 
             self.quantization_config = AwqConfig(

From ffa5c99e5fb6df9772f63a26f2e857b4bf2ed899 Mon Sep 17 00:00:00 2001
From: baptiste colle <collebaptiste@gmail.com>
Date: Thu, 1 Aug 2024 11:32:30 +0000
Subject: [PATCH 11/44] build autoawq and auto-gptq from source

---
 Makefile               |  3 ++-
 docker/cuda/Dockerfile |  2 +-
 setup.py               | 12 +++++++-----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 96624c29..80fd55e9 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@
 PWD := $(shell pwd)
 USER_ID := $(shell id -u)
 GROUP_ID := $(shell id -g)
+TORCH_VERSION := 2.3.1
 
 quality:
 	ruff check .
@@ -23,7 +24,7 @@ build_cpu_image:
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot
 
 build_cuda_image:
-	docker build -t optimum-benchmark:latest-cuda docker/cuda
+	docker build --build-arg TORCH_VERSION=$(TORCH_VERSION) -t optimum-benchmark:latest-cuda docker/cuda
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot
 
 build_cuda_ort_image:
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index 6ec05d28..decbb345 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -25,7 +25,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     python3.10 python3-pip python3.10-dev && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
-    pip install --no-cache-dir --upgrade pip setuptools wheel 
+    pip install --no-cache-dir --upgrade pip setuptools wheel requests
 
 # Install PyTorch
 ARG TORCH_CUDA=cu121
diff --git a/setup.py b/setup.py
index d5abc58c..013c3374 100644
--- a/setup.py
+++ b/setup.py
@@ -62,8 +62,10 @@
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
 else:
-    AUTOAWQ = "autoawq==0.2.1"
-    AUTOGPTQ = "auto-gptq==0.7.1"
+    AUTOAWQ = "autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"
+    AUTOAWQ_KERNELS = "autoawq-kernels@git+https://github.com/casper-hansen/AutoAWQ_kernels.git"
+
+    AUTOGPTQ = "auto-gptq@git+https://github.com/PanQiWei/AutoGPTQ.git"
 
 EXTRAS_REQUIRE = {
     "quality": ["ruff"],
@@ -80,7 +82,7 @@
     "py-txi": ["py-txi"],
     "vllm": ["vllm"],
     # optional dependencies
-    "autoawq": [AUTOAWQ],
+    "autoawq": [AUTOAWQ_KERNELS, AUTOAWQ],
     "auto-gptq": ["optimum", AUTOGPTQ],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
@@ -113,8 +115,8 @@
         "License :: OSI Approved :: Apache Software License",
         "Topic :: Scientific/Engineering :: Artificial Intelligence",
     ],
-    keywords="benchmaek, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
-    "habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, llama-cpp, auto-gptq, autoawq, "
+    keywords="benchmark, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
+    "habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, auto-gptq, autoawq, "
     "sentence-transformers, bitsandbytes, codecarbon, flash-attn, deepspeed, diffusers, timm, peft",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",

From 51834fabec0c34c5edd5c2a2e3996a1f31e256c0 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Thu, 8 Aug 2024 12:15:18 +0000
Subject: [PATCH 12/44] install from source with unpinned torch

---
 .gitignore |  3 ++-
 Makefile   |  7 ++++---
 setup.py   | 12 ++++++++----
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index f26fda31..31471e80 100644
--- a/.gitignore
+++ b/.gitignore
@@ -174,5 +174,6 @@ amdsmi/
 amd-*
 
 # Mac specific
+external_repos/
 .DS_Store
-outputs/
\ No newline at end of file
+outputs/
diff --git a/Makefile b/Makefile
index 80fd55e9..f8b0e2c4 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ build_cpu_image:
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot
 
 build_cuda_image:
-	docker build --build-arg TORCH_VERSION=$(TORCH_VERSION) -t optimum-benchmark:latest-cuda docker/cuda
+	docker build -t optimum-benchmark:latest-cuda docker/cuda
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot
 
 build_cuda_ort_image:
@@ -109,7 +109,8 @@ install_cli_cpu_neural_compressor:
 	pip install -e .[testing,peft,timm,diffusers,neural-compressor]
 
 install_cli_cuda_pytorch:
-	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
+	python scripts/install_autoawq.py
+	pip install -e .[testing,timm,diffusers,peft,auto-gptq,bitsandbytes,deepspeed]
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
@@ -159,7 +160,7 @@ test_cli_cuda_pytorch_multi_gpu:
 	pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not awq"
 
 test_cli_cuda_pytorch_single_gpu:
-	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not awq"
+	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not awq" --ignore=external_repos
 
 test_cli_cuda_torch_ort_multi_gpu:
 	pytest -s -k "cli and cuda and torch-ort and (dp or ddp or device_map or deepspeed) and not peft"
diff --git a/setup.py b/setup.py
index 013c3374..ba2a76d3 100644
--- a/setup.py
+++ b/setup.py
@@ -58,13 +58,14 @@
             "Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature."
         )
 
+
+
 if USE_ROCM:
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
-else:
-    AUTOAWQ = "autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"
-    AUTOAWQ_KERNELS = "autoawq-kernels@git+https://github.com/casper-hansen/AutoAWQ_kernels.git"
 
+else:
+    # AUTOAWQ will be installed from source via scripts/install_autoawq.py script
     AUTOGPTQ = "auto-gptq@git+https://github.com/PanQiWei/AutoGPTQ.git"
 
 EXTRAS_REQUIRE = {
@@ -82,7 +83,7 @@
     "py-txi": ["py-txi"],
     "vllm": ["vllm"],
     # optional dependencies
-    "autoawq": [AUTOAWQ_KERNELS, AUTOAWQ],
+    "autoawq": [],
     "auto-gptq": ["optimum", AUTOGPTQ],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
@@ -94,6 +95,9 @@
     "peft": ["peft"],
 }
 
+if USE_ROCM:
+    EXTRAS_REQUIRE["autoawq"] = [AUTOAWQ]
+
 
 setup(
     packages=find_packages(),

From 48547858976ca42d49ea16267a589ffe96cf63f9 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Thu, 15 Aug 2024 12:06:57 +0000
Subject: [PATCH 13/44] try loading the kernels directly and provide a utility
 to install autoawq and autogptq from source

---
 .gitignore                                    |  1 -
 Makefile                                      |  6 +-
 optimum_benchmark/backends/pytorch/backend.py | 20 +++++
 optimum_benchmark/cli.py                      | 12 +++
 optimum_benchmark/install_utils.py            | 90 +++++++++++++++++++
 setup.py                                      |  9 +-
 6 files changed, 127 insertions(+), 11 deletions(-)
 create mode 100644 optimum_benchmark/install_utils.py

diff --git a/.gitignore b/.gitignore
index 31471e80..62416b0c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,7 +173,6 @@ experiments/
 amdsmi/
 amd-*
 
-# Mac specific
 external_repos/
 .DS_Store
 outputs/
diff --git a/Makefile b/Makefile
index f8b0e2c4..2692984c 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,6 @@
 PWD := $(shell pwd)
 USER_ID := $(shell id -u)
 GROUP_ID := $(shell id -g)
-TORCH_VERSION := 2.3.1
 
 quality:
 	ruff check .
@@ -109,8 +108,9 @@ install_cli_cpu_neural_compressor:
 	pip install -e .[testing,peft,timm,diffusers,neural-compressor]
 
 install_cli_cuda_pytorch:
-	python scripts/install_autoawq.py
-	pip install -e .[testing,timm,diffusers,peft,auto-gptq,bitsandbytes,deepspeed]
+	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
+	optimum-benchmark +install_auto_awq_from_source=True
+	optimum-benchmark +install_auto_gptq_from_source=True
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index bb747223..830aa30c 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -286,11 +286,31 @@ def create_no_weights_model(self) -> None:
     def process_quantization_config(self) -> None:
         if self.is_gptq_quantized:
             self.logger.info("\t+ Processing GPTQ config")
+
+            try:
+                import exllamav2_kernels  # noqa: F401
+            except ImportError:
+                raise ImportError(
+                    "GPTQ quantization requires the AutoGPTQ package. "
+                    "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
+                    "Or run optimum-benchmark +install_auto_gptq=True"
+                )
+
             self.quantization_config = GPTQConfig(
                 **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
             )
         elif self.is_awq_quantized:
             self.logger.info("\t+ Processing AWQ config")
+
+            try:
+                import awq_ext  # noqa: F401
+            except ImportError:
+                raise ImportError(
+                    "AWQ quantization requires the AutoAWQ package. "
+                    "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
+                    "Or run optimum-benchmark +install_auto_awq=True"
+                )
+
             self.quantization_config = AwqConfig(
                 **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
             )
diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 57c6b054..014698f2 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -6,6 +6,8 @@
 from hydra.core.config_store import ConfigStore
 from omegaconf import DictConfig, OmegaConf
 
+from optimum_benchmark.install_utils import InstallConfig, install_autoawq, install_autogptq
+
 from . import (
     Benchmark,
     BenchmarkConfig,
@@ -55,10 +57,20 @@
 cs.store(group="launcher", name=ProcessConfig.name, node=ProcessConfig)
 cs.store(group="launcher", name=TorchrunConfig.name, node=TorchrunConfig)
 
+cs.store(name="install_config", node=InstallConfig)
+
 
 # optimum-benchmark
 @hydra.main(version_base=None)
 def main(config: DictConfig) -> None:
+    if "install_auto_awq_from_source" in config and config.install_auto_awq_from_source:
+        install_autoawq()
+        return
+
+    if "install_auto_gptq_from_source" in config and config.install_auto_gptq_from_source:
+        install_autogptq()
+        return
+
     log_level = os.environ.get("LOG_LEVEL", "INFO")
     log_to_file = os.environ.get("LOG_TO_FILE", "1") == "1"
     override_benchmarks = os.environ.get("OVERRIDE_BENCHMARKS", "0") == "1"
diff --git a/optimum_benchmark/install_utils.py b/optimum_benchmark/install_utils.py
new file mode 100644
index 00000000..cf004ac2
--- /dev/null
+++ b/optimum_benchmark/install_utils.py
@@ -0,0 +1,90 @@
+import os
+import re
+import subprocess
+import sys
+from dataclasses import dataclass
+
+EXTERNAL_REPOS_DIR = "external_repos"
+
+
+@dataclass
+class InstallConfig:
+    install_auto_awq_from_source: bool = False
+    install_auto_gptq_from_source: bool = False
+
+
+def remove_torch_from_setup(setup_file_path):
+    """Remove any torch requirement from the setup.py file."""
+    with open(setup_file_path, "r") as file:
+        setup_content = file.read()
+
+    # Use a regular expression to remove any line containing "torch=="
+    setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content)
+
+    # Write the modified content back to setup.py
+    with open(setup_file_path, "w") as file:
+        file.write(setup_content)
+
+
+def clone_or_pull_repo(repo_url, repo_location_path):
+    """Clone the repo if it doesn't exist; otherwise, pull the latest changes."""
+    if os.path.exists(repo_location_path):
+        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
+        subprocess.run(f"cd {repo_location_path} && git pull", shell=True, check=True)
+    else:
+        repo_name = repo_location_path.split("/")[-1]
+        print(f"Cloning {repo_name} into {repo_location_path}")
+        subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True)
+
+
+def install_autogptq():
+    """Install the AutoGPTQ package from GitHub."""
+
+    print("Installing AutoGPTQ package.")
+
+    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
+
+    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
+
+    subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
+
+    subprocess.run(
+        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation -e .",
+        shell=True,
+        check=True,
+    )
+
+    print("AutoGPTQ package installed.")
+
+
+def install_autoawq():
+    """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
+
+    print("Installing AutoAWQ and AutoAWQ_kernels packages.")
+
+    autoawq_kernels_repo_name = "AutoAWQ_kernels"
+    autoawq_repo_name = "AutoAWQ"
+
+    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
+    autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name)
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path)
+
+    kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
+    remove_torch_from_setup(kernels_setup_file_path)
+    subprocess.run(
+        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
+        shell=True,
+        check=True,
+        env=os.environ,
+    )
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path)
+
+    autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
+    remove_torch_from_setup(autoawq_setup_file_path)
+    subprocess.run(
+        f"cd {autoawq_repo_path} && {sys.executable} -m pip install -e .", shell=True, check=True, env=os.environ
+    )
+
+    print("AutoAWQ and AutoAWQ_kernels packages installed.")
diff --git a/setup.py b/setup.py
index ba2a76d3..3d3f4fd4 100644
--- a/setup.py
+++ b/setup.py
@@ -59,15 +59,10 @@
         )
 
 
-
 if USE_ROCM:
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
 
-else:
-    # AUTOAWQ will be installed from source via scripts/install_autoawq.py script
-    AUTOGPTQ = "auto-gptq@git+https://github.com/PanQiWei/AutoGPTQ.git"
-
 EXTRAS_REQUIRE = {
     "quality": ["ruff"],
     "testing": ["pytest", "hydra-joblib-launcher"],
@@ -84,7 +79,7 @@
     "vllm": ["vllm"],
     # optional dependencies
     "autoawq": [],
-    "auto-gptq": ["optimum", AUTOGPTQ],
+    "auto-gptq": ["optimum"],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
     "codecarbon": ["codecarbon"],
@@ -97,7 +92,7 @@
 
 if USE_ROCM:
     EXTRAS_REQUIRE["autoawq"] = [AUTOAWQ]
-
+    EXTRAS_REQUIRE["auto-gptq"].append(AUTOGPTQ)
 
 setup(
     packages=find_packages(),

From 1581df8d94dc14b969a699fa12513f14c50aaed3 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Thu, 15 Aug 2024 12:56:47 +0000
Subject: [PATCH 14/44] update cli argument

---
 optimum_benchmark/backends/pytorch/backend.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 830aa30c..1992720f 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or run optimum-benchmark +install_auto_gptq=True"
+                    "Or run optimum-benchmark +install_auto_gptq_from_source=True"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or run optimum-benchmark +install_auto_awq=True"
+                    "Or run optimum-benchmark +install_auto_awq_from_source=True"
                 )
 
             self.quantization_config = AwqConfig(

From b2729946af92ab11bb824ec2fee3397a076df99d Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 07:37:42 +0000
Subject: [PATCH 15/44] add quantization lib to an install script

---
 Makefile                                      |  3 +-
 optimum_benchmark/backends/pytorch/backend.py |  4 +-
 optimum_benchmark/cli.py                      | 12 ---
 optimum_benchmark/install_utils.py            | 90 -------------------
 4 files changed, 3 insertions(+), 106 deletions(-)
 delete mode 100644 optimum_benchmark/install_utils.py

diff --git a/Makefile b/Makefile
index 2692984c..e78eafe0 100644
--- a/Makefile
+++ b/Makefile
@@ -109,8 +109,7 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	optimum-benchmark +install_auto_awq_from_source=True
-	optimum-benchmark +install_auto_gptq_from_source=True
+	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq --install-auto-gptq
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 1992720f..22259e09 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or run optimum-benchmark +install_auto_gptq_from_source=True"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-auto-gptq`"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or run optimum-benchmark +install_auto_awq_from_source=True"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq`"
                 )
 
             self.quantization_config = AwqConfig(
diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 014698f2..57c6b054 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -6,8 +6,6 @@
 from hydra.core.config_store import ConfigStore
 from omegaconf import DictConfig, OmegaConf
 
-from optimum_benchmark.install_utils import InstallConfig, install_autoawq, install_autogptq
-
 from . import (
     Benchmark,
     BenchmarkConfig,
@@ -57,20 +55,10 @@
 cs.store(group="launcher", name=ProcessConfig.name, node=ProcessConfig)
 cs.store(group="launcher", name=TorchrunConfig.name, node=TorchrunConfig)
 
-cs.store(name="install_config", node=InstallConfig)
-
 
 # optimum-benchmark
 @hydra.main(version_base=None)
 def main(config: DictConfig) -> None:
-    if "install_auto_awq_from_source" in config and config.install_auto_awq_from_source:
-        install_autoawq()
-        return
-
-    if "install_auto_gptq_from_source" in config and config.install_auto_gptq_from_source:
-        install_autogptq()
-        return
-
     log_level = os.environ.get("LOG_LEVEL", "INFO")
     log_to_file = os.environ.get("LOG_TO_FILE", "1") == "1"
     override_benchmarks = os.environ.get("OVERRIDE_BENCHMARKS", "0") == "1"
diff --git a/optimum_benchmark/install_utils.py b/optimum_benchmark/install_utils.py
deleted file mode 100644
index cf004ac2..00000000
--- a/optimum_benchmark/install_utils.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import os
-import re
-import subprocess
-import sys
-from dataclasses import dataclass
-
-EXTERNAL_REPOS_DIR = "external_repos"
-
-
-@dataclass
-class InstallConfig:
-    install_auto_awq_from_source: bool = False
-    install_auto_gptq_from_source: bool = False
-
-
-def remove_torch_from_setup(setup_file_path):
-    """Remove any torch requirement from the setup.py file."""
-    with open(setup_file_path, "r") as file:
-        setup_content = file.read()
-
-    # Use a regular expression to remove any line containing "torch=="
-    setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content)
-
-    # Write the modified content back to setup.py
-    with open(setup_file_path, "w") as file:
-        file.write(setup_content)
-
-
-def clone_or_pull_repo(repo_url, repo_location_path):
-    """Clone the repo if it doesn't exist; otherwise, pull the latest changes."""
-    if os.path.exists(repo_location_path):
-        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
-        subprocess.run(f"cd {repo_location_path} && git pull", shell=True, check=True)
-    else:
-        repo_name = repo_location_path.split("/")[-1]
-        print(f"Cloning {repo_name} into {repo_location_path}")
-        subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True)
-
-
-def install_autogptq():
-    """Install the AutoGPTQ package from GitHub."""
-
-    print("Installing AutoGPTQ package.")
-
-    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
-
-    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
-
-    subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
-
-    subprocess.run(
-        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation -e .",
-        shell=True,
-        check=True,
-    )
-
-    print("AutoGPTQ package installed.")
-
-
-def install_autoawq():
-    """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
-
-    print("Installing AutoAWQ and AutoAWQ_kernels packages.")
-
-    autoawq_kernels_repo_name = "AutoAWQ_kernels"
-    autoawq_repo_name = "AutoAWQ"
-
-    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
-    autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name)
-
-    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path)
-
-    kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
-    remove_torch_from_setup(kernels_setup_file_path)
-    subprocess.run(
-        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
-        shell=True,
-        check=True,
-        env=os.environ,
-    )
-
-    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path)
-
-    autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
-    remove_torch_from_setup(autoawq_setup_file_path)
-    subprocess.run(
-        f"cd {autoawq_repo_path} && {sys.executable} -m pip install -e .", shell=True, check=True, env=os.environ
-    )
-
-    print("AutoAWQ and AutoAWQ_kernels packages installed.")

From 1eee838ac11e3f4e10caaf5dea07057dc36c1385 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 07:39:23 +0000
Subject: [PATCH 16/44] fix style

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 3d3f4fd4..c4e9debd 100644
--- a/setup.py
+++ b/setup.py
@@ -58,7 +58,6 @@
             "Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature."
         )
 
-
 if USE_ROCM:
     AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
     AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"

From 8f68fa478621221f5497ee76ccdbe5410292dd3a Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 07:42:10 +0000
Subject: [PATCH 17/44] fix typos

---
 Makefile                                      | 2 +-
 optimum_benchmark/backends/pytorch/backend.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index e78eafe0..3594dd3d 100644
--- a/Makefile
+++ b/Makefile
@@ -109,7 +109,7 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq --install-auto-gptq
+	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs.py | python - --install-autoawq-from-source --install-autogptq-from-source
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 22259e09..b1a6c74d 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-auto-gptq`"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autogptq-from-source`"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/total_tests_runs.py | python - --install-autoawq`"
+                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autoawq-from-source`"
                 )
 
             self.quantization_config = AwqConfig(

From 5aa6ecdc3e84d2a7d5ad330a0577f0c038bfcd11 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 08:35:06 +0000
Subject: [PATCH 18/44] add install script

---
 scripts/install_quantization_libs.py | 110 +++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 scripts/install_quantization_libs.py

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
new file mode 100644
index 00000000..e46eff83
--- /dev/null
+++ b/scripts/install_quantization_libs.py
@@ -0,0 +1,110 @@
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+EXTERNAL_REPOS_DIR = "external_repos"
+
+
+def remove_torch_from_setup(setup_file_path):
+    """Remove any torch requirement from the setup.py file."""
+    with open(setup_file_path, "r") as file:
+        setup_content = file.read()
+
+    # Use a regular expression to remove any line containing "torch=="
+    setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content)
+
+    # Write the modified content back to setup.py
+    with open(setup_file_path, "w") as file:
+        file.write(setup_content)
+
+
+def clone_or_pull_repo(repo_url, repo_location_path):
+    """Clone the repo if it doesn't exist; otherwise, pull the latest changes."""
+    if os.path.exists(repo_location_path):
+        print(f"Directory {repo_location_path} already exists. Pulling the latest changes.")
+        subprocess.run(f"cd {repo_location_path} && git pull", shell=True, check=True)
+    else:
+        repo_name = repo_location_path.split("/")[-1]
+        print(f"Cloning {repo_name} into {repo_location_path}")
+        subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True)
+
+
+def install_autogptq_from_source():
+    """Install the AutoGPTQ package from GitHub."""
+
+    print("Installing AutoGPTQ package.")
+
+    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
+
+    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
+
+    subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
+
+    subprocess.run(
+        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation -e .",
+        shell=True,
+        check=True,
+    )
+
+    print("AutoGPTQ package installed.")
+
+
+def install_autoawq_from_source():
+    """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
+
+    print("Installing AutoAWQ and AutoAWQ_kernels packages.")
+
+    autoawq_kernels_repo_name = "AutoAWQ_kernels"
+    autoawq_repo_name = "AutoAWQ"
+
+    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
+    autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name)
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path)
+
+    kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
+    remove_torch_from_setup(kernels_setup_file_path)
+    subprocess.run(
+        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
+        shell=True,
+        check=True,
+        env=os.environ,
+    )
+
+    clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path)
+
+    autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
+    remove_torch_from_setup(autoawq_setup_file_path)
+    subprocess.run(
+        f"cd {autoawq_repo_path} && {sys.executable} -m pip install -e .", shell=True, check=True, env=os.environ
+    )
+
+    print("AutoAWQ and AutoAWQ_kernels packages installed.")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Install AutoAWQ or AutoGPTQ from source.")
+    parser.add_argument(
+        "--install-autoawq-from-source",
+        action="store_true",
+        help="Install AutoAWQ and AutoAWQ_kernels packages from source.",
+    )
+    parser.add_argument(
+        "--install-autogptq-from-source", action="store_true", help="Install AutoGPTQ package from source."
+    )
+
+    args = parser.parse_args()
+
+    if args.install_autoawq_from_source:
+        install_autoawq_from_source()
+    elif args.install_autogptq_from_source:
+        install_autogptq_from_source()
+    else:
+        print("Please specify an installation option. Use --install-autoawq or --install-autogptq.")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

From 78b3ad7bb05b63780218974e63f7a21ffb150422 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 08:38:34 +0000
Subject: [PATCH 19/44] fix typo

---
 scripts/install_quantization_libs.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index e46eff83..39358fa6 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -33,9 +33,7 @@ def clone_or_pull_repo(repo_url, repo_location_path):
 
 def install_autogptq_from_source():
     """Install the AutoGPTQ package from GitHub."""
-
     print("Installing AutoGPTQ package.")
-
     autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
 
     clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
@@ -53,7 +51,6 @@ def install_autogptq_from_source():
 
 def install_autoawq_from_source():
     """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
-
     print("Installing AutoAWQ and AutoAWQ_kernels packages.")
 
     autoawq_kernels_repo_name = "AutoAWQ_kernels"
@@ -102,7 +99,9 @@ def main():
     elif args.install_autogptq_from_source:
         install_autogptq_from_source()
     else:
-        print("Please specify an installation option. Use --install-autoawq or --install-autogptq.")
+        print(
+            "Please specify an installation option. Use --install-autoawq-from-source or --install-autogptq-from-source."
+        )
         sys.exit(1)
 
 

From 9bfb22ffe53c0e54256613f733a3c3aa544e3c51 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Fri, 16 Aug 2024 09:02:29 +0000
Subject: [PATCH 20/44] update installation script

---
 Makefile                                      | 2 +-
 optimum_benchmark/backends/pytorch/backend.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 3594dd3d..dc749225 100644
--- a/Makefile
+++ b/Makefile
@@ -109,7 +109,7 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs.py | python - --install-autoawq-from-source --install-autogptq-from-source
+	python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index b1a6c74d..dd0523f9 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -293,7 +293,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "GPTQ quantization requires the AutoGPTQ package. "
                     "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autogptq-from-source`"
+                    "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` from our repository"
                 )
 
             self.quantization_config = GPTQConfig(
@@ -308,7 +308,7 @@ def process_quantization_config(self) -> None:
                 raise ImportError(
                     "AWQ quantization requires the AutoAWQ package. "
                     "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or `curl -s https://raw.githubusercontent.com/huggingface/optimum-benchmark/main/scripts/install_quantization_libs | python - --install-autoawq-from-source`"
+                    "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` from our repository"
                 )
 
             self.quantization_config = AwqConfig(

From fd4992e8d957158e63923614b8e3409d53ed9920 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Mon, 19 Aug 2024 13:02:40 +0000
Subject: [PATCH 21/44] fix failing test

---
 .github/workflows/test_cli_cuda_pytorch_single_gpu.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
index 0a404bd0..534c7713 100644
--- a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
@@ -49,4 +49,5 @@ jobs:
             --workdir /workspace
           run: |
             pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq]
+            python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
             pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (awq)"

From e9f4e0e899171b33c1d1dc637a70f4866b53f63c Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Mon, 19 Aug 2024 13:09:56 +0000
Subject: [PATCH 22/44] fix failing test

---
 .github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml  | 1 +
 .github/workflows/test_cli_rocm_pytorch_single_gpu.yaml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
index acf519c8..6808edc5 100644
--- a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
+++ b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
@@ -64,4 +64,5 @@ jobs:
             --workdir /workspace
           run: |
             pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq]
+            python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
             pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
diff --git a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
index ca6096b2..6d08541a 100644
--- a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
+++ b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
@@ -60,4 +60,5 @@ jobs:
             --workdir /workspace
           run: |
             pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq]
+            python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
             pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"

From e28815907c9ab450b24600570c156f51d8c78d1b Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Mon, 19 Aug 2024 13:21:59 +0000
Subject: [PATCH 23/44] fix failing test

---
 .github/workflows/test_cli_rocm_pytorch_single_gpu.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
index 6d08541a..ca6096b2 100644
--- a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
+++ b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
@@ -60,5 +60,4 @@ jobs:
             --workdir /workspace
           run: |
             pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq]
-            python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
             pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"

From 72921e3cc16ee3a8ae271051a3da30682e5990fe Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Mon, 19 Aug 2024 14:21:50 +0000
Subject: [PATCH 24/44] fix failing test

---
 .github/workflows/test_cli_cuda_pytorch_single_gpu.yaml | 2 +-
 .github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml  | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
index 534c7713..fe4462a1 100644
--- a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
@@ -50,4 +50,4 @@ jobs:
           run: |
             pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq]
             python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
-            pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (awq)"
+            pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (awq)" --ignore=external_repos
diff --git a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
index 6808edc5..acf519c8 100644
--- a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
+++ b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
@@ -64,5 +64,4 @@ jobs:
             --workdir /workspace
           run: |
             pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq]
-            python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
             pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"

From 154ad41e95291c95b8011c0802e36f951705e536 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Tue, 20 Aug 2024 05:01:58 +0000
Subject: [PATCH 25/44] fix auto-gptq install

---
 scripts/install_quantization_libs.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index 39358fa6..140767dd 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -89,16 +89,19 @@ def main():
         help="Install AutoAWQ and AutoAWQ_kernels packages from source.",
     )
     parser.add_argument(
-        "--install-autogptq-from-source", action="store_true", help="Install AutoGPTQ package from source."
+        "--install-autogptq-from-source", 
+        action="store_true", 
+        help="Install AutoGPTQ package from source."
     )
 
     args = parser.parse_args()
 
     if args.install_autoawq_from_source:
         install_autoawq_from_source()
-    elif args.install_autogptq_from_source:
+    if  args.install_autogptq_from_source:
         install_autogptq_from_source()
-    else:
+
+    if not args.install_autoawq_from_source and not args.install_autogptq_from_source:
         print(
             "Please specify an installation option. Use --install-autoawq-from-source or --install-autogptq-from-source."
         )

From aad635b9a28ed1957146eac017625dd6241d3b56 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Tue, 20 Aug 2024 05:03:54 +0000
Subject: [PATCH 26/44] fix style

---
 scripts/install_quantization_libs.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index 140767dd..eb910dc3 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -89,16 +89,14 @@ def main():
         help="Install AutoAWQ and AutoAWQ_kernels packages from source.",
     )
     parser.add_argument(
-        "--install-autogptq-from-source", 
-        action="store_true", 
-        help="Install AutoGPTQ package from source."
+        "--install-autogptq-from-source", action="store_true", help="Install AutoGPTQ package from source."
     )
 
     args = parser.parse_args()
 
     if args.install_autoawq_from_source:
         install_autoawq_from_source()
-    if  args.install_autogptq_from_source:
+    if args.install_autogptq_from_source:
         install_autogptq_from_source()
 
     if not args.install_autoawq_from_source and not args.install_autogptq_from_source:

From 9e9e526f8853827e9f75926665dbd54e95404868 Mon Sep 17 00:00:00 2001
From: baptiste <collebaptiste@gmail.com>
Date: Wed, 21 Aug 2024 19:32:33 +0000
Subject: [PATCH 27/44] fix dockerfile

---
 Makefile               | 4 ++--
 docker/cuda/Dockerfile | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index dc749225..5c63df66 100644
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,7 @@ build_cpu_image:
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot
 
 build_cuda_image:
-	docker build -t optimum-benchmark:latest-cuda docker/cuda
+	docker build -t optimum-benchmark:latest-cuda -f docker/cuda/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot
 
 build_cuda_ort_image:
@@ -109,7 +109,7 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
+	# python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index decbb345..9dc19e41 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -19,7 +19,7 @@ ARG UBUNTU_VERSION=22.04
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 
 # Install necessary packages
-ENV DEBIAN_FRONTEND noninteractive
+ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get install -y --no-install-recommends \
     sudo build-essential git bash-completion \
     python3.10 python3-pip python3.10-dev && \
@@ -37,4 +37,9 @@ elif [ "${TORCH_VERSION}" = "nighly" ]; then \
     pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
 else \
     pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
-fi
\ No newline at end of file
+fi
+
+# Install quantization libraries from source
+ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
+RUN python internal/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
\ No newline at end of file

From 120acc7c586ce70120009b81c027ab9749f3d5cf Mon Sep 17 00:00:00 2001
From: Baptiste Colle <32412211+baptistecolle@users.noreply.github.com>
Date: Thu, 22 Aug 2024 12:42:15 +0200
Subject: [PATCH 28/44] update arch list

---
 docker/cuda/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index 9dc19e41..650f4a48 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -40,6 +40,6 @@ else \
 fi
 
 # Install quantization libraries from source
-ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
 COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
-RUN python internal/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
\ No newline at end of file
+RUN python internal/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source

From a50a5ef25eb16acbb25449059e917fcaf4cb8af0 Mon Sep 17 00:00:00 2001
From: Baptiste Colle <32412211+baptistecolle@users.noreply.github.com>
Date: Thu, 22 Aug 2024 12:48:09 +0200
Subject: [PATCH 29/44] Update torch arch list

---
 docker/cuda/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index 650f4a48..b37ba7a2 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -40,6 +40,6 @@ else \
 fi
 
 # Install quantization libraries from source
-ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
+ENV TORCH_CUDA_ARCH_LIST="5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
 COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
 RUN python internal/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source

From dc8f7630b28ab8a27f9e9ce7a7d5e3350c311468 Mon Sep 17 00:00:00 2001
From: Baptiste Colle <32412211+baptistecolle@users.noreply.github.com>
Date: Fri, 30 Aug 2024 14:22:58 +0200
Subject: [PATCH 30/44] Remove commented code

---
 Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5c63df66..15146f49 100644
--- a/Makefile
+++ b/Makefile
@@ -109,7 +109,6 @@ install_cli_cpu_neural_compressor:
 
 install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
-	# python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
 
 install_cli_rocm_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]

From 49588001fce524c4cf465ab1221536caeeba3756 Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:00 +0200
Subject: [PATCH 31/44] Update scripts/install_quantization_libs.py

---
 scripts/install_quantization_libs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index eb910dc3..a101d3a3 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -75,7 +75,7 @@ def install_autoawq_from_source():
     autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
     remove_torch_from_setup(autoawq_setup_file_path)
     subprocess.run(
-        f"cd {autoawq_repo_path} && {sys.executable} -m pip install -e .", shell=True, check=True, env=os.environ
+        f"cd {autoawq_repo_path} && {sys.executable} -m pip install .", shell=True, check=True, env=os.environ
     )
 
     print("AutoAWQ and AutoAWQ_kernels packages installed.")

From 072c4931ff1eef9b029a586a07c9485e13b287fe Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:08 +0200
Subject: [PATCH 32/44] Update
 .github/workflows/test_cli_cuda_pytorch_single_gpu.yaml

---
 .github/workflows/test_cli_cuda_pytorch_single_gpu.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
index fe4462a1..b8fe985e 100644
--- a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
@@ -50,4 +50,4 @@ jobs:
           run: |
             pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq]
             python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
-            pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (awq)" --ignore=external_repos
+            pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"

From 3545bc8db26899934ce45e3c9e5327c47e45f24f Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:16 +0200
Subject: [PATCH 33/44] Update Makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 15146f49..1e0edc05 100644
--- a/Makefile
+++ b/Makefile
@@ -158,7 +158,7 @@ test_cli_cuda_pytorch_multi_gpu:
 	pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not awq"
 
 test_cli_cuda_pytorch_single_gpu:
-	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not awq" --ignore=external_repos
+	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not awq"
 
 test_cli_cuda_torch_ort_multi_gpu:
 	pytest -s -k "cli and cuda and torch-ort and (dp or ddp or device_map or deepspeed) and not peft"

From 9103b39d0d51657e2faa51d636854343e9df020e Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:22 +0200
Subject: [PATCH 34/44] Update scripts/install_quantization_libs.py

---
 scripts/install_quantization_libs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index a101d3a3..19afa472 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -64,7 +64,7 @@ def install_autoawq_from_source():
     kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
     remove_torch_from_setup(kernels_setup_file_path)
     subprocess.run(
-        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
+        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation .",
         shell=True,
         check=True,
         env=os.environ,

From 08a5c42495390efcb5cb53419985a0a07c93e13b Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:31 +0200
Subject: [PATCH 35/44] Update scripts/install_quantization_libs.py

---
 scripts/install_quantization_libs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index 19afa472..9dcb460d 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -41,7 +41,7 @@ def install_autogptq_from_source():
     subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
 
     subprocess.run(
-        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation -e .",
+        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation .",
         shell=True,
         check=True,
     )

From 32d8f9b00d9defa010316159a508401ea2c66e5f Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:44 +0200
Subject: [PATCH 36/44] Update setup.py

---
 setup.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/setup.py b/setup.py
index 2a603550..31ea6c8b 100644
--- a/setup.py
+++ b/setup.py
@@ -58,9 +58,6 @@
             "Please install amdsmi from https://github.com/ROCm/amdsmi to enable this feature."
         )
 
-if USE_ROCM:
-    AUTOAWQ = "autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1+rocm571-cp310-cp310-linux_x86_64.whl"
-    AUTOGPTQ = "auto-gptq@https://huggingface.github.io/autogptq-index/whl/rocm573/auto-gptq/auto_gptq-0.7.1%2Brocm5.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
 
 EXTRAS_REQUIRE = {
     "quality": ["ruff"],

From 5bfb74298df2bcc3fc9e96042adc59100789987d Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:52 +0200
Subject: [PATCH 37/44] Update setup.py

---
 setup.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/setup.py b/setup.py
index 31ea6c8b..adcd57df 100644
--- a/setup.py
+++ b/setup.py
@@ -86,9 +86,6 @@
     "peft": ["peft"],
 }
 
-if USE_ROCM:
-    EXTRAS_REQUIRE["autoawq"] = [AUTOAWQ]
-    EXTRAS_REQUIRE["auto-gptq"].append(AUTOGPTQ)
 
 setup(
     packages=find_packages(),

From c5621569801e7d4301d41954e8ceaf04617fd442 Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 15:34:58 +0200
Subject: [PATCH 38/44] Update setup.py

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index adcd57df..5f6db273 100644
--- a/setup.py
+++ b/setup.py
@@ -74,8 +74,8 @@
     "py-txi": ["py-txi"],
     "vllm": ["vllm"],
     # optional dependencies
-    "autoawq": [],
-    "auto-gptq": ["optimum"],
+    "autoawq": ["autoawq"],
+    "auto-gptq": ["optimum", "auto-gptq"],
     "sentence-transformers": ["sentence-transformers"],
     "bitsandbytes": ["bitsandbytes"],
     "codecarbon": ["codecarbon"],

From b62c8caf6a5b53de39a9fc7c8d3baca75dace4b9 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Date: Fri, 30 Aug 2024 16:25:21 +0200
Subject: [PATCH 39/44] add ninja

---
 docker/cuda-ort/Dockerfile | 2 +-
 docker/cuda/Dockerfile     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/cuda-ort/Dockerfile b/docker/cuda-ort/Dockerfile
index 056d3ebf..9b94fae7 100644
--- a/docker/cuda-ort/Dockerfile
+++ b/docker/cuda-ort/Dockerfile
@@ -39,5 +39,5 @@ else \
     pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \
 fi
 
-ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
+ENV TORCH_CUDA_ARCH_LIST="5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
 RUN pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index b37ba7a2..dfb84e18 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -25,7 +25,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     python3.10 python3-pip python3.10-dev && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
-    pip install --no-cache-dir --upgrade pip setuptools wheel requests
+    pip install --no-cache-dir --upgrade pip setuptools wheel requests ninja
 
 # Install PyTorch
 ARG TORCH_CUDA=cu121

From 3b5b5e296d443dd3b02c6110f87b483c13d31b8d Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Date: Fri, 30 Aug 2024 17:02:16 +0200
Subject: [PATCH 40/44] fix is_cpu_only

---
 scripts/install_quantization_libs.py | 62 +++++++++++++++-------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index 9dcb460d..d24a9b27 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -7,14 +7,18 @@
 EXTERNAL_REPOS_DIR = "external_repos"
 
 
-def remove_torch_from_setup(setup_file_path):
-    """Remove any torch requirement from the setup.py file."""
+def process_setup_file(setup_file_path):
     with open(setup_file_path, "r") as file:
         setup_content = file.read()
 
     # Use a regular expression to remove any line containing "torch=="
     setup_content = re.sub(r'"torch==[^\"]+",', "", setup_content)
 
+    # Set IS_CPU_ONLY to False
+    setup_content = setup_content.replace(
+        "IS_CPU_ONLY = not torch.backends.mps.is_available() and not torch.cuda.is_available()", "IS_CPU_ONLY = False"
+    )
+
     # Write the modified content back to setup.py
     with open(setup_file_path, "w") as file:
         file.write(setup_content)
@@ -31,56 +35,58 @@ def clone_or_pull_repo(repo_url, repo_location_path):
         subprocess.run(f"git clone {repo_url} {repo_location_path}", shell=True, check=True)
 
 
-def install_autogptq_from_source():
-    """Install the AutoGPTQ package from GitHub."""
-    print("Installing AutoGPTQ package.")
-    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
-
-    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
-
-    subprocess.run("pip install numpy gekko pandas", shell=True, check=True)
-
-    subprocess.run(
-        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation .",
-        shell=True,
-        check=True,
-    )
-
-    print("AutoGPTQ package installed.")
-
-
 def install_autoawq_from_source():
     """Install the AutoAWQ and AutoAWQ_kernels packages from GitHub."""
     print("Installing AutoAWQ and AutoAWQ_kernels packages.")
 
-    autoawq_kernels_repo_name = "AutoAWQ_kernels"
     autoawq_repo_name = "AutoAWQ"
+    autoawq_kernels_repo_name = "AutoAWQ_kernels"
 
-    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
     autoawq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_repo_name)
+    kernels_repo_path = os.path.join(EXTERNAL_REPOS_DIR, autoawq_kernels_repo_name)
 
     clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_kernels_repo_name}", kernels_repo_path)
-
     kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
-    remove_torch_from_setup(kernels_setup_file_path)
+    process_setup_file(kernels_setup_file_path)
     subprocess.run(
-        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation .",
+        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
         shell=True,
         check=True,
         env=os.environ,
     )
 
     clone_or_pull_repo(f"https://github.com/casper-hansen/{autoawq_repo_name}", autoawq_repo_path)
-
     autoawq_setup_file_path = os.path.join(autoawq_repo_path, "setup.py")
-    remove_torch_from_setup(autoawq_setup_file_path)
+    process_setup_file(autoawq_setup_file_path)
     subprocess.run(
-        f"cd {autoawq_repo_path} && {sys.executable} -m pip install .", shell=True, check=True, env=os.environ
+        f"cd {autoawq_repo_path} && {sys.executable} -m pip install .",
+        shell=True,
+        check=True,
+        env=os.environ,
     )
 
     print("AutoAWQ and AutoAWQ_kernels packages installed.")
 
 
+def install_autogptq_from_source():
+    """Install the AutoGPTQ package from GitHub."""
+    print("Installing AutoGPTQ package.")
+    autogptq_repo_path = os.path.join(EXTERNAL_REPOS_DIR, "AutoGPTQ")
+
+    clone_or_pull_repo("https://github.com/PanQiWei/AutoGPTQ.git", autogptq_repo_path)
+    subprocess.run(f"{sys.executable} -m pip install numpy gekko pandas", shell=True, check=True, env=os.environ)
+    autogptq_setup_file_path = os.path.join(autogptq_repo_path, "setup.py")
+    process_setup_file(autogptq_setup_file_path)
+    subprocess.run(
+        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation .",
+        shell=True,
+        check=True,
+        env=os.environ,
+    )
+
+    print("AutoGPTQ package installed.")
+
+
 def main():
     parser = argparse.ArgumentParser(description="Install AutoAWQ or AutoGPTQ from source.")
     parser.add_argument(

From 59917832586936096cb4262774ec86654a320f51 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Date: Fri, 30 Aug 2024 17:04:48 +0200
Subject: [PATCH 41/44] fix min compute capability for auto-gptq

---
 docker/cuda/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index dfb84e18..5218fffc 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -40,6 +40,6 @@ else \
 fi
 
 # Install quantization libraries from source
-ENV TORCH_CUDA_ARCH_LIST="5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
+ENV TORCH_CUDA_ARCH_LIST="6.0 7.0 7.5 8.0 8.6 9.0+PTX"
 COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
-RUN python internal/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
+RUN python internal/install_quantization_libs.py --install-autogptq-from-source

From 8a03ac0ddaee0c1af2858c7f8c75345e50c35cda Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 30 Aug 2024 17:06:51 +0200
Subject: [PATCH 42/44] Update
 .github/workflows/test_cli_cuda_pytorch_single_gpu.yaml

---
 .github/workflows/test_cli_cuda_pytorch_single_gpu.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
index b8fe985e..3ba94abc 100644
--- a/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch_single_gpu.yaml
@@ -49,5 +49,4 @@ jobs:
             --workdir /workspace
           run: |
             pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq]
-            python scripts/install_quantization_libs.py --install-autoawq-from-source --install-autogptq-from-source
             pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"

From 1b6659597a7cd81e6ac3cfcff32880cc773207a5 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Date: Wed, 4 Sep 2024 09:16:15 +0000
Subject: [PATCH 43/44] fix

---
 scripts/install_quantization_libs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index d24a9b27..3e1ff9aa 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -49,7 +49,7 @@ def install_autoawq_from_source():
     kernels_setup_file_path = os.path.join(kernels_repo_path, "setup.py")
     process_setup_file(kernels_setup_file_path)
     subprocess.run(
-        f"cd {kernels_repo_path} && {sys.executable} -m pip install --no-build-isolation -e .",
+        f"cd {kernels_repo_path} && {sys.executable} -m pip install .",
         shell=True,
         check=True,
         env=os.environ,
@@ -78,7 +78,7 @@ def install_autogptq_from_source():
     autogptq_setup_file_path = os.path.join(autogptq_repo_path, "setup.py")
     process_setup_file(autogptq_setup_file_path)
     subprocess.run(
-        f"cd {autogptq_repo_path} && {sys.executable} -m pip install -vvv --no-build-isolation .",
+        f"cd {autogptq_repo_path} && {sys.executable} -m pip install .",
         shell=True,
         check=True,
         env=os.environ,

From 7d0faa81cbc45458bdc0c0caa5f52793755fd193 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Date: Wed, 4 Sep 2024 11:58:34 +0000
Subject: [PATCH 44/44] rocm

---
 .../test_cli_rocm_pytorch_multi_gpu.yaml       |  5 +----
 .../test_cli_rocm_pytorch_single_gpu.yaml      |  5 +----
 Makefile                                       | 12 ++++++------
 docker/cuda/Dockerfile                         |  6 ++++--
 docker/rocm/Dockerfile                         | 14 +++++++++++---
 optimum_benchmark/backends/pytorch/backend.py  | 18 +++++++++++-------
 scripts/install_quantization_libs.py           |  4 +++-
 7 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
index 9b8ba321..050b53e1 100644
--- a/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
+++ b/.github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
@@ -49,8 +49,5 @@ jobs:
           pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15"
 
       - name: Run tests
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          PUSH_REPO_ID: optimum-benchmark/rocm
         run: |
-          pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
+          pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"
diff --git a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
index 3f19ad11..59fdd4f6 100644
--- a/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
+++ b/.github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
@@ -49,8 +49,5 @@ jobs:
           pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq]
 
       - name: Run tests
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          PUSH_REPO_ID: optimum-benchmark/rocm
         run: |
-          pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq or gptq)"
+          pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb"
diff --git a/Makefile b/Makefile
index ffa2c7c7..0e14a5db 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ install:
 ## Build docker
 
 build_cpu_image:
-	docker build -t optimum-benchmark:latest-cpu docker/cpu
+	docker build -t optimum-benchmark:latest-cpu -f docker/cpu/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cpu --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cpu docker/unroot
 
 build_cuda_image:
@@ -27,11 +27,11 @@ build_cuda_image:
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda docker/unroot
 
 build_cuda_ort_image:
-	docker build -t optimum-benchmark:latest-cuda-ort docker/cuda-ort
+	docker build -t optimum-benchmark:latest-cuda-ort -f docker/cuda-ort/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-cuda-ort --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-cuda-ort docker/unroot
 
 build_rocm_image:
-	docker build -t optimum-benchmark:latest-rocm docker/rocm
+	docker build -t optimum-benchmark:latest-rocm -f docker/rocm/Dockerfile .
 	docker build --build-arg IMAGE=optimum-benchmark:latest-rocm --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t optimum-benchmark:latest-rocm docker/unroot
 
 # Run docker
@@ -111,7 +111,7 @@ install_cli_cuda_pytorch:
 	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]
 
 install_cli_rocm_pytorch:
-	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]
+	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq] "deepspeed<0.15"
 
 install_cli_cuda_torch_ort:
 	pip install -e .[testing,timm,diffusers,peft,torch-ort,deepspeed]
@@ -167,10 +167,10 @@ test_cli_cuda_torch_ort_single_gpu:
 	pytest -s -k "cli and cuda and torch-ort and not (dp or ddp or device_map or deepspeed) and not peft"
 
 test_cli_rocm_pytorch_multi_gpu:
-	pytest -s -k "cli and rocm and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
+	pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb"
 
 test_cli_rocm_pytorch_single_gpu:
-	pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
+	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb"
 
 test_cli_llama_cpp:
 	pytest -s -k "llama_cpp"
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index c45ab973..fa43fa49 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -24,7 +24,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     python3.10 python3-pip python3.10-dev && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
-    pip install --no-cache-dir --upgrade pip setuptools wheel requests ninja
+    pip install --no-cache-dir --upgrade pip setuptools wheel requests
 
 # Install PyTorch
 ARG TORCH_CUDA=cu124
@@ -39,6 +39,8 @@ else \
 fi
 
 # Install quantization libraries from source
+ENV CUDA_VERSION=12.4
 ENV TORCH_CUDA_ARCH_LIST="6.0 7.0 7.5 8.0 8.6 9.0+PTX"
+
 COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
-RUN python internal/install_quantization_libs.py --install-autogptq-from-source
+RUN python /internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source
diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
index 7dfd3719..611d058a 100644
--- a/docker/rocm/Dockerfile
+++ b/docker/rocm/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG ROCM_VERSION=6.1.2
+ARG ROCM_VERSION=5.7.1
 ARG UBUNTU_VERSION=22.04
 
 FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}
@@ -21,15 +21,16 @@ FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}
 ENV PATH="/opt/rocm/bin:${PATH}"
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ 
+    rocsparse-dev hipsparse-dev rocthrust-dev rocblas-dev hipblas-dev \
     sudo build-essential git bash-completion \
     python3.10 python3-pip python3.10-dev && \
     apt-get clean && rm -rf /var/lib/apt/lists/* && \
     update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
-    pip install --no-cache-dir --upgrade pip setuptools wheel  && \
+    pip install --no-cache-dir --upgrade pip setuptools wheel requests && \
     cd /opt/rocm/share/amd_smi && pip install .
 
 # Install PyTorch
-ARG TORCH_ROCM=rocm6.1
+ARG TORCH_ROCM=rocm5.7
 ARG TORCH_VERSION=stable
 
 RUN if [ "${TORCH_VERSION}" = "stable" ]; then \
@@ -39,3 +40,10 @@ elif [ "${TORCH_VERSION}" = "nightly" ]; then \
 else \
     pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \
 fi
+
+# Install quantization libraries from source
+ENV ROCM_VERSION=5.7
+ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
+
+COPY scripts/install_quantization_libs.py /internal/install_quantization_libs.py
+RUN python /internal/install_quantization_libs.py --install-autogptq-from-source --install-autoawq-from-source
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index dd0523f9..fcf522b5 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -291,9 +291,11 @@ def process_quantization_config(self) -> None:
                 import exllamav2_kernels  # noqa: F401
             except ImportError:
                 raise ImportError(
-                    "GPTQ quantization requires the AutoGPTQ package. "
-                    "Please install it from source at `https://github.com/AutoGPTQ/AutoGPTQ`"
-                    "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` from our repository"
+                    "Tried to import `exllamav2_kernels` but failed. "
+                    "This means that the AutoGPTQ package is either not installed or not compiled with the right torch version. "
+                    "Please install it from source following the instructions at `https://github.com/AutoGPTQ/AutoGPTQ`. "
+                    "Or use `python scripts/install_quantization_libs.py --install-autogptq-from-source` in "
+                    "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
                 )
 
             self.quantization_config = GPTQConfig(
@@ -303,12 +305,14 @@ def process_quantization_config(self) -> None:
             self.logger.info("\t+ Processing AWQ config")
 
             try:
-                import awq_ext  # noqa: F401
+                import exlv2_ext  # noqa: F401
             except ImportError:
                 raise ImportError(
-                    "AWQ quantization requires the AutoAWQ package. "
-                    "Please install it from source at `https://github.com/casper-hansen/AutoAWQ`"
-                    "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` from our repository"
+                    "Tried to import `exlv2_ext` but failed. "
+                    "This means that the AutoAWQ package is either not installed or not compiled with the right torch version. "
+                    "Please install it from source following the instructions at `https://github.com/casper-hansen/AutoAWQ`. "
+                    "Or use `python scripts/install_quantization_libs.py --install-autoawq-from-source` in "
+                    "`optimum-benchmark` repository at `https://github.com/huggingface/optimum-benchmark`."
                 )
 
             self.quantization_config = AwqConfig(
diff --git a/scripts/install_quantization_libs.py b/scripts/install_quantization_libs.py
index 3e1ff9aa..9feebbc1 100644
--- a/scripts/install_quantization_libs.py
+++ b/scripts/install_quantization_libs.py
@@ -95,7 +95,9 @@ def main():
         help="Install AutoAWQ and AutoAWQ_kernels packages from source.",
     )
     parser.add_argument(
-        "--install-autogptq-from-source", action="store_true", help="Install AutoGPTQ package from source."
+        "--install-autogptq-from-source",
+        action="store_true",
+        help="Install AutoGPTQ package from source.",
     )
 
     args = parser.parse_args()