From 24377b8ba4512a5ea8675f5d2754cf6e54e6b615 Mon Sep 17 00:00:00 2001 From: Ali Abdelkader Date: Sat, 24 Feb 2024 04:38:24 +0200 Subject: [PATCH 1/6] add test configurations for quantization with onnxruntime, awq, bnb - add test configurations for onnxruntime backend - add no_weights sweep to quantization tests - add test configuration for pytorch awq quantization - add test configuration for bnb quantization - add autoawq and bitsandbytes libraries to optional install requirements - update makefile and github workflow so that the autoawq & bnb get installed and their related tests run successfully locally and on CI --- .github/workflows/test_cli_cuda_pytorch.yaml | 2 +- Makefile | 2 +- setup.py | 2 ++ tests/configs/_no_weights_sweep_.yaml | 4 +++ ...nference_onnxruntime_static_quant_vit.yaml | 20 +++++++++++++++ ...nference_onnxruntime_static_quant_vit.yaml | 20 +++++++++++++++ .../cuda_inference_pytorch_awq_exllama.yaml | 25 +++++++++++++++++++ .../cuda_inference_pytorch_gpt_bnb_4bit.yaml | 17 +++++++++++++ .../cuda_inference_pytorch_gpt_bnb_8bit.yaml | 16 ++++++++++++ 9 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 tests/configs/_no_weights_sweep_.yaml create mode 100644 tests/configs/cpu_inference_onnxruntime_static_quant_vit.yaml create mode 100644 tests/configs/cuda_inference_onnxruntime_static_quant_vit.yaml create mode 100644 tests/configs/cuda_inference_pytorch_awq_exllama.yaml create mode 100644 tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml create mode 100644 tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml index 204722db..3d8a8b0b 100644 --- a/.github/workflows/test_cli_cuda_pytorch.yaml +++ b/.github/workflows/test_cli_cuda_pytorch.yaml @@ -53,4 +53,4 @@ jobs: --workdir /workspace/optimum-benchmark --entrypoint /bin/bash opt-bench-cuda:${{ matrix.image.cuda_version }} - -c "pip install -e 
.[testing,diffusers,timm,deepspeed,peft] && pytest -k 'cli and cuda and pytorch' -x" + -c "pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,bitsandbytes] && pytest -k 'cli and cuda and pytorch' -x" diff --git a/Makefile b/Makefile index 468cccd0..e511be9a 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ CLI_MISC_REQS := testing CLI_CUDA_ONNXRUNTIME_REQS := testing,timm,diffusers CLI_ROCM_ONNXRUNTIME_REQS := testing,timm,diffusers -CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft +CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,autoawq,bitsandbytes CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft CLI_CPU_OPENVINO_REQS := testing,openvino,timm,diffusers diff --git a/setup.py b/setup.py index 50dc0528..1fcff05b 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,8 @@ "diffusers": ["diffusers"], "timm": ["timm"], "peft": ["peft"], + "autoawq": ["autoawq"], + "bitsandbytes": ["bitsandbytes"], } diff --git a/tests/configs/_no_weights_sweep_.yaml b/tests/configs/_no_weights_sweep_.yaml new file mode 100644 index 00000000..b982009c --- /dev/null +++ b/tests/configs/_no_weights_sweep_.yaml @@ -0,0 +1,4 @@ +hydra: + sweeper: + params: + backend.no_weights: true,false diff --git a/tests/configs/cpu_inference_onnxruntime_static_quant_vit.yaml b/tests/configs/cpu_inference_onnxruntime_static_quant_vit.yaml new file mode 100644 index 00000000..b3bf7c63 --- /dev/null +++ b/tests/configs/cpu_inference_onnxruntime_static_quant_vit.yaml @@ -0,0 +1,20 @@ +defaults: + - backend: onnxruntime + # order of inheritance, last one overrides previous ones + - _base_ # inherits from base config + - _inference_ # inherits from inference config + - _cpu_ # inherits from cpu config + - _no_weights_sweep_ # sweep over no_weights: true,false + - _self_ # hydra 1.1 compatibility + +experiment_name: cpu_inference_onnxruntime_static_quant_vit + +backend: + model: google/vit-base-patch16-224 + quantization: true + quantization_config: + 
is_static: true + per_channel: false + + calibration: true + diff --git a/tests/configs/cuda_inference_onnxruntime_static_quant_vit.yaml b/tests/configs/cuda_inference_onnxruntime_static_quant_vit.yaml new file mode 100644 index 00000000..382fabcb --- /dev/null +++ b/tests/configs/cuda_inference_onnxruntime_static_quant_vit.yaml @@ -0,0 +1,20 @@ +defaults: + - backend: onnxruntime + # order of inheritance, last one overrides previous ones + - _base_ # inherits from base config + - _inference_ # inherits from inference config + - _cuda_ # inherits from cuda config + - _no_weights_sweep_ # sweep over no_weights: true,false + - _self_ # hydra 1.1 compatibility + +experiment_name: cuda_inference_onnxruntime_static_quant_vit + +backend: + model: google/vit-base-patch16-224 + quantization: true + quantization_config: + is_static: true + per_channel: false + + calibration: true + diff --git a/tests/configs/cuda_inference_pytorch_awq_exllama.yaml b/tests/configs/cuda_inference_pytorch_awq_exllama.yaml new file mode 100644 index 00000000..8dfc0064 --- /dev/null +++ b/tests/configs/cuda_inference_pytorch_awq_exllama.yaml @@ -0,0 +1,25 @@ +defaults: + - backend: pytorch + - _base_ # inherits from base config + - _inference_ # inherits from inference config + - _cuda_ # inherits from cuda config + - _no_weights_sweep_ # sweep over no_weights: true,false + - _self_ # hydra 1.1 compatibility + +experiment_name: cuda_inference_pytorch_awq_exllama + +benchmark: + input_shapes: + batch_size: 4 + sequence_length: 128 + + generate_kwargs: + max_new_tokens: 128 + min_new_tokens: 128 + +backend: + model: TheBloke/Mistral-7B-Instruct-v0.1-AWQ + quantization_scheme: "awq" + quantization_config: + exllama_config: + version: 2 diff --git a/tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml b/tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml new file mode 100644 index 00000000..a1db9e09 --- /dev/null +++ b/tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml @@ -0,0 +1,17 @@ 
+defaults: + - backend: pytorch + - _base_ # inherits from base config + - _inference_ # inherits from inference config + - _cuda_ # inherits from cuda config + - _gpt_ # use gpt2 model + - _no_weights_sweep_ # sweep no_weights: true, false + - _self_ # hydra 1.1 compatibility + +experiment_name: cuda_inference_pytorch_gpt_bnb_4bit + +backend: + quantization_scheme: "bnb" + quantization_config: + load_in_4bit: true + llm_int8_threshold: 6.0 + bnb_4bit_compute_dtype: float16 diff --git a/tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml b/tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml new file mode 100644 index 00000000..c6686580 --- /dev/null +++ b/tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml @@ -0,0 +1,16 @@ +defaults: + - backend: pytorch + - _base_ # inherits from base config + - _inference_ # inherits from inference config + - _cuda_ # inherits from cuda config + - _gpt_ # use gpt2 model + - _no_weights_sweep_ # sweep no_weights: true, false + - _self_ # hydra 1.1 compatibility + +experiment_name: cuda_inference_pytorch_gpt_bnb_8bit + +backend: + quantization_scheme: "bnb" + quantization_config: + load_in_8bit: true + llm_int8_threshold: 6.0 From 746532f42fcfe61fb1fd89989edc6a15040405de Mon Sep 17 00:00:00 2001 From: Ali Abdelkader Date: Tue, 5 Mar 2024 15:55:25 +0200 Subject: [PATCH 2/6] set bnb llm_int8_threshold to 0 for reproducibility in tests --- tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml | 1 - tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml b/tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml index a1db9e09..0769a008 100644 --- a/tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml +++ b/tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml @@ -13,5 +13,4 @@ backend: quantization_scheme: "bnb" quantization_config: load_in_4bit: true - llm_int8_threshold: 6.0 bnb_4bit_compute_dtype: float16 diff --git 
a/tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml b/tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml index c6686580..21e528e6 100644 --- a/tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml +++ b/tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml @@ -13,4 +13,3 @@ backend: quantization_scheme: "bnb" quantization_config: load_in_8bit: true - llm_int8_threshold: 6.0 From 13cd173214cddd920c852b43a36d4c037ce30f58 Mon Sep 17 00:00:00 2001 From: Ali Abdelkader Date: Tue, 5 Mar 2024 22:54:18 +0200 Subject: [PATCH 3/6] handle installation of autoawq bitsandbytes without setup.py - remove autoawq bitsandbytes from setup.py - add autoawq bitsandbytes to github workflows and makefile --- .github/workflows/test_cli_cuda_pytorch.yaml | 2 +- .github/workflows/test_cli_rocm_pytorch.yaml | 2 +- Makefile | 10 +++++----- setup.py | 2 -- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml index 3d8a8b0b..02fe8140 100644 --- a/.github/workflows/test_cli_cuda_pytorch.yaml +++ b/.github/workflows/test_cli_cuda_pytorch.yaml @@ -53,4 +53,4 @@ jobs: --workdir /workspace/optimum-benchmark --entrypoint /bin/bash opt-bench-cuda:${{ matrix.image.cuda_version }} - -c "pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,bitsandbytes] && pytest -k 'cli and cuda and pytorch' -x" + -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pip install bitsandbytes git+https://github.com/casper-hansen/AutoAWQ.git && pytest -k 'cli and cuda and pytorch' -x" diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index c4ae7139..1cc27f83 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -51,4 +51,4 @@ jobs: --device /dev/dri/renderD129 --entrypoint /bin/bash opt-bench-rocm:${{ matrix.image.rocm_version }} - -c "pip install -e 
.[testing,diffusers,timm,deepspeed,peft] && pytest -k 'cli and cuda and pytorch' -x" + -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pip install git+https://github.com/casper-hansen/AutoAWQ.git && pytest -k 'cli and cuda and pytorch not bnb' -x" diff --git a/Makefile b/Makefile index e511be9a..5dff0f9b 100644 --- a/Makefile +++ b/Makefile @@ -15,9 +15,9 @@ CLI_MISC_REQS := testing CLI_CUDA_ONNXRUNTIME_REQS := testing,timm,diffusers CLI_ROCM_ONNXRUNTIME_REQS := testing,timm,diffusers -CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,autoawq,bitsandbytes +CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft - +CLI_CUDA_PYTORCH_QUANTIZATION_REGS := bitsandbytes git+https://github.com/casper-hansen/AutoAWQ.git CLI_CPU_OPENVINO_REQS := testing,openvino,timm,diffusers CLI_CPU_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft CLI_CPU_ONNXRUNTIME_REQS := testing,onnxruntime,timm,diffusers @@ -108,7 +108,7 @@ define test_nvidia --entrypoint /bin/bash \ --volume $(PWD):/workspace \ --workdir /workspace \ - opt-bench-$(1):local -c "pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x" + opt-bench-$(1):local -c "pip install -e .[$(2)] && pip install $(CLI_CUDA_PYTORCH_QUANTIZATION_REGS) && pytest tests/ -k '$(3)' -x" endef define test_amdgpu @@ -122,7 +122,7 @@ define test_amdgpu --entrypoint /bin/bash \ --volume $(PWD):/workspace \ --workdir /workspace \ - opt-bench-$(1):local -c "pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x" + opt-bench-$(1):local -c "pip install -e .[$(2)] && pip install git+https://github.com/casper-hansen/AutoAWQ.git && pytest tests/ -k '$(3)' -x" endef # group the extra @@ -144,7 +144,7 @@ test_cli_cuda_pytorch: $(call test_nvidia,cuda,$(CLI_CUDA_PYTORCH_REQS),cli and cuda and pytorch) test_cli_rocm_pytorch: - $(call test_amdgpu,rocm,$(CLI_ROCM_PYTORCH_REQS),cli and cuda and pytorch and peft) + $(call 
test_amdgpu,rocm,$(CLI_ROCM_PYTORCH_REQS),cli and cuda and pytorch and peft and not bnb) test_cli_cuda_onnxruntime: $(call test_nvidia,cuda,$(CLI_CUDA_ONNXRUNTIME_REQS),cli and cuda and onnxruntime) diff --git a/setup.py b/setup.py index 1fcff05b..50dc0528 100644 --- a/setup.py +++ b/setup.py @@ -55,8 +55,6 @@ "diffusers": ["diffusers"], "timm": ["timm"], "peft": ["peft"], - "autoawq": ["autoawq"], - "bitsandbytes": ["bitsandbytes"], } From 5d256d6ff667cdb40287b1ec216b14357f42ca87 Mon Sep 17 00:00:00 2001 From: Ali Abdelkader Date: Wed, 6 Mar 2024 13:00:50 +0200 Subject: [PATCH 4/6] minor fixes resolving PR review comments --- .github/workflows/test_cli_rocm_pytorch.yaml | 2 +- Makefile | 2 +- tests/configs/cuda_inference_pytorch_awq_exllama.yaml | 11 +---------- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index 1cc27f83..c991375c 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -51,4 +51,4 @@ jobs: --device /dev/dri/renderD129 --entrypoint /bin/bash opt-bench-rocm:${{ matrix.image.rocm_version }} - -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pip install git+https://github.com/casper-hansen/AutoAWQ.git && pytest -k 'cli and cuda and pytorch not bnb' -x" + -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pip install git+https://github.com/casper-hansen/AutoAWQ.git && pytest -k 'cli and cuda and pytorch and not bnb' -x" diff --git a/Makefile b/Makefile index 5dff0f9b..5d27105b 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ CLI_CUDA_ONNXRUNTIME_REQS := testing,timm,diffusers CLI_ROCM_ONNXRUNTIME_REQS := testing,timm,diffusers CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft -CLI_CUDA_PYTORCH_QUANTIZATION_REGS := bitsandbytes git+https://github.com/casper-hansen/AutoAWQ.git 
+CLI_CUDA_PYTORCH_QUANTIZATION_REQS := bitsandbytes git+https://github.com/casper-hansen/AutoAWQ.git CLI_CPU_OPENVINO_REQS := testing,openvino,timm,diffusers CLI_CPU_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft CLI_CPU_ONNXRUNTIME_REQS := testing,onnxruntime,timm,diffusers diff --git a/tests/configs/cuda_inference_pytorch_awq_exllama.yaml b/tests/configs/cuda_inference_pytorch_awq_exllama.yaml index 8dfc0064..4059222c 100644 --- a/tests/configs/cuda_inference_pytorch_awq_exllama.yaml +++ b/tests/configs/cuda_inference_pytorch_awq_exllama.yaml @@ -8,17 +8,8 @@ defaults: experiment_name: cuda_inference_pytorch_awq_exllama -benchmark: - input_shapes: - batch_size: 4 - sequence_length: 128 - - generate_kwargs: - max_new_tokens: 128 - min_new_tokens: 128 - backend: - model: TheBloke/Mistral-7B-Instruct-v0.1-AWQ + model: TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ quantization_scheme: "awq" quantization_config: exllama_config: From 6d323e4fb82a58b2132acb80f07b64949adfea6f Mon Sep 17 00:00:00 2001 From: Ali Abdelkader Date: Wed, 6 Mar 2024 21:48:12 +0200 Subject: [PATCH 5/6] add autoawq and bnb to setup.py - update github workflow files to install autoawq and bnb using setup.py - "requests" is installed independently because "autoawq@git+https..." requires it to proceed with its installation. 
--- .github/workflows/test_cli_cuda_pytorch.yaml | 2 +- .github/workflows/test_cli_rocm_pytorch.yaml | 2 +- Makefile | 9 ++++----- setup.py | 2 ++ 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml index 02fe8140..15445f1f 100644 --- a/.github/workflows/test_cli_cuda_pytorch.yaml +++ b/.github/workflows/test_cli_cuda_pytorch.yaml @@ -53,4 +53,4 @@ jobs: --workdir /workspace/optimum-benchmark --entrypoint /bin/bash opt-bench-cuda:${{ matrix.image.cuda_version }} - -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pip install bitsandbytes git+https://github.com/casper-hansen/AutoAWQ.git && pytest -k 'cli and cuda and pytorch' -x" + -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,bitsandbytes,autoawq] && pytest -k 'cli and cuda and pytorch' -x" diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index c991375c..1006b2e1 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -51,4 +51,4 @@ jobs: --device /dev/dri/renderD129 --entrypoint /bin/bash opt-bench-rocm:${{ matrix.image.rocm_version }} - -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pip install git+https://github.com/casper-hansen/AutoAWQ.git && pytest -k 'cli and cuda and pytorch and not bnb' -x" + -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq] && pytest -k 'cli and cuda and pytorch and not bnb' -x" diff --git a/Makefile b/Makefile index 5d27105b..622588ae 100644 --- a/Makefile +++ b/Makefile @@ -15,9 +15,8 @@ CLI_MISC_REQS := testing CLI_CUDA_ONNXRUNTIME_REQS := testing,timm,diffusers CLI_ROCM_ONNXRUNTIME_REQS := testing,timm,diffusers -CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft -CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft 
-CLI_CUDA_PYTORCH_QUANTIZATION_REQS := bitsandbytes git+https://github.com/casper-hansen/AutoAWQ.git +CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,bitsandbytes,autoawq +CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,autoawq CLI_CPU_OPENVINO_REQS := testing,openvino,timm,diffusers CLI_CPU_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft CLI_CPU_ONNXRUNTIME_REQS := testing,onnxruntime,timm,diffusers @@ -108,7 +107,7 @@ define test_nvidia --entrypoint /bin/bash \ --volume $(PWD):/workspace \ --workdir /workspace \ - opt-bench-$(1):local -c "pip install -e .[$(2)] && pip install $(CLI_CUDA_PYTORCH_QUANTIZATION_REGS) && pytest tests/ -k '$(3)' -x" + opt-bench-$(1):local -c "pip install requests && pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x" endef define test_amdgpu @@ -122,7 +121,7 @@ define test_amdgpu --entrypoint /bin/bash \ --volume $(PWD):/workspace \ --workdir /workspace \ - opt-bench-$(1):local -c "pip install -e .[$(2)] && pip install git+https://github.com/casper-hansen/AutoAWQ.git && pytest tests/ -k '$(3)' -x" + opt-bench-$(1):local -c "pip install requests && pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x" endef # group the extra diff --git a/setup.py b/setup.py index 50dc0528..6a10f78b 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,8 @@ "diffusers": ["diffusers"], "timm": ["timm"], "peft": ["peft"], + "autoawq": ["autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"], + "bitsandbytes": ["bitsandbytes"] } From dcae350152135ac488b96675ba63016cc60b8aeb Mon Sep 17 00:00:00 2001 From: Ali Abdelkader Date: Fri, 8 Mar 2024 21:29:58 +0200 Subject: [PATCH 6/6] minor fix to setup.py style --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6a10f78b..6212b4e1 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,7 @@ "timm": ["timm"], "peft": ["peft"], "autoawq": ["autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"], - "bitsandbytes": ["bitsandbytes"] 
+ "bitsandbytes": ["bitsandbytes"], }