add test configurations for quantization with onnxruntime, awq, bnb (#95) #144

Merged
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_cuda_pytorch.yaml
@@ -53,4 +53,4 @@ jobs:
          --workdir /workspace/optimum-benchmark
          --entrypoint /bin/bash
          opt-bench-cuda:${{ matrix.image.cuda_version }}
-         -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest -k 'cli and cuda and pytorch' -x"
+         -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,bitsandbytes,autoawq] && pytest -k 'cli and cuda and pytorch' -x"
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_rocm_pytorch.yaml
@@ -51,4 +51,4 @@ jobs:
          --device /dev/dri/renderD129
          --entrypoint /bin/bash
          opt-bench-rocm:${{ matrix.image.rocm_version }}
-         -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest -k 'cli and cuda and pytorch' -x"
+         -c "pip install requests && pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq] && pytest -k 'cli and cuda and pytorch and not bnb' -x"
11 changes: 5 additions & 6 deletions Makefile
@@ -15,9 +15,8 @@ CLI_MISC_REQS := testing

CLI_CUDA_ONNXRUNTIME_REQS := testing,timm,diffusers
CLI_ROCM_ONNXRUNTIME_REQS := testing,timm,diffusers
-CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft
-CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft
+CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,bitsandbytes,autoawq
+CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft,autoawq
CLI_CPU_OPENVINO_REQS := testing,openvino,timm,diffusers
CLI_CPU_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft
CLI_CPU_ONNXRUNTIME_REQS := testing,onnxruntime,timm,diffusers
@@ -108,7 +107,7 @@ define test_nvidia
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
-	opt-bench-$(1):local -c "pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x"
+	opt-bench-$(1):local -c "pip install requests && pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x"
endef

define test_amdgpu
@@ -122,7 +121,7 @@ define test_amdgpu
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
-	opt-bench-$(1):local -c "pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x"
+	opt-bench-$(1):local -c "pip install requests && pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x"
endef

# group the extra
@@ -144,7 +143,7 @@ test_cli_cuda_pytorch:
$(call test_nvidia,cuda,$(CLI_CUDA_PYTORCH_REQS),cli and cuda and pytorch)

test_cli_rocm_pytorch:
-	$(call test_amdgpu,rocm,$(CLI_ROCM_PYTORCH_REQS),cli and cuda and pytorch and peft)
+	$(call test_amdgpu,rocm,$(CLI_ROCM_PYTORCH_REQS),cli and cuda and pytorch and peft and not bnb)

test_cli_cuda_onnxruntime:
$(call test_nvidia,cuda,$(CLI_CUDA_ONNXRUNTIME_REQS),cli and cuda and onnxruntime)
2 changes: 2 additions & 0 deletions setup.py
@@ -55,6 +55,8 @@
"diffusers": ["diffusers"],
"timm": ["timm"],
"peft": ["peft"],
"autoawq": ["autoawq@git+https://github.com/casper-hansen/AutoAWQ.git"],
"bitsandbytes": ["bitsandbytes"],
}


4 changes: 4 additions & 0 deletions tests/configs/_no_weights_sweep_.yaml
@@ -0,0 +1,4 @@
hydra:
sweeper:
params:
backend.no_weights: true,false
20 changes: 20 additions & 0 deletions tests/configs/cpu_inference_onnxruntime_static_quant_vit.yaml
@@ -0,0 +1,20 @@
defaults:
- backend: onnxruntime
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- _cpu_ # inherits from cpu config
- _no_weights_sweep_ # sweep over no_weights: true,false
- _self_ # hydra 1.1 compatibility

experiment_name: cpu_inference_onnxruntime_static_quant_vit

backend:
model: google/vit-base-patch16-224
quantization: true
quantization_config:
is_static: true
per_channel: false

calibration: true

20 changes: 20 additions & 0 deletions tests/configs/cuda_inference_onnxruntime_static_quant_vit.yaml
@@ -0,0 +1,20 @@
defaults:
- backend: onnxruntime
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- _cuda_ # inherits from cuda config
- _no_weights_sweep_ # sweep over no_weights: true,false
- _self_ # hydra 1.1 compatibility

experiment_name: cuda_inference_onnxruntime_static_quant_vit

backend:
model: google/vit-base-patch16-224
quantization: true
quantization_config:
is_static: true
per_channel: false

calibration: true
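Both vit static-quantization configs (CPU above is mirrored by the CUDA variant below) exercise Optimum's ONNX Runtime quantization path: `is_static: true` plus `calibration: true` means activation ranges must be computed on a calibration set before quantizing. A minimal sketch of that flow using the public `optimum.onnxruntime` API — the calibration dataset, sample count, save directory, and the CPU-oriented `avx512` preset are illustrative assumptions, not taken from the benchmark's internals:

```python
from optimum.onnxruntime import ORTModelForImageClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoCalibrationConfig, AutoQuantizationConfig
from transformers import AutoImageProcessor

model_id = "google/vit-base-patch16-224"
model = ORTModelForImageClassification.from_pretrained(model_id, export=True)
quantizer = ORTQuantizer.from_pretrained(model)
processor = AutoImageProcessor.from_pretrained(model_id)

# is_static / per_channel mirror the YAML's quantization_config;
# avx512 is one of several presets (the CUDA variant would differ)
qconfig = AutoQuantizationConfig.avx512(is_static=True, per_channel=False)

# calibration: true -> compute activation ranges from a small dataset
def preprocess(examples):
    return processor(images=examples["image"])

calibration_dataset = quantizer.get_calibration_dataset(
    "beans", dataset_split="train", preprocess_function=preprocess, num_samples=64
)
calibration_config = AutoCalibrationConfig.minmax(calibration_dataset)
ranges = quantizer.fit(dataset=calibration_dataset, calibration_config=calibration_config)

quantizer.quantize(
    save_dir="vit_static_quant",  # illustrative output directory
    quantization_config=qconfig,
    calibration_tensors_range=ranges,
)
```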

16 changes: 16 additions & 0 deletions tests/configs/cuda_inference_pytorch_awq_exllama.yaml
@@ -0,0 +1,16 @@
defaults:
- backend: pytorch
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- _cuda_ # inherits from cuda config
- _no_weights_sweep_ # sweep over no_weights: true,false
- _self_ # hydra 1.1 compatibility

experiment_name: cuda_inference_pytorch_awq_exllama

backend:
model: TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ
quantization_scheme: "awq"
quantization_config:
exllama_config:
version: 2
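The `quantization_scheme: "awq"` options presumably end up in a `transformers.AwqConfig` when the backend loads the model. A minimal sketch of the equivalent direct usage, assuming that mapping (the `device_map` choice is illustrative):

```python
from transformers import AutoModelForCausalLM, AwqConfig

# version="exllama" selects the ExLlama kernels; the nested
# {"version": 2} matches the YAML's exllama_config and picks ExLlama-v2
quantization_config = AwqConfig(
    version="exllama",
    exllama_config={"version": 2},
)

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
    quantization_config=quantization_config,
    device_map="cuda",  # illustrative; the test targets a CUDA device
)
```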
16 changes: 16 additions & 0 deletions tests/configs/cuda_inference_pytorch_gpt_bnb_4bit.yaml
@@ -0,0 +1,16 @@
defaults:
- backend: pytorch
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- _cuda_ # inherits from cuda config
- _gpt_ # use gpt2 model
- _no_weights_sweep_ # sweep no_weights: true, false
- _self_ # hydra 1.1 compatibility

experiment_name: cuda_inference_pytorch_gpt_bnb_4bit

backend:
quantization_scheme: "bnb"
quantization_config:
load_in_4bit: true
bnb_4bit_compute_dtype: float16
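Assuming the `"bnb"` scheme forwards `quantization_config` to `transformers.BitsAndBytesConfig`, the test roughly corresponds to the following (gpt2 comes from the `_gpt_` default; `device_map` is illustrative):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the YAML: load_in_4bit with bnb_4bit_compute_dtype: float16
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "gpt2",  # from the _gpt_ default
    quantization_config=quantization_config,
    device_map="cuda",  # illustrative
)
```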
15 changes: 15 additions & 0 deletions tests/configs/cuda_inference_pytorch_gpt_bnb_8bit.yaml
@@ -0,0 +1,15 @@
defaults:
- backend: pytorch
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- _cuda_ # inherits from cuda config
- _gpt_ # use gpt2 model
- _no_weights_sweep_ # sweep no_weights: true, false
- _self_ # hydra 1.1 compatibility

experiment_name: cuda_inference_pytorch_gpt_bnb_8bit

backend:
quantization_scheme: "bnb"
quantization_config:
load_in_8bit: true
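And the 8-bit counterpart, under the same assumed mapping:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# load_in_8bit: true -> LLM.int8() quantization via bitsandbytes
model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="cuda",  # illustrative
)
```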