From 7fd9323bead83f06df39d8227af3ff1537477946 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Date: Mon, 12 Feb 2024 09:27:29 +0100
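Subject: [PATCH] update examples

List `device` first in each example's backend section, drop the
CUDA_VISIBLE_DEVICES / CUDA_DEVICE_ORDER entries from hydra.job.env_set
(the pytorch and tgi examples now set backend.device_ids), and add
explanatory comments to the hydra settings.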

---
 examples/neural_compressor_ptq_bert.yaml   | 10 ++++++++--
 examples/onnxruntime_static_quant_vit.yaml | 11 ++++++++---
 examples/openvino_diffusion.yaml           | 10 ++++++++--
 examples/openvino_static_quant_bert.yaml   | 10 ++++++++--
 examples/pytorch_bert.yaml                 | 11 ++++++++---
 examples/pytorch_llama.yaml                | 12 +++++++++---
 examples/pytorch_timm.yaml                 | 11 ++++++++---
 examples/tgi_llama.yaml                    | 12 +++++++++---
 examples/trt_llama.yaml                    | 10 +++++++---
 9 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/examples/neural_compressor_ptq_bert.yaml b/examples/neural_compressor_ptq_bert.yaml
index 64691369..a8c83c88 100644
--- a/examples/neural_compressor_ptq_bert.yaml
+++ b/examples/neural_compressor_ptq_bert.yaml
@@ -10,22 +10,28 @@ defaults:
 experiment_name: openvino_static_quant_bert
 
 backend:
-  model: bert-base-uncased
+  device: cpu
   no_weights: true
+  model: bert-base-uncased
   ptq_quantization: true
   calibration: true
-  device: cpu
 
 benchmark:
   input_shapes:
     batch_size: 1
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
diff --git a/examples/onnxruntime_static_quant_vit.yaml b/examples/onnxruntime_static_quant_vit.yaml
index 0b06bc0e..d324415d 100644
--- a/examples/onnxruntime_static_quant_vit.yaml
+++ b/examples/onnxruntime_static_quant_vit.yaml
@@ -10,23 +10,28 @@ defaults:
 experiment_name: onnxruntime_static_quant_vit
 
 backend:
+  device: cpu
+  no_weights: true
   model: google/vit-base-patch16-224
   quantization: true
   quantization_config:
     is_static: true
     per_channel: false
-  device: cpu
 
   calibration: true
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
-      CUDA_VISIBLE_DEVICES: 0
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
diff --git a/examples/openvino_diffusion.yaml b/examples/openvino_diffusion.yaml
index 3591ecd7..f9f62e64 100644
--- a/examples/openvino_diffusion.yaml
+++ b/examples/openvino_diffusion.yaml
@@ -10,22 +10,28 @@ defaults:
 model: stabilityai/stable-diffusion-2-1
 
 backend:
+  device: cpu
   experiment_name: openvino_diffusion
-  export: true
   reshape: true
+  export: true
   half: true
-  device: cpu
 
 benchmark:
   input_shapes:
     batch_size: 1
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
diff --git a/examples/openvino_static_quant_bert.yaml b/examples/openvino_static_quant_bert.yaml
index c349f3ea..83921f4c 100644
--- a/examples/openvino_static_quant_bert.yaml
+++ b/examples/openvino_static_quant_bert.yaml
@@ -10,24 +10,30 @@ defaults:
 experiment_name: openvino_static_quant_bert
 
 backend:
+  device: cpu
+  no_weights: true
   model: bert-base-uncased
   export: true
-  no_weights: true
   quantization: true
   calibration: true
   reshape: true
-  device: cpu
 
 benchmark:
   input_shapes:
     batch_size: 1
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
diff --git a/examples/pytorch_bert.yaml b/examples/pytorch_bert.yaml
index 71a087f0..5a36147c 100644
--- a/examples/pytorch_bert.yaml
+++ b/examples/pytorch_bert.yaml
@@ -10,17 +10,22 @@ defaults:
 experiment_name: pytorch_bert
 
 backend:
-  model: bert-base-uncased
   device: cpu
+  device_ids: 0
+  model: bert-base-uncased
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
-      CUDA_VISIBLE_DEVICES: 0
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
diff --git a/examples/pytorch_llama.yaml b/examples/pytorch_llama.yaml
index f6b29792..2c9e2845 100644
--- a/examples/pytorch_llama.yaml
+++ b/examples/pytorch_llama.yaml
@@ -10,8 +10,10 @@ defaults:
 experiment_name: pytorch_llama
 
 backend:
-  model: TheBloke/Llama-2-70B-AWQ
   device: cuda
+  device_ids: 0
+  no_weights: true
+  model: TheBloke/Llama-2-70B-AWQ
 
 launcher:
   device_isolation: true
@@ -22,14 +24,18 @@ benchmark:
     sequence_length: 256
   new_tokens: 1000
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
-      CUDA_VISIBLE_DEVICES: 0
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
diff --git a/examples/pytorch_timm.yaml b/examples/pytorch_timm.yaml
index 03125599..4b2c5295 100644
--- a/examples/pytorch_timm.yaml
+++ b/examples/pytorch_timm.yaml
@@ -10,8 +10,9 @@ defaults:
 experiment_name: pytorch_timm
 
 backend:
-  model: timm/mobilenetv3_large_100.ra_in1k
   device: cuda
+  device_ids: 0
+  model: timm/mobilenetv3_large_100.ra_in1k
 
 launcher:
   device_isolation: true
@@ -20,14 +21,18 @@ benchmark:
   input_shapes:
     batch_size: 1
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
-      CUDA_VISIBLE_DEVICES: 0
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
diff --git a/examples/tgi_llama.yaml b/examples/tgi_llama.yaml
index 9bf8b4d1..a23c5c55 100644
--- a/examples/tgi_llama.yaml
+++ b/examples/tgi_llama.yaml
@@ -10,10 +10,12 @@ defaults:
 experiment_name: tgi_llama
 
 backend:
+  device: cuda
+  device_ids: 0,1
+  device_map: true
   model: TheBloke/Llama-2-7B-AWQ
   quantization_scheme: awq
   sharded: false
-  device: cuda
 
 benchmark:
   input_shapes:
@@ -21,14 +23,18 @@ benchmark:
     sequence_length: 256
   new_tokens: 1000
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
-      CUDA_VISIBLE_DEVICES: 0
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
diff --git a/examples/trt_llama.yaml b/examples/trt_llama.yaml
index e3f8844d..702bb39e 100644
--- a/examples/trt_llama.yaml
+++ b/examples/trt_llama.yaml
@@ -10,8 +10,8 @@ defaults:
 experiment_name: trt_llama
 
 backend:
-  model: NousResearch/Llama-2-7b-hf
   device: cuda
+  model: NousResearch/Llama-2-7b-hf
 
 benchmark:
   input_shapes:
@@ -19,14 +19,18 @@ benchmark:
     sequence_length: 64
   new_tokens: 128
 
+# hydra/cli specific settings
 hydra:
   run:
+    # where to store run results
     dir: runs/${experiment_name}
   sweep:
+    # where to store sweep results
     dir: sweeps/${experiment_name}
   job:
+    # change working directory to the run directory
     chdir: true
     env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # so that benchmarks that have already been run are not skipped
       OVERRIDE_BENCHMARKS: 1
-      CUDA_VISIBLE_DEVICES: 0
-      CUDA_DEVICE_ORDER: PCI_BUS_ID