From e5423bf0e905266874871e3ca1ea06b13476b254 Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Sun, 31 Mar 2024 04:05:27 +1000
Subject: [PATCH 01/14] Qwen1.5 0.5B pybuda implementation

---
 .../model_qwen1.5/pytorch_qwen1.5.py | 71 +++++++++++++++++++
 model_demos/requirements.txt         |  4 ++
 2 files changed, 75 insertions(+)
 create mode 100644 model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py

diff --git a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
new file mode 100644
index 00000000..a3e46031
--- /dev/null
+++ b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
@@ -0,0 +1,71 @@
+import pybuda
+
+from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config
+from pybuda.transformers.pipeline import pipeline as pybuda_pipeline
+
+"""
+=== Models ===
+
+Qwen/Qwen1.5-0.5B
+Qwen/Qwen1.5-0.5B-Chat
+Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4
+Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8
+"""
+
+model_name = "Qwen/Qwen1.5-0.5B"
+
+
+def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
+    # Set PyBuda configurations
+    compiler_cfg = pybuda.config._get_global_compiler_config()
+    compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b
+
+    # Config
+    config = Qwen2Config.from_pretrained(model_name)
+    config_dict = config.to_dict()
+    config_dict["return_dict"] = False
+    config_dict["use_cache"] = False
+
+    config = Qwen2Config(**config_dict)
+
+    # Load the model and tokenizer
+    model = Qwen2ForCausalLM.from_pretrained(
+        model_name, config=config, device_map=device)
+    tokenizer = Qwen2Tokenizer.from_pretrained(model_name, device_map=device)
+
+    # Set pad token
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Example usage
+    prompt = "What is a neural network?"
+
+    # Initialize pipeline
+    text_generator = pybuda_pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        config=config,
+    )
+
+    # Inference
+    output = text_generator(
+        prompt,
+        do_sample=True,
+        pad_token_id=tokenizer.pad_token_id,
+        max_new_tokens=max_length,
+        temperature=temperature,
+        top_k=top_k,
+        top_p=top_p,
+    )
+
+    # Display output
+    print("OUTPUT:\n", output[0]["generated_text"])
+
+
+if __name__ == "__main__":
+    run_qwen_causal_lm(
+        max_length=1024,
+        top_p=0.9,
+        top_k=50,
+        temperature=0.7
+    )
\ No newline at end of file
diff --git a/model_demos/requirements.txt b/model_demos/requirements.txt
index 4b8b2f52..43929eee 100644
--- a/model_demos/requirements.txt
+++ b/model_demos/requirements.txt
@@ -9,4 +9,8 @@ soundfile==0.12.1 # For Whisper
 librosa==0.10.0 # For Whisper
 numba==0.53.1 # For Whisper
 segmentation-models-pytorch==0.3.3 # For U-Net
+pylocron==0.2.1 # For U-Net
 diffusers==0.14.0 # For Stable Diffusion
+transformers==4.37.0 # For Qwen2
+acceleration==0.28.0 # For Qwen2
+auto-gptq==0.7.1 # For Qwen2-GPTQ
\ No newline at end of file

From 88641a373588ac91fa724c45078c48435a93e41b Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Sun, 31 Mar 2024 04:07:10 +1000
Subject: [PATCH 02/14] remove unneeded requirement

---
 model_demos/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/model_demos/requirements.txt b/model_demos/requirements.txt
index 43929eee..118c65b9 100644
--- a/model_demos/requirements.txt
+++ b/model_demos/requirements.txt
@@ -9,7 +9,6 @@ soundfile==0.12.1 # For Whisper
 librosa==0.10.0 # For Whisper
 numba==0.53.1 # For Whisper
 segmentation-models-pytorch==0.3.3 # For U-Net
-pylocron==0.2.1 # For U-Net
 diffusers==0.14.0 # For Stable Diffusion
 transformers==4.37.0 # For Qwen2
 acceleration==0.28.0 # For Qwen2

From e442e192c82b4d3d03b1b08fd1917ca6faf97928 Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Sun, 31 Mar 2024 04:25:12 +1000
Subject: [PATCH 03/14] rename "acceleration" to "accelerate"

---
 model_demos/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model_demos/requirements.txt b/model_demos/requirements.txt
index 118c65b9..637db37e 100644
--- a/model_demos/requirements.txt
+++ b/model_demos/requirements.txt
@@ -11,5 +11,5 @@ numba==0.53.1 # For Whisper
 segmentation-models-pytorch==0.3.3 # For U-Net
 diffusers==0.14.0 # For Stable Diffusion
 transformers==4.37.0 # For Qwen2
-acceleration==0.28.0 # For Qwen2
+accelerate==0.28.0 # For Qwen2
 auto-gptq==0.7.1 # For Qwen2-GPTQ
\ No newline at end of file

From 597a8b95ec85d196b5bd8888c89d15ded86c0ac4 Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Sun, 14 Apr 2024 15:35:50 +1000
Subject: [PATCH 04/14] Update env vars and compiler configs

---
 model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
index a3e46031..9d450174 100644
--- a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
+++ b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
@@ -1,4 +1,5 @@
 import pybuda
+import os
 
 from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config
 from pybuda.transformers.pipeline import pipeline as pybuda_pipeline
@@ -16,9 +17,15 @@
 
 
 def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
+    # Set environment variables
+    os.environ['PYBUDA_DEVMODE'] = '0'
+    os.environ['TT_BACKEND_TIMEOUT'] = '0'
+    os.environ["PYBUDA_FORK_JOIN_EXPAND_FORK_OUTPUT_BUF"] = "0"
+    os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536"
+
     # Set PyBuda configurations
     compiler_cfg = pybuda.config._get_global_compiler_config()
-    compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b
+    compiler_cfg.amp_level = 0
 
     # Config
     config = Qwen2Config.from_pretrained(model_name)

From f136d08b621c245d7ab9f6eb4cf532309bf54123 Mon Sep 17 00:00:00 2001
From: "Jush (yupiop12)" <36951064+JushBJJ@users.noreply.github.com>
Date: Mon, 15 Apr 2024 22:11:40 +1000
Subject: [PATCH 05/14] remove undefined device_map

---
 model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
index 9d450174..4e689e75 100644
--- a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
+++ b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
@@ -36,9 +36,8 @@ def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
     config = Qwen2Config(**config_dict)
 
     # Load the model and tokenizer
-    model = Qwen2ForCausalLM.from_pretrained(
-        model_name, config=config, device_map=device)
-    tokenizer = Qwen2Tokenizer.from_pretrained(model_name, device_map=device)
+    model = Qwen2ForCausalLM.from_pretrained(model_name, config=config)
+    tokenizer = Qwen2Tokenizer.from_pretrained(model_name)
 
     # Set pad token
     tokenizer.pad_token = tokenizer.eos_token
@@ -75,4 +74,4 @@ def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
         top_p=0.9,
         top_k=50,
         temperature=0.7
-    )
\ No newline at end of file
+    )

From 800737ad491f02157175b0947868a1b1ed7571da Mon Sep 17 00:00:00 2001
From: "Jush (yupiop12)" <36951064+JushBJJ@users.noreply.github.com>
Date: Fri, 26 Apr 2024 21:23:46 +1000
Subject: [PATCH 06/14] Remove misleading and unnecessary environment variables

---
 model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
index 4e689e75..cd1261a5 100644
--- a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
+++ b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
@@ -18,10 +18,7 @@
 
 def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
     # Set environment variables
-    os.environ['PYBUDA_DEVMODE'] = '0'
     os.environ['TT_BACKEND_TIMEOUT'] = '0'
-    os.environ["PYBUDA_FORK_JOIN_EXPAND_FORK_OUTPUT_BUF"] = "0"
-    os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536"
 
     # Set PyBuda configurations
     compiler_cfg = pybuda.config._get_global_compiler_config()

From 471acec62d521292d09851097b139aa3b793e8dc Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Fri, 26 Jul 2024 10:50:06 +1000
Subject: [PATCH 07/14] remove qwen from phi branch

---
 .../model_qwen1.5/pytorch_qwen1.5.py | 74 -------------------
 1 file changed, 74 deletions(-)
 delete mode 100644 model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py

diff --git a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py b/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
deleted file mode 100644
index cd1261a5..00000000
--- a/model_demos/nlp_demos/model_qwen1.5/pytorch_qwen1.5.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import pybuda
-import os
-
-from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config
-from pybuda.transformers.pipeline import pipeline as pybuda_pipeline
-
-"""
-=== Models ===
-
-Qwen/Qwen1.5-0.5B
-Qwen/Qwen1.5-0.5B-Chat
-Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4
-Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8
-"""
-
-model_name = "Qwen/Qwen1.5-0.5B"
-
-
-def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
-    # Set environment variables
-    os.environ['TT_BACKEND_TIMEOUT'] = '0'
-
-    # Set PyBuda configurations
-    compiler_cfg = pybuda.config._get_global_compiler_config()
-    compiler_cfg.amp_level = 0
-
-    # Config
-    config = Qwen2Config.from_pretrained(model_name)
-    config_dict = config.to_dict()
-    config_dict["return_dict"] = False
-    config_dict["use_cache"] = False
-
-    config = Qwen2Config(**config_dict)
-
-    # Load the model and tokenizer
-    model = Qwen2ForCausalLM.from_pretrained(model_name, config=config)
-    tokenizer = Qwen2Tokenizer.from_pretrained(model_name)
-
-    # Set pad token
-    tokenizer.pad_token = tokenizer.eos_token
-
-    # Example usage
-    prompt = "What is a neural network?"
-
-    # Initialize pipeline
-    text_generator = pybuda_pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        config=config,
-    )
-
-    # Inference
-    output = text_generator(
-        prompt,
-        do_sample=True,
-        pad_token_id=tokenizer.pad_token_id,
-        max_new_tokens=max_length,
-        temperature=temperature,
-        top_k=top_k,
-        top_p=top_p,
-    )
-
-    # Display output
-    print("OUTPUT:\n", output[0]["generated_text"])
-
-
-if __name__ == "__main__":
-    run_qwen_causal_lm(
-        max_length=1024,
-        top_p=0.9,
-        top_k=50,
-        temperature=0.7
-    )

From dcff324731905542f3979661f5ab5e77c2862f91 Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Fri, 26 Jul 2024 10:50:54 +1000
Subject: [PATCH 08/14] Add Phi 2

---
 .../model_phi2/phi2_text_generation.py | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 model_demos/nlp_demos/model_phi2/phi2_text_generation.py

diff --git a/model_demos/nlp_demos/model_phi2/phi2_text_generation.py b/model_demos/nlp_demos/model_phi2/phi2_text_generation.py
new file mode 100644
index 00000000..cd7bd9a6
--- /dev/null
+++ b/model_demos/nlp_demos/model_phi2/phi2_text_generation.py
@@ -0,0 +1,60 @@
+import os
+import pybuda
+
+from transformers import PhiForCausalLM, AutoTokenizer, PhiConfig
+from pybuda.transformers.pipeline import pipeline as pybuda_pipeline
+
+model_name = "microsoft/phi-2"
+
+def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
+    # Set PyBuda configurations
+    compiler_cfg = pybuda.config._get_global_compiler_config()
+    compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
+    compiler_cfg.enable_auto_fusing = False
+    compiler_cfg.balancer_policy = "Ribbon"
+
+    # Load the model configuration
+    config = PhiConfig.from_pretrained(model_name)
+    config.use_cache = True
+
+    # Load the model and tokenizer with the updated config
+    model = PhiForCausalLM.from_pretrained(model_name, config=config)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    # Set pad token
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Example usage
+    prompt = ["My name is Jimmy and"]
+
+    # Initialize pipeline
+    text_generator = pybuda_pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer
+    )
+
+    # Inference
+    output = text_generator(
+        prompt,
+        do_sample=True,
+        num_beams=1,
+        no_repeat_ngram_size=2,
+        pad_token_id=tokenizer.pad_token_id,
+        max_new_tokens=max_length,
+        temperature=temperature,
+        top_k=top_k,
+        top_p=top_p,
+    )
+
+    # Display output
+    print("OUTPUT:\n", output[0][0]["generated_text"])
+
+
+if __name__ == "__main__":
+    run_qwen_causal_lm(
+        max_length=100,
+        top_p=0.7,
+        top_k=50,
+        temperature=0.7
+    )
\ No newline at end of file

From 25958b868b4aced48029c1b1164bebce344e3596 Mon Sep 17 00:00:00 2001
From: "Jush (yupiop12)" <36951064+JushBJJ@users.noreply.github.com>
Date: Fri, 26 Jul 2024 11:20:22 +1000
Subject: [PATCH 09/14] Update requirements.txt

---
 model_demos/requirements.txt | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/model_demos/requirements.txt b/model_demos/requirements.txt
index 637db37e..4b8b2f52 100644
--- a/model_demos/requirements.txt
+++ b/model_demos/requirements.txt
@@ -10,6 +10,3 @@ librosa==0.10.0 # For Whisper
 numba==0.53.1 # For Whisper
 segmentation-models-pytorch==0.3.3 # For U-Net
 diffusers==0.14.0 # For Stable Diffusion
-transformers==4.37.0 # For Qwen2
-accelerate==0.28.0 # For Qwen2
-auto-gptq==0.7.1 # For Qwen2-GPTQ
\ No newline at end of file

From 871f749deb74acdaf148204f21d83cf7f8f61171 Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Thu, 1 Aug 2024 00:23:23 +0000
Subject: [PATCH 10/14] Standardize Phi2 demo and added tests

---
 .../phi2/pytorch_phi2_text_generation.py | 68 +++++++++++++++++++
 model_demos/pyproject.toml               |  1 +
 model_demos/tests/test_pytorch_phi2.py   | 10 +++
 3 files changed, 79 insertions(+)
 create mode 100644 model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py
 create mode 100644 model_demos/tests/test_pytorch_phi2.py

diff --git a/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py b/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py
new file mode 100644
index 00000000..a0f172f3
--- /dev/null
+++ b/model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py
@@ -0,0 +1,68 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+# SPDX-License-Identifier: Apache-2.0
+
+# Phi2 Demo - Text Generation
+
+import os
+import pybuda
+
+from transformers import PhiForCausalLM, AutoTokenizer, PhiConfig
+from pybuda.transformers.pipeline import pipeline as pybuda_pipeline
+
+def run_phi2_causal_lm(batch_size=1):
+    os.environ["TT_BACKEND_TIMEOUT"] = '0'
+
+    # Set PyBuda configurations
+    compiler_cfg = pybuda.config._get_global_compiler_config()
+    compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
+    compiler_cfg.enable_auto_fusing = True
+    compiler_cfg.balancer_policy = "Ribbon"
+
+    # Setup model configuration
+    config = PhiConfig.from_pretrained("microsoft/phi-2")
+    config.use_cache = False
+    config.return_dict = False
+
+    # Load model and tokenizer with config
+    model = PhiForCausalLM.from_pretrained("microsoft/phi-2", config=config)
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
+    tokenizer.pad_token, tokenizer.pad_token_id = (tokenizer.eos_token, tokenizer.eos_token_id)
+
+    # Disable DynamicCache
+    # See: https://github.com/tenstorrent/tt-buda/issues/42
+    model._supports_cache_class = False
+
+    # Example usage
+    prompt = ["My name is Jim Keller and"] * batch_size
+
+    # Initialize pipeline
+    text_generator = pybuda_pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer
+    )
+
+    # Inference on TT device
+    response = text_generator(
+        prompt,
+        temperature=0.7,
+        top_k=50,
+        top_p=0.9,
+        max_new_tokens=512,
+        num_beams=1,
+        do_sample=True,
+        no_repeat_ngram_size=5,
+        pad_token_id=tokenizer.pad_token_id,
+        eos_token_id=tokenizer.eos_token_id,
+        early_stopping=True
+    )
+
+    # Display Responses
+    for batch_id in range(batch_size):
+        print(f"Batch: {batch_id}")
+        print(f"Response: {response[batch_id][0]['generated_text']}")
+        print()
+
+
+if __name__ == "__main__":
+    run_phi2_causal_lm()
\ No newline at end of file
diff --git a/model_demos/pyproject.toml b/model_demos/pyproject.toml
index 334d1b69..e691f978 100644
--- a/model_demos/pyproject.toml
+++ b/model_demos/pyproject.toml
@@ -87,4 +87,5 @@ markers = [
     "yolov6: tests that involve yolov6",
     "segformer: tests that involve SegFormer",
     "monodle: tests that involve Monodle",
+    "phi2": tests that involve Phi2",
 ]
diff --git a/model_demos/tests/test_pytorch_phi2.py b/model_demos/tests/test_pytorch_phi2.py
new file mode 100644
index 00000000..ba27124e
--- /dev/null
+++ b/model_demos/tests/test_pytorch_phi2.py
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from nlp_demos.phi2.pytorch_phi2_text_generation import run_phi2_causal_lm
+
+@pytest.mark.qwen1_5
+def test_qwen1_5_causal_lm_pytorch(clear_pybuda, test_device, batch_size):
+    run_phi2_causal_lm(batch_size=batch_size)
\ No newline at end of file

From fd76bb5c8e5a11594f12c4de3c1157a9598ecf2e Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Thu, 1 Aug 2024 00:26:19 +0000
Subject: [PATCH 11/14] Remove old phi2 demo

---
 .../model_phi2/phi2_text_generation.py | 60 ------------------
 1 file changed, 60 deletions(-)
 delete mode 100644 model_demos/nlp_demos/model_phi2/phi2_text_generation.py

diff --git a/model_demos/nlp_demos/model_phi2/phi2_text_generation.py b/model_demos/nlp_demos/model_phi2/phi2_text_generation.py
deleted file mode 100644
index cd7bd9a6..00000000
--- a/model_demos/nlp_demos/model_phi2/phi2_text_generation.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import os
-import pybuda
-
-from transformers import PhiForCausalLM, AutoTokenizer, PhiConfig
-from pybuda.transformers.pipeline import pipeline as pybuda_pipeline
-
-model_name = "microsoft/phi-2"
-
-def run_qwen_causal_lm(max_length=1024, top_p=0.9, top_k=50, temperature=0.7):
-    # Set PyBuda configurations
-    compiler_cfg = pybuda.config._get_global_compiler_config()
-    compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
-    compiler_cfg.enable_auto_fusing = False
-    compiler_cfg.balancer_policy = "Ribbon"
-
-    # Load the model configuration
-    config = PhiConfig.from_pretrained(model_name)
-    config.use_cache = True
-
-    # Load the model and tokenizer with the updated config
-    model = PhiForCausalLM.from_pretrained(model_name, config=config)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-    # Set pad token
-    tokenizer.pad_token = tokenizer.eos_token
-
-    # Example usage
-    prompt = ["My name is Jimmy and"]
-
-    # Initialize pipeline
-    text_generator = pybuda_pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer
-    )
-
-    # Inference
-    output = text_generator(
-        prompt,
-        do_sample=True,
-        num_beams=1,
-        no_repeat_ngram_size=2,
-        pad_token_id=tokenizer.pad_token_id,
-        max_new_tokens=max_length,
-        temperature=temperature,
-        top_k=top_k,
-        top_p=top_p,
-    )
-
-    # Display output
-    print("OUTPUT:\n", output[0][0]["generated_text"])
-
-
-if __name__ == "__main__":
-    run_qwen_causal_lm(
-        max_length=100,
-        top_p=0.7,
-        top_k=50,
-        temperature=0.7
-    )
\ No newline at end of file

From 0fa971c97d9ce3fd6710f1009935fa1f318c5b90 Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Thu, 1 Aug 2024 00:27:11 +0000
Subject: [PATCH 12/14] fix missing quote in pyproject.toml

---
 model_demos/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model_demos/pyproject.toml b/model_demos/pyproject.toml
index e691f978..bb1a4628 100644
--- a/model_demos/pyproject.toml
+++ b/model_demos/pyproject.toml
@@ -87,5 +87,5 @@ markers = [
     "yolov6: tests that involve yolov6",
     "segformer: tests that involve SegFormer",
     "monodle: tests that involve Monodle",
-    "phi2": tests that involve Phi2",
+    "phi2": "tests that involve Phi2",
 ]

From d40ca54e0848e87d3cd8752ce624294a67b53ea3 Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Thu, 1 Aug 2024 00:28:07 +0000
Subject: [PATCH 13/14] fix

---
 model_demos/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model_demos/pyproject.toml b/model_demos/pyproject.toml
index bb1a4628..ea8239d7 100644
--- a/model_demos/pyproject.toml
+++ b/model_demos/pyproject.toml
@@ -87,5 +87,5 @@ markers = [
     "yolov6: tests that involve yolov6",
     "segformer: tests that involve SegFormer",
     "monodle: tests that involve Monodle",
-    "phi2": "tests that involve Phi2",
+    "phi2: tests that involve Phi2",
 ]

From d55116db4b54443421180c2959f653baaa2e866e Mon Sep 17 00:00:00 2001
From: JushBJJ
Date: Sat, 31 Aug 2024 01:00:26 +0000
Subject: [PATCH 14/14] Fix test saying qwen1_5 instead of phi2

---
 model_demos/tests/test_pytorch_phi2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/model_demos/tests/test_pytorch_phi2.py b/model_demos/tests/test_pytorch_phi2.py
index ba27124e..394fb41d 100644
--- a/model_demos/tests/test_pytorch_phi2.py
+++ b/model_demos/tests/test_pytorch_phi2.py
@@ -5,6 +5,6 @@
 
 from nlp_demos.phi2.pytorch_phi2_text_generation import run_phi2_causal_lm
 
-@pytest.mark.qwen1_5
-def test_qwen1_5_causal_lm_pytorch(clear_pybuda, test_device, batch_size):
+@pytest.mark.phi2
+def test_phi2_causal_lm_pytorch(clear_pybuda, test_device, batch_size):
     run_phi2_causal_lm(batch_size=batch_size)
\ No newline at end of file
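
For reference, the entry point added in PATCH 10/14 and exercised by the test in PATCH 14/14 can also be driven directly. A minimal sketch, assuming the series is applied on top of tt-buda's model_demos, a Tenstorrent device is available, and model_demos is on PYTHONPATH:

    # Sketch: invoke the Phi-2 demo the same way test_pytorch_phi2.py does.
    # batch_size controls how many copies of the example prompt are generated.
    from nlp_demos.phi2.pytorch_phi2_text_generation import run_phi2_causal_lm

    run_phi2_causal_lm(batch_size=1)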