Skip to content

Commit

Permalink
Update nightly test with xfail marker
Browse files — browse the repository at this point in the history
  • Loading branch information
chandrasekaranpradeep committed Mar 7, 2025
1 parent 787d802 commit 7790bdb
Show file tree
Hide file tree
Showing 66 changed files with 612 additions and 81 deletions.
9 changes: 8 additions & 1 deletion forge/test/models/pytorch/audio/stereo/test_stereo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@
from test.models.utils import Framework, Source, Task, build_module_name

variants = [
"facebook/musicgen-small",
pytest.param(
"facebook/musicgen-small",
marks=[
pytest.mark.xfail(
reason="[Optimization Graph Passes] RuntimeError: (i >= 0) && (i < (int)dims_.size()) Trying to access element outside of dimensions: 3"
)
],
),
"facebook/musicgen-medium",
"facebook/musicgen-large",
]
Expand Down
11 changes: 9 additions & 2 deletions forge/test/models/pytorch/audio/whisper/test_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@
from test.utils import download_model

variants = [
"openai/whisper-tiny",
pytest.param(
"openai/whisper-tiny",
marks=[
pytest.mark.xfail(
reason="Conv2d AssertionError: Setting a tensor value of incorrect shape: (1, 384, 2999, 2) vs torch.Size([1, 384, 3000, 1])"
)
],
),
"openai/whisper-base",
"openai/whisper-small",
"openai/whisper-medium",
Expand All @@ -25,7 +32,7 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_whisper(record_forge_property, variant):
if variant != "openai/whisper-tiny":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/multimodal/clip/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,19 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["openai/clip-vit-base-patch32"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"openai/clip-vit-base-patch32",
marks=[
pytest.mark.xfail(
reason="ttir.reshape op Input and output tensors must have the same number of elements"
)
],
),
],
)
def test_clip_pytorch(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,15 @@ def forward(self, input_tensor):

@pytest.mark.nightly
@pytest.mark.skip_model_analysis
@pytest.mark.parametrize("variant", ["stable-diffusion-xl-base-1.0"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"stable-diffusion-xl-base-1.0",
marks=[pytest.mark.xfail(reason="NotImplementedError: Unknown output type: <class 'PIL.Image.Image'>")],
),
],
)
def test_stable_diffusion_generation(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/text/bart/test_bart.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,19 @@ def forward(self, input_ids, attention_mask, decoder_input_ids):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["facebook/bart-large-mnli"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"facebook/bart-large-mnli",
marks=[
pytest.mark.xfail(
reason="unique+common runtime args targeting kernel reader_concat_stick_layout_interleaved_start_id on (x=0,y=0) are too large. Max allowable is 256"
)
],
),
],
)
def test_pt_bart_classifier(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/text/bloom/test_bloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,19 @@ def forward(self, input_ids, attention_mask):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["bigscience/bloom-1b1"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"bigscience/bloom-1b1",
marks=[
pytest.mark.xfail(
reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen"
)
],
),
],
)
def test_bloom(record_forge_property, variant):

# Build Module Name
Expand Down
9 changes: 7 additions & 2 deletions forge/test/models/pytorch/text/codegen/test_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@
from test.utils import download_model

variants = [
"Salesforce/codegen-350M-mono",
pytest.param(
"Salesforce/codegen-350M-mono",
marks=[
pytest.mark.xfail(reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen")
],
),
"Salesforce/codegen-350M-multi",
"Salesforce/codegen-350M-nl",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_codegen(record_forge_property, variant):
if variant != "Salesforce/codegen-350M-mono":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
11 changes: 9 additions & 2 deletions forge/test/models/pytorch/text/distilbert/test_distilbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,18 @@
from test.models.utils import Framework, Source, Task, build_module_name
from test.utils import download_model

variants = ["distilbert-base-uncased", "distilbert-base-cased", "distilbert-base-multilingual-cased"]
variants = [
pytest.param(
"distilbert-base-uncased",
marks=[pytest.mark.xfail(reason="ttir.typecast op Result shape must match operand shapes after broadcasting")],
),
"distilbert-base-cased",
"distilbert-base-multilingual-cased",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_distilbert_masked_lm_pytorch(record_forge_property, variant):
if variant != "distilbert-base-uncased":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
3 changes: 3 additions & 0 deletions forge/test/models/pytorch/text/falcon/test_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ def test_falcon_3(record_forge_property, variant):

if variant == "tiiuae/Falcon3-Mamba-7B-Base" or variant == "tiiuae/Falcon3-7B-Base":
pytest.skip("Insufficient host DRAM to run this model (requires a bit more than 36 GB)")
if variant == "tiiuae/Falcon3-3B-Base":
pytest.skip("Insufficient host DRAM to run this model (requires a bit more than 25 GB)")

# Build Module Name
module_name = build_module_name(
framework=Framework.PYTORCH, model="falcon3", variant=variant, task=Task.CAUSAL_LM, source=Source.HUGGINGFACE
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/text/fuyu/test_fuyu_8b.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,19 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["adept/fuyu-8b"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"adept/fuyu-8b",
marks=[
pytest.mark.xfail(
reason="[Optimization Graph Passes] RuntimeError: (i >= 0) && (i < (int)dims_.size()) Trying to access element outside of dimensions: 3"
)
],
),
],
)
def test_fuyu8b(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
22 changes: 20 additions & 2 deletions forge/test/models/pytorch/text/gpt2/test_gpt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ def forward(self, input_ids, attention_mask):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["gpt2"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"gpt2",
marks=[
pytest.mark.xfail(reason="RuntimeError: Tensor 6 - data type mismatch: expected Float32, got UInt8")
],
),
],
)
def test_gpt2_text_gen(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down Expand Up @@ -62,7 +72,15 @@ def test_gpt2_text_gen(record_forge_property, variant):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["mnoukhov/gpt2-imdb-sentiment-classifier"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"mnoukhov/gpt2-imdb-sentiment-classifier",
marks=[pytest.mark.xfail(reason="ttir.softmax op requires attribute 'dimension'")],
),
],
)
def test_gpt2_sequence_classification(record_forge_property, variant):

# Build Module Name
Expand Down
11 changes: 9 additions & 2 deletions forge/test/models/pytorch/text/gptneo/test_gptneo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,21 @@
from test.utils import download_model

variants = [
"EleutherAI/gpt-neo-125M",
pytest.param(
"EleutherAI/gpt-neo-125M",
marks=[
pytest.mark.xfail(
                reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen (pcc=0.28)"
)
],
),
"EleutherAI/gpt-neo-1.3B",
"EleutherAI/gpt-neo-2.7B",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_gptneo_causal_lm(record_forge_property, variant):
if variant != "EleutherAI/gpt-neo-125M":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
9 changes: 8 additions & 1 deletion forge/test/models/pytorch/text/mamba/test_mamba.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ def forward(self, input_ids):


variants = [
"state-spaces/mamba-790m-hf",
pytest.param(
"state-spaces/mamba-790m-hf",
marks=[
pytest.mark.xfail(
reason="[TVM Relay IRModule Generation] Dimension mismatch: axes has 3 elements, but data.ndim = 6"
)
],
),
"state-spaces/mamba-2.8b-hf",
"state-spaces/mamba-1.4b-hf",
"state-spaces/mamba-370m-hf",
Expand Down
10 changes: 9 additions & 1 deletion forge/test/models/pytorch/text/nanogpt/test_nanogpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ def forward(self, input_ids, attention_mask):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["FinancialSupport/NanoGPT"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"FinancialSupport/NanoGPT",
marks=pytest.mark.xfail(reason="RuntimeError: Tensor 6 - data type mismatch: expected Float32, got UInt8"),
),
],
)
def test_nanogpt_text_generation(record_forge_property, variant):

# Build Module Name
Expand Down
19 changes: 15 additions & 4 deletions forge/test/models/pytorch/text/opt/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,22 @@
from test.models.utils import Framework, Source, Task, build_module_name
from test.utils import download_model

variants = ["facebook/opt-125m", "facebook/opt-350m", "facebook/opt-1.3b"]
variants = [
pytest.param(
"facebook/opt-125m",
marks=[
pytest.mark.xfail(
reason="unique+common runtime args targeting kernel reader_concat_stick_layout_interleaved_start_id on (x=0,y=0) are too large. Max allowable is 256"
)
],
),
"facebook/opt-350m",
"facebook/opt-1.3b",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_opt_causal_lm(record_forge_property, variant):
if variant != "facebook/opt-125m":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down Expand Up @@ -71,7 +82,7 @@ def test_opt_causal_lm(record_forge_property, variant):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_opt_qa(record_forge_property, variant):
pytest.skip("Skipping due to the current CI/CD pipeline limitations")

Expand Down Expand Up @@ -117,7 +128,7 @@ def test_opt_qa(record_forge_property, variant):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_opt_sequence_classification(record_forge_property, variant):
pytest.skip("Skipping due to the current CI/CD pipeline limitations")

Expand Down
12 changes: 10 additions & 2 deletions forge/test/models/pytorch/text/phi2/test_phi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,19 @@

from test.models.utils import Framework, Source, Task, build_module_name

variants = ["microsoft/phi-2", "microsoft/phi-2-pytdml"]
variants = [
pytest.param(
"microsoft/phi-2",
marks=[
pytest.mark.xfail(reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen")
],
),
"microsoft/phi-2-pytdml",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_phi2_clm(record_forge_property, variant):
if variant != "microsoft/phi-2":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
10 changes: 9 additions & 1 deletion forge/test/models/pytorch/text/qwen/test_qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,15 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["Qwen/Qwen1.5-0.5B"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"Qwen/Qwen1.5-0.5B",
marks=[pytest.mark.xfail(reason="RuntimeError: Input count mismatch: expected 533, got 534")],
),
],
)
def test_qwen1_5_causal_lm(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
7 changes: 5 additions & 2 deletions forge/test/models/pytorch/text/qwen/test_qwen_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@

# Variants for testing
variants = [
"Qwen/Qwen2.5-Coder-0.5B",
pytest.param(
"Qwen/Qwen2.5-Coder-0.5B",
marks=[pytest.mark.xfail(reason="RuntimeError: Input count mismatch: expected 533, got 534")],
),
"Qwen/Qwen2.5-Coder-1.5B",
"Qwen/Qwen2.5-Coder-1.5B-Instruct",
"Qwen/Qwen2.5-Coder-3B",
Expand All @@ -21,7 +24,7 @@
]


@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
@pytest.mark.nightly
def test_qwen_clm(record_forge_property, variant):
if variant != "Qwen/Qwen2.5-Coder-0.5B":
Expand Down
Loading

0 comments on commit 7790bdb

Please sign in to comment.