Skip to content

Commit

Permalink
Update nightly test with xfail marker
Browse files — browse the repository at this point in the history
  • Loading branch information
chandrasekaranpradeep committed Mar 7, 2025
1 parent 787d802 commit 7790bdb
Show file tree
Hide file tree
Showing 66 changed files with 612 additions and 81 deletions.
9 changes: 8 additions & 1 deletion forge/test/models/pytorch/audio/stereo/test_stereo.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@
from test.models.utils import Framework, Source, Task, build_module_name

variants = [
"facebook/musicgen-small",
pytest.param(
"facebook/musicgen-small",
marks=[
pytest.mark.xfail(
reason="[Optimization Graph Passes] RuntimeError: (i >= 0) && (i < (int)dims_.size()) Trying to access element outside of dimensions: 3"
)
],
),
"facebook/musicgen-medium",
"facebook/musicgen-large",
]
Expand Down
11 changes: 9 additions & 2 deletions forge/test/models/pytorch/audio/whisper/test_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@
from test.utils import download_model

variants = [
"openai/whisper-tiny",
pytest.param(
"openai/whisper-tiny",
marks=[
pytest.mark.xfail(
reason="Conv2d AssertionError: Setting a tensor value of incorrect shape: (1, 384, 2999, 2) vs torch.Size([1, 384, 3000, 1])"
)
],
),
"openai/whisper-base",
"openai/whisper-small",
"openai/whisper-medium",
Expand All @@ -25,7 +32,7 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_whisper(record_forge_property, variant):
if variant != "openai/whisper-tiny":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/multimodal/clip/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,19 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["openai/clip-vit-base-patch32"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"openai/clip-vit-base-patch32",
marks=[
pytest.mark.xfail(
reason="ttir.reshape op Input and output tensors must have the same number of elements"
)
],
),
],
)
def test_clip_pytorch(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,15 @@ def forward(self, input_tensor):

@pytest.mark.nightly
@pytest.mark.skip_model_analysis
@pytest.mark.parametrize("variant", ["stable-diffusion-xl-base-1.0"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"stable-diffusion-xl-base-1.0",
marks=[pytest.mark.xfail(reason="NotImplementedError: Unknown output type: <class 'PIL.Image.Image'>")],
),
],
)
def test_stable_diffusion_generation(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/text/bart/test_bart.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,19 @@ def forward(self, input_ids, attention_mask, decoder_input_ids):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["facebook/bart-large-mnli"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"facebook/bart-large-mnli",
marks=[
pytest.mark.xfail(
reason="unique+common runtime args targeting kernel reader_concat_stick_layout_interleaved_start_id on (x=0,y=0) are too large. Max allowable is 256"
)
],
),
],
)
def test_pt_bart_classifier(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/text/bloom/test_bloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,19 @@ def forward(self, input_ids, attention_mask):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["bigscience/bloom-1b1"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"bigscience/bloom-1b1",
marks=[
pytest.mark.xfail(
reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen"
)
],
),
],
)
def test_bloom(record_forge_property, variant):

# Build Module Name
Expand Down
9 changes: 7 additions & 2 deletions forge/test/models/pytorch/text/codegen/test_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@
from test.utils import download_model

variants = [
"Salesforce/codegen-350M-mono",
pytest.param(
"Salesforce/codegen-350M-mono",
marks=[
pytest.mark.xfail(reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen")
],
),
"Salesforce/codegen-350M-multi",
"Salesforce/codegen-350M-nl",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_codegen(record_forge_property, variant):
if variant != "Salesforce/codegen-350M-mono":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
11 changes: 9 additions & 2 deletions forge/test/models/pytorch/text/distilbert/test_distilbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,18 @@
from test.models.utils import Framework, Source, Task, build_module_name
from test.utils import download_model

variants = ["distilbert-base-uncased", "distilbert-base-cased", "distilbert-base-multilingual-cased"]
variants = [
pytest.param(
"distilbert-base-uncased",
marks=[pytest.mark.xfail(reason="ttir.typecast op Result shape must match operand shapes after broadcasting")],
),
"distilbert-base-cased",
"distilbert-base-multilingual-cased",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_distilbert_masked_lm_pytorch(record_forge_property, variant):
if variant != "distilbert-base-uncased":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
3 changes: 3 additions & 0 deletions forge/test/models/pytorch/text/falcon/test_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ def test_falcon_3(record_forge_property, variant):

if variant == "tiiuae/Falcon3-Mamba-7B-Base" or variant == "tiiuae/Falcon3-7B-Base":
pytest.skip("Insufficient host DRAM to run this model (requires a bit more than 36 GB)")
if variant == "tiiuae/Falcon3-3B-Base":
pytest.skip("Insufficient host DRAM to run this model (requires a bit more than 25 GB)")

# Build Module Name
module_name = build_module_name(
framework=Framework.PYTORCH, model="falcon3", variant=variant, task=Task.CAUSAL_LM, source=Source.HUGGINGFACE
Expand Down
14 changes: 13 additions & 1 deletion forge/test/models/pytorch/text/fuyu/test_fuyu_8b.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,19 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["adept/fuyu-8b"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"adept/fuyu-8b",
marks=[
pytest.mark.xfail(
reason="[Optimization Graph Passes] RuntimeError: (i >= 0) && (i < (int)dims_.size()) Trying to access element outside of dimensions: 3"
)
],
),
],
)
def test_fuyu8b(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
22 changes: 20 additions & 2 deletions forge/test/models/pytorch/text/gpt2/test_gpt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ def forward(self, input_ids, attention_mask):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["gpt2"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"gpt2",
marks=[
pytest.mark.xfail(reason="RuntimeError: Tensor 6 - data type mismatch: expected Float32, got UInt8")
],
),
],
)
def test_gpt2_text_gen(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down Expand Up @@ -62,7 +72,15 @@ def test_gpt2_text_gen(record_forge_property, variant):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["mnoukhov/gpt2-imdb-sentiment-classifier"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"mnoukhov/gpt2-imdb-sentiment-classifier",
marks=[pytest.mark.xfail(reason="ttir.softmax op requires attribute 'dimension'")],
),
],
)
def test_gpt2_sequence_classification(record_forge_property, variant):

# Build Module Name
Expand Down
11 changes: 9 additions & 2 deletions forge/test/models/pytorch/text/gptneo/test_gptneo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,21 @@
from test.utils import download_model

variants = [
"EleutherAI/gpt-neo-125M",
pytest.param(
"EleutherAI/gpt-neo-125M",
marks=[
pytest.mark.xfail(
                reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen (pcc=0.28)"
)
],
),
"EleutherAI/gpt-neo-1.3B",
"EleutherAI/gpt-neo-2.7B",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_gptneo_causal_lm(record_forge_property, variant):
if variant != "EleutherAI/gpt-neo-125M":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
9 changes: 8 additions & 1 deletion forge/test/models/pytorch/text/mamba/test_mamba.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ def forward(self, input_ids):


variants = [
"state-spaces/mamba-790m-hf",
pytest.param(
"state-spaces/mamba-790m-hf",
marks=[
pytest.mark.xfail(
reason="[TVM Relay IRModule Generation] Dimension mismatch: axes has 3 elements, but data.ndim = 6"
)
],
),
"state-spaces/mamba-2.8b-hf",
"state-spaces/mamba-1.4b-hf",
"state-spaces/mamba-370m-hf",
Expand Down
10 changes: 9 additions & 1 deletion forge/test/models/pytorch/text/nanogpt/test_nanogpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ def forward(self, input_ids, attention_mask):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["FinancialSupport/NanoGPT"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"FinancialSupport/NanoGPT",
marks=pytest.mark.xfail(reason="RuntimeError: Tensor 6 - data type mismatch: expected Float32, got UInt8"),
),
],
)
def test_nanogpt_text_generation(record_forge_property, variant):

# Build Module Name
Expand Down
19 changes: 15 additions & 4 deletions forge/test/models/pytorch/text/opt/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,22 @@
from test.models.utils import Framework, Source, Task, build_module_name
from test.utils import download_model

variants = ["facebook/opt-125m", "facebook/opt-350m", "facebook/opt-1.3b"]
variants = [
pytest.param(
"facebook/opt-125m",
marks=[
pytest.mark.xfail(
reason="unique+common runtime args targeting kernel reader_concat_stick_layout_interleaved_start_id on (x=0,y=0) are too large. Max allowable is 256"
)
],
),
"facebook/opt-350m",
"facebook/opt-1.3b",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_opt_causal_lm(record_forge_property, variant):
if variant != "facebook/opt-125m":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down Expand Up @@ -71,7 +82,7 @@ def test_opt_causal_lm(record_forge_property, variant):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_opt_qa(record_forge_property, variant):
pytest.skip("Skipping due to the current CI/CD pipeline limitations")

Expand Down Expand Up @@ -117,7 +128,7 @@ def test_opt_qa(record_forge_property, variant):


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_opt_sequence_classification(record_forge_property, variant):
pytest.skip("Skipping due to the current CI/CD pipeline limitations")

Expand Down
12 changes: 10 additions & 2 deletions forge/test/models/pytorch/text/phi2/test_phi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,19 @@

from test.models.utils import Framework, Source, Task, build_module_name

variants = ["microsoft/phi-2", "microsoft/phi-2-pytdml"]
variants = [
pytest.param(
"microsoft/phi-2",
marks=[
pytest.mark.xfail(reason="AssertionError: Data mismatch on output 0 between framework and Forge codegen")
],
),
"microsoft/phi-2-pytdml",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
def test_phi2_clm(record_forge_property, variant):
if variant != "microsoft/phi-2":
pytest.skip("Skipping due to the current CI/CD pipeline limitations")
Expand Down
10 changes: 9 additions & 1 deletion forge/test/models/pytorch/text/qwen/test_qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,15 @@


@pytest.mark.nightly
@pytest.mark.parametrize("variant", ["Qwen/Qwen1.5-0.5B"])
@pytest.mark.parametrize(
"variant",
[
pytest.param(
"Qwen/Qwen1.5-0.5B",
marks=[pytest.mark.xfail(reason="RuntimeError: Input count mismatch: expected 533, got 534")],
),
],
)
def test_qwen1_5_causal_lm(record_forge_property, variant):
# Build Module Name
module_name = build_module_name(
Expand Down
7 changes: 5 additions & 2 deletions forge/test/models/pytorch/text/qwen/test_qwen_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@

# Variants for testing
variants = [
"Qwen/Qwen2.5-Coder-0.5B",
pytest.param(
"Qwen/Qwen2.5-Coder-0.5B",
marks=[pytest.mark.xfail(reason="RuntimeError: Input count mismatch: expected 533, got 534")],
),
"Qwen/Qwen2.5-Coder-1.5B",
"Qwen/Qwen2.5-Coder-1.5B-Instruct",
"Qwen/Qwen2.5-Coder-3B",
Expand All @@ -21,7 +24,7 @@
]


@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.parametrize("variant", variants)
@pytest.mark.nightly
def test_qwen_clm(record_forge_property, variant):
if variant != "Qwen/Qwen2.5-Coder-0.5B":
Expand Down
Loading

0 comments on commit 7790bdb

Please sign in to comment.