From 412a0f3df0cea4e7b8723160f32870418571d8f8 Mon Sep 17 00:00:00 2001
From: Ivan Kobzarev
Date: Wed, 22 Jan 2025 13:25:40 -0800
Subject: [PATCH] Fix mobilenetv2 inductor freezing fail_accuracy (#145296)

Summary:
Issue: https://github.com/pytorch/pytorch/issues/144891

Inductor freezing effectively enables the inductor conv-batchnorm fusion.
This fusion increases the accuracy error.

More context about this:
https://github.com/pytorch/pytorch/issues/120545

For Timm models that are run through benchmarks/dynamo/timm_models.py with
TimmRunner, the tolerance was increased here:
https://github.com/pytorch/pytorch/blob/main/benchmarks/dynamo/timm_models.py#L367

If the conv-batchnorm fusion is commented out, as Elias suggested in the
context issue, the accuracy is back.

=> Increasing tolerance for mobilenetv2 to the same value by introducing a
special tolerance configuration that applies only under freezing.

X-link: https://github.com/pytorch/pytorch/pull/145296
Approved by: https://github.com/eellison, https://github.com/zou3519

Reviewed By: izaitsevfb

Differential Revision: D68509876

fbshipit-source-id: f748156d78541fe33da63bf5b9cb6a3bd4045294
---
 userbenchmark/dynamo/dynamobench/torchbench.py   | 7 +++++++
 userbenchmark/dynamo/dynamobench/torchbench.yaml | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/userbenchmark/dynamo/dynamobench/torchbench.py b/userbenchmark/dynamo/dynamobench/torchbench.py
index c127b00bac..e499c136cb 100755
--- a/userbenchmark/dynamo/dynamobench/torchbench.py
+++ b/userbenchmark/dynamo/dynamobench/torchbench.py
@@ -413,6 +413,13 @@ def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
         cosine = self.args.cosine
         # Increase the tolerance for torch allclose
         if self.args.float16 or self.args.amp:
+            if self.args.freezing and (freezing := self._tolerance["freezing"]):
+                higher_fp16 = freezing.get("higher_fp16", None)
+                even_higher = freezing.get("even_higher", None)
+                if higher_fp16 and name in higher_fp16:
+                    return 1e-2, cosine
+                elif even_higher and name in even_higher:
+                    return 8 * 1e-2, cosine
             if name in self._tolerance["higher_fp16"]:
                 return 1e-2, cosine
             elif name in self._tolerance["even_higher"]:

diff --git a/userbenchmark/dynamo/dynamobench/torchbench.yaml b/userbenchmark/dynamo/dynamobench/torchbench.yaml
index 0b9a083515..5647ab4900 100644
--- a/userbenchmark/dynamo/dynamobench/torchbench.yaml
+++ b/userbenchmark/dynamo/dynamobench/torchbench.yaml
@@ -54,6 +54,14 @@ tolerance:
     - drq
     - hf_Whisper

+  freezing:
+    # Similar logic to timm_models.py:get_tolerance_and_cosine_flag
+    # the conv-batchnorm fusion used under freezing may cause relatively
+    # large numerical difference. We need a larger tolerance.
+    # Check https://github.com/pytorch/pytorch/issues/120545 for context
+    even_higher:
+      - mobilenet_v2
+
   cosine: []

 require_larger_multiplier_for_smaller_tensor: