From 412a0f3df0cea4e7b8723160f32870418571d8f8 Mon Sep 17 00:00:00 2001
From: Ivan Kobzarev
Date: Wed, 22 Jan 2025 13:25:40 -0800
Subject: [PATCH] Fix mobilenetv2 inductor freezing fail_accuracy (#145296)

Summary:
Issue: https://github.com/pytorch/pytorch/issues/144891

Inductor freezing effectively enables the inductor conv-batchnorm fusion.
This fusion increases the accuracy error.

More context about this:
https://github.com/pytorch/pytorch/issues/120545

For Timm models that are run through benchmarks/dynamo/timm_models.py with
TimmRunner, the tolerance was increased here:
https://github.com/pytorch/pytorch/blob/main/benchmarks/dynamo/timm_models.py#L367

If the conv-batchnorm fusion is commented out, as Elias suggested in the
context issue, the accuracy is back.

=> Increasing tolerance for mobilenetv2 to the same value by introducing a
special tolerance configuration that applies only under freezing.

X-link: https://github.com/pytorch/pytorch/pull/145296
Approved by: https://github.com/eellison, https://github.com/zou3519

Reviewed By: izaitsevfb

Differential Revision: D68509876

fbshipit-source-id: f748156d78541fe33da63bf5b9cb6a3bd4045294
---
 userbenchmark/dynamo/dynamobench/torchbench.py   | 7 +++++++
 userbenchmark/dynamo/dynamobench/torchbench.yaml | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/userbenchmark/dynamo/dynamobench/torchbench.py b/userbenchmark/dynamo/dynamobench/torchbench.py
index c127b00bac..e499c136cb 100755
--- a/userbenchmark/dynamo/dynamobench/torchbench.py
+++ b/userbenchmark/dynamo/dynamobench/torchbench.py
@@ -413,6 +413,13 @@ def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
         cosine = self.args.cosine
         # Increase the tolerance for torch allclose
         if self.args.float16 or self.args.amp:
+            if self.args.freezing and (freezing := self._tolerance["freezing"]):
+                higher_fp16 = freezing.get("higher_fp16", None)
+                even_higher = freezing.get("even_higher", None)
+                if higher_fp16 and name in higher_fp16:
+                    return 1e-2, cosine
+                elif even_higher and name in even_higher:
+                    return 8 * 1e-2, cosine
             if name in self._tolerance["higher_fp16"]:
                 return 1e-2, cosine
             elif name in self._tolerance["even_higher"]:

diff --git a/userbenchmark/dynamo/dynamobench/torchbench.yaml b/userbenchmark/dynamo/dynamobench/torchbench.yaml
index 0b9a083515..5647ab4900 100644
--- a/userbenchmark/dynamo/dynamobench/torchbench.yaml
+++ b/userbenchmark/dynamo/dynamobench/torchbench.yaml
@@ -54,6 +54,14 @@ tolerance:
     - drq
     - hf_Whisper

+  freezing:
+    # Similar logic to timm_models.py:get_tolerance_and_cosine_flag
+    # the conv-batchnorm fusion used under freezing may cause relatively
+    # large numerical difference. We need a larger tolerance.
+    # Check https://github.com/pytorch/pytorch/issues/120545 for context
+    even_higher:
+      - mobilenet_v2
+
   cosine: []

 require_larger_multiplier_for_smaller_tensor: