From c30c4a89895b1fc344407ba65b14e4c0eea4d25d Mon Sep 17 00:00:00 2001 From: Neil Xie Date: Thu, 23 Jan 2025 22:11:03 -0800 Subject: [PATCH] Don't fail the workflow when failed to emit metrics for one domain (#6640) * Don't fail the workflow when failed to emit metrics for one domain in ES analyzer * Return error when all domains fail to emit metrics --- .../worker/esanalyzer/domainWorkflowTypeCountWorkflow.go | 7 ++++++- service/worker/esanalyzer/workflow.go | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/service/worker/esanalyzer/domainWorkflowTypeCountWorkflow.go b/service/worker/esanalyzer/domainWorkflowTypeCountWorkflow.go index ddf48d888fe..7445f54ffbb 100644 --- a/service/worker/esanalyzer/domainWorkflowTypeCountWorkflow.go +++ b/service/worker/esanalyzer/domainWorkflowTypeCountWorkflow.go @@ -148,6 +148,7 @@ func (w *Workflow) emitWorkflowTypeCountMetrics(ctx context.Context) error { if err != nil { return err } + var failedDomains []string for _, domainName := range workflowMetricDomainNames { switch w.analyzer.readMode { case ES: @@ -158,9 +159,13 @@ func (w *Workflow) emitWorkflowTypeCountMetrics(ctx context.Context) error { err = w.emitWorkflowTypeCountMetricsES(ctx, domainName, logger) } if err != nil { - return err + logger.Error(fmt.Sprintf("failed to emit workflow type metrics for domain %s", domainName), zap.Error(err)) + failedDomains = append(failedDomains, domainName) } } + if len(failedDomains) == len(workflowMetricDomainNames) { + return fmt.Errorf("failed to emit workflow type metrics for all domains") + } } return nil } diff --git a/service/worker/esanalyzer/workflow.go b/service/worker/esanalyzer/workflow.go index 651e8b54362..88b502846fc 100644 --- a/service/worker/esanalyzer/workflow.go +++ b/service/worker/esanalyzer/workflow.go @@ -197,6 +197,7 @@ func (w *Workflow) emitWorkflowVersionMetrics(ctx context.Context) error { if err != nil { return err } + var failedDomains []string for _, domainName := range workflowMetricDomainNames { switch w.analyzer.readMode { case ES: @@ -207,9 +208,13 @@ func (w *Workflow) emitWorkflowVersionMetrics(ctx context.Context) error { err = w.emitWorkflowVersionMetricsES(ctx, domainName, logger) } if err != nil { - return err + logger.Error(fmt.Sprintf("failed to emit workflow version metrics for domain %s", domainName), zap.Error(err)) + failedDomains = append(failedDomains, domainName) } } + if len(failedDomains) == len(workflowMetricDomainNames) { + return fmt.Errorf("failed to emit workflow version metrics for all domains") + } } return nil }