From e0715f9735b543901c0963d7239fd9ba8d9de8b7 Mon Sep 17 00:00:00 2001 From: Onur Kayabasi Date: Mon, 10 Nov 2025 07:01:16 +0100 Subject: [PATCH 1/4] Failsafe RetryPolicy instrumentation added --- .../failsafe/v3_0/FailsafeTelemetry.java | 47 ++++++++++ .../RetryPolicyEventListenerBuilders.java | 53 +++++++++++ .../failsafe/v3_0/FailsafeTelemetryTest.java | 91 +++++++++++++++++++ 3 files changed, 191 insertions(+) create mode 100644 instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java diff --git a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java index 94c05f1dba5f..fc488bbb06e0 100644 --- a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java +++ b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java @@ -13,11 +13,16 @@ import dev.failsafe.CircuitBreaker; import dev.failsafe.CircuitBreakerConfig; +import dev.failsafe.RetryPolicy; +import dev.failsafe.RetryPolicyConfig; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.common.AttributeKey; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongHistogram; import io.opentelemetry.api.metrics.Meter; +import java.util.stream.Collectors; +import java.util.stream.LongStream; /** Entrypoint for instrumenting Failsafe components. */ public final class FailsafeTelemetry { @@ -25,6 +30,8 @@ public final class FailsafeTelemetry { private static final AttributeKey CIRCUIT_BREAKER_NAME = AttributeKey.stringKey("failsafe.circuit_breaker.name"); + private static final AttributeKey RETRY_POLICY_NAME = + AttributeKey.stringKey("failsafe.retry_policy.name"); /** Returns a new {@link FailsafeTelemetry} configured with the given {@link OpenTelemetry}. */ public static FailsafeTelemetry create(OpenTelemetry openTelemetry) { @@ -70,4 +77,44 @@ public CircuitBreaker createCircuitBreaker( .onClose(buildInstrumentedCloseListener(userConfig, stateChangesCounter, attributes)) .build(); } + + /** + * Returns an instrumented {@link RetryPolicy} by given values. + * + * @param delegate user configured {@link RetryPolicy} to be instrumented + * @param retryPolicyName identifier of given {@link RetryPolicy} + * @param {@link RetryPolicy}'s result type + * @return instrumented {@link RetryPolicy} + */ + public RetryPolicy createRetryPolicy(RetryPolicy delegate, String retryPolicyName) { + RetryPolicyConfig userConfig = delegate.getConfig(); + Meter meter = openTelemetry.getMeter(INSTRUMENTATION_NAME); + LongCounter executionCounter = + meter + .counterBuilder("failsafe.retry_policy.execution.count") + .setDescription( + "Count of execution events processed by the retry policy. " + + "Each event represents one complete execution flow (initial attempt + any retries). " + + "This metric does not count individual retry attempts - it counts each time the policy is invoked.") + .build(); + LongHistogram attemptsHistogram = + meter + .histogramBuilder("failsafe.retry_policy.attempts") + .setDescription("Histogram of number of attempts for each execution.") + .ofLongs() + .setExplicitBucketBoundariesAdvice( + LongStream.range(1, userConfig.getMaxAttempts() + 1) + .boxed() + .collect(Collectors.toList())) + .build(); + Attributes attributes = Attributes.of(RETRY_POLICY_NAME, retryPolicyName); + return RetryPolicy.builder(userConfig) + .onFailure( + RetryPolicyEventListenerBuilders.buildInstrumentedFailureListener( + userConfig, executionCounter, attemptsHistogram, attributes)) + .onSuccess( + RetryPolicyEventListenerBuilders.buildInstrumentedSuccessListener( + userConfig, executionCounter, attemptsHistogram, attributes)) + .build(); + } } diff --git a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java new file mode 100644 index 000000000000..dc7cfeb3e455 --- /dev/null +++ b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java @@ -0,0 +1,53 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.instrumentation.failsafe.v3_0; + +import static io.opentelemetry.api.common.AttributeKey.stringKey; + +import dev.failsafe.RetryPolicyConfig; +import dev.failsafe.event.EventListener; +import dev.failsafe.event.ExecutionCompletedEvent; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongHistogram; + +final class RetryPolicyEventListenerBuilders { + private static final AttributeKey OUTCOME_KEY = + stringKey("failsafe.retry_policy.outcome"); + + private RetryPolicyEventListenerBuilders() {} + + static EventListener> buildInstrumentedFailureListener( + RetryPolicyConfig userConfig, + LongCounter executionCounter, + LongHistogram attemptsHistogram, + Attributes commonAttributes) { + Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "failure").build(); + return e -> { + executionCounter.add(1, attributes); + attemptsHistogram.record(e.getAttemptCount(), attributes); + if (userConfig.getFailureListener() != null) { + userConfig.getFailureListener().accept(e); + } + }; + } + + static EventListener> buildInstrumentedSuccessListener( + RetryPolicyConfig userConfig, + LongCounter executionCounter, + LongHistogram attemptsHistogram, + Attributes commonAttributes) { + Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "success").build(); + return e -> { + executionCounter.add(1, attributes); + attemptsHistogram.record(e.getAttemptCount(), attributes); + if (userConfig.getFailureListener() != null) { + userConfig.getFailureListener().accept(e); + } + }; + } +} diff --git a/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java b/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java index 78038503e49d..5fe45a041cac 100644 --- a/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java +++ b/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java @@ -11,12 +11,20 @@ import dev.failsafe.CircuitBreaker; import dev.failsafe.CircuitBreakerOpenException; import dev.failsafe.Failsafe; +import dev.failsafe.RetryPolicy; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.instrumentation.testing.junit.InstrumentationExtension; import io.opentelemetry.instrumentation.testing.junit.LibraryInstrumentationExtension; +import io.opentelemetry.sdk.metrics.data.HistogramData; +import io.opentelemetry.sdk.metrics.data.HistogramPointData; +import io.opentelemetry.sdk.metrics.data.LongPointData; +import io.opentelemetry.sdk.metrics.data.SumData; import io.opentelemetry.sdk.testing.assertj.LongPointAssert; import java.time.Duration; +import java.util.Arrays; +import java.util.Collection; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.RegisterExtension; @@ -80,6 +88,82 @@ void captureCircuitBreakerMetrics() { 1, "failsafe.circuit_breaker.state", "closed")))); } + @Test + void captureRetryPolicyMetrics() { + // given + RetryPolicy userRetryPolicy = + dev.failsafe.RetryPolicy.builder() + .handleResultIf(Objects::isNull) + .withMaxAttempts(3) + .build(); + FailsafeTelemetry failsafeTelemetry = FailsafeTelemetry.create(testing.getOpenTelemetry()); + RetryPolicy instrumentedRetryPolicy = + failsafeTelemetry.createRetryPolicy(userRetryPolicy, "testing"); + + // when + for (int i = 0; i <= 3; i++) { + int temp = i; + AtomicInteger retry = new AtomicInteger(0); + Failsafe.with(instrumentedRetryPolicy) + .get( + () -> { + if (retry.get() < temp) { + retry.incrementAndGet(); + return null; + } else { + return new Object(); + } + }); + } + + // then + testing.waitAndAssertMetrics("io.opentelemetry.failsafe-3.0"); + assertThat(testing.metrics().size()).isEqualTo(2); + + SumData executionCountMetric = + testing.metrics().stream() + .filter(m -> m.getName().equals("failsafe.retry_policy.execution.count")) + .findFirst() + .get() + .getLongSumData(); + assertThat(executionCountMetric.getPoints().size()).isEqualTo(2); + assertThat(executionCountMetric.getPoints()) + .anyMatch( + p -> + p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure")) + && p.getValue() == 1); + assertThat(executionCountMetric.getPoints()) + .anyMatch( + p -> + p.getAttributes().equals(buildExpectedRetryPolicyAttributes("success")) + && p.getValue() == 3); + + HistogramData attemptsMetric = + testing.metrics().stream() + .filter(m -> m.getName().equals("failsafe.retry_policy.attempts")) + .findFirst() + .get() + .getHistogramData(); + Collection pointData = attemptsMetric.getPoints(); + assertThat(pointData).hasSize(2); + assertThat(pointData) + .anyMatch( + p -> + p.getCount() == 3 + && p.getMin() == 1 + && p.getMax() == 3 + && p.getAttributes().equals(buildExpectedRetryPolicyAttributes("success")) + && Arrays.equals(p.getCounts().toArray(), new Long[] {1L, 1L, 1L, 0L})); + assertThat(pointData) + .anyMatch( + p -> + p.getCount() == 1 + && p.getMin() == 3 + && p.getMax() == 3 + && p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure")) + && Arrays.equals(p.getCounts().toArray(), new Long[] {0L, 0L, 1L, 0L})); + } + private static Consumer buildCircuitBreakerAssertion( long expectedValue, String expectedAttributeKey, String expectedAttributeValue) { return longSumAssert -> @@ -94,4 +178,11 @@ private static Consumer buildCircuitBreakerAssertion( .build(), attributes)); } + + private static Attributes buildExpectedRetryPolicyAttributes(String expectedOutcome) { + return Attributes.builder() + .put("failsafe.retry_policy.name", "testing") + .put("failsafe.retry_policy.outcome", expectedOutcome) + .build(); + } } From d2e8c60d4372f4098dbbe3226186dcf04b3a8583 Mon Sep 17 00:00:00 2001 From: Onur Kayabasi Date: Mon, 24 Nov 2025 06:45:04 +0100 Subject: [PATCH 2/4] Review comments addressed --- .../failsafe/v3_0/FailsafeTelemetry.java | 8 ++------ .../failsafe/v3_0/FailsafeTelemetryTest.java | 10 +++++----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java index fc488bbb06e0..6dbd2394835e 100644 --- a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java +++ b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java @@ -21,8 +21,7 @@ import io.opentelemetry.api.metrics.LongCounter; import io.opentelemetry.api.metrics.LongHistogram; import io.opentelemetry.api.metrics.Meter; -import java.util.stream.Collectors; -import java.util.stream.LongStream; +import java.util.Arrays; /** Entrypoint for instrumenting Failsafe components. */ public final class FailsafeTelemetry { @@ -102,10 +101,7 @@ public RetryPolicy createRetryPolicy(RetryPolicy delegate, String retr .histogramBuilder("failsafe.retry_policy.attempts") .setDescription("Histogram of number of attempts for each execution.") .ofLongs() - .setExplicitBucketBoundariesAdvice( - LongStream.range(1, userConfig.getMaxAttempts() + 1) - .boxed() - .collect(Collectors.toList())) + .setExplicitBucketBoundariesAdvice(Arrays.asList(1L, 2L, 3L, 5L)) .build(); Attributes attributes = Attributes.of(RETRY_POLICY_NAME, retryPolicyName); return RetryPolicy.builder(userConfig) diff --git a/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java b/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java index 5fe45a041cac..e9b51dc4926d 100644 --- a/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java +++ b/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java @@ -101,7 +101,7 @@ void captureRetryPolicyMetrics() { failsafeTelemetry.createRetryPolicy(userRetryPolicy, "testing"); // when - for (int i = 0; i <= 3; i++) { + for (int i = 0; i <= 4; i++) { int temp = i; AtomicInteger retry = new AtomicInteger(0); Failsafe.with(instrumentedRetryPolicy) @@ -131,7 +131,7 @@ void captureRetryPolicyMetrics() { .anyMatch( p -> p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure")) - && p.getValue() == 1); + && p.getValue() == 2); assertThat(executionCountMetric.getPoints()) .anyMatch( p -> @@ -153,15 +153,15 @@ void captureRetryPolicyMetrics() { && p.getMin() == 1 && p.getMax() == 3 && p.getAttributes().equals(buildExpectedRetryPolicyAttributes("success")) - && Arrays.equals(p.getCounts().toArray(), new Long[] {1L, 1L, 1L, 0L})); + && Arrays.equals(p.getCounts().toArray(), new Long[] {1L, 1L, 1L, 0L, 0L})); assertThat(pointData) .anyMatch( p -> - p.getCount() == 1 + p.getCount() == 2 && p.getMin() == 3 && p.getMax() == 3 && p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure")) - && Arrays.equals(p.getCounts().toArray(), new Long[] {0L, 0L, 1L, 0L})); + && Arrays.equals(p.getCounts().toArray(), new Long[] {0L, 0L, 2L, 0L, 0L})); } private static Consumer buildCircuitBreakerAssertion( From 10242277f8a806982c6106fbd2ad6c92a3d34f2a Mon Sep 17 00:00:00 2001 From: Onur Kayabasi Date: Mon, 24 Nov 2025 07:03:37 +0100 Subject: [PATCH 3/4] Minor fix --- .../v3_0/RetryPolicyEventListenerBuilders.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java index dc7cfeb3e455..344e17e03d94 100644 --- a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java +++ b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java @@ -27,11 +27,12 @@ static EventListener> buildInstrumentedFailureLis LongHistogram attemptsHistogram, Attributes commonAttributes) { Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "failure").build(); + EventListener> userFailureListener = userConfig.getFailureListener(); return e -> { executionCounter.add(1, attributes); attemptsHistogram.record(e.getAttemptCount(), attributes); - if (userConfig.getFailureListener() != null) { - userConfig.getFailureListener().accept(e); + if (userFailureListener != null) { + userFailureListener.accept(e); } }; } @@ -42,11 +43,12 @@ static EventListener> buildInstrumentedSuccessLis LongHistogram attemptsHistogram, Attributes commonAttributes) { Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "success").build(); + EventListener> userSuccessListener = userConfig.getSuccessListener(); return e -> { executionCounter.add(1, attributes); attemptsHistogram.record(e.getAttemptCount(), attributes); - if (userConfig.getFailureListener() != null) { - userConfig.getFailureListener().accept(e); + if (userSuccessListener != null) { + userSuccessListener.accept(e); } }; } From 8456c0533ffc52aed02a5449fd85e282c3c20082 Mon Sep 17 00:00:00 2001 From: Onur Kayabasi Date: Fri, 28 Nov 2025 07:12:35 +0100 Subject: [PATCH 4/4] Review comments addressed --- .../instrumentation/failsafe/v3_0/FailsafeTelemetry.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java index 6dbd2394835e..c9b358c6baf1 100644 --- a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java +++ b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java @@ -92,14 +92,14 @@ public RetryPolicy createRetryPolicy(RetryPolicy delegate, String retr meter .counterBuilder("failsafe.retry_policy.execution.count") .setDescription( - "Count of execution events processed by the retry policy. " - + "Each event represents one complete execution flow (initial attempt + any retries). " - + "This metric does not count individual retry attempts - it counts each time the policy is invoked.") + "Count of execution attempts processed by the retry policy, " + + "where one execution represents the total number of attempts.") + .setUnit("{execution}") .build(); LongHistogram attemptsHistogram = meter .histogramBuilder("failsafe.retry_policy.attempts") - .setDescription("Histogram of number of attempts for each execution.") + .setDescription("Number of attempts for each execution.") .ofLongs() .setExplicitBucketBoundariesAdvice(Arrays.asList(1L, 2L, 3L, 5L)) .build();