|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +# Copyright The OpenTelemetry Authors |
| 4 | +# |
| 5 | +# SPDX-License-Identifier: Apache-2.0 |
| 6 | + |
| 7 | +require 'opentelemetry/common' |
| 8 | +require 'opentelemetry/sdk' |
| 9 | +require 'net/http' |
| 10 | +require 'csv' |
| 11 | +require 'zlib' |
| 12 | + |
| 13 | +require 'google/rpc/status_pb' |
| 14 | + |
| 15 | +require 'opentelemetry/proto/common/v1/common_pb' |
| 16 | +require 'opentelemetry/proto/resource/v1/resource_pb' |
| 17 | +require 'opentelemetry/proto/metrics/v1/metrics_pb' |
| 18 | +require 'opentelemetry/proto/collector/metrics/v1/metrics_service_pb' |
| 19 | + |
| 20 | +require 'opentelemetry/metrics' |
| 21 | +require 'opentelemetry/sdk/metrics' |
| 22 | + |
| 23 | +require_relative './util' |
| 24 | + |
| 25 | +module OpenTelemetry |
| 26 | + module Exporter |
| 27 | + module OTLP |
| 28 | + # An OpenTelemetry metrics exporter that sends metrics over HTTP as Protobuf encoded OTLP ExportMetricsServiceRequest. |
| 29 | + class MetricsExporter < ::OpenTelemetry::SDK::Metrics::Export::MetricReader |
| 30 | + include Util |
| 31 | + |
| 32 | + attr_reader :metric_snapshots |
| 33 | + |
| 34 | + SUCCESS = OpenTelemetry::SDK::Metrics::Export::SUCCESS |
| 35 | + FAILURE = OpenTelemetry::SDK::Metrics::Export::FAILURE |
| 36 | + private_constant(:SUCCESS, :FAILURE) |
| 37 | + |
| 38 | + WRITE_TIMEOUT_SUPPORTED = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.6') |
| 39 | + private_constant(:WRITE_TIMEOUT_SUPPORTED) |
| 40 | + |
| 41 | + def self.ssl_verify_mode |
| 42 | + if ENV.key?('OTEL_RUBY_EXPORTER_OTLP_SSL_VERIFY_PEER') |
| 43 | + OpenSSL::SSL::VERIFY_PEER |
| 44 | + elsif ENV.key?('OTEL_RUBY_EXPORTER_OTLP_SSL_VERIFY_NONE') |
| 45 | + OpenSSL::SSL::VERIFY_NONE |
| 46 | + else |
| 47 | + OpenSSL::SSL::VERIFY_PEER |
| 48 | + end |
| 49 | + end |
| 50 | + |
| 51 | + def initialize(endpoint: OpenTelemetry::Common::Utilities.config_opt('OTEL_EXPORTER_OTLP_METRICS_ENDPOINT', 'OTEL_EXPORTER_OTLP_ENDPOINT', default: 'http://localhost:4318/v1/metrics'), |
| 52 | + certificate_file: OpenTelemetry::Common::Utilities.config_opt('OTEL_EXPORTER_OTLP_METRICS_CERTIFICATE', 'OTEL_EXPORTER_OTLP_CERTIFICATE'), |
| 53 | + ssl_verify_mode: MetricsExporter.ssl_verify_mode, |
| 54 | + headers: OpenTelemetry::Common::Utilities.config_opt('OTEL_EXPORTER_OTLP_METRICS_HEADERS', 'OTEL_EXPORTER_OTLP_HEADERS', default: {}), |
| 55 | + compression: OpenTelemetry::Common::Utilities.config_opt('OTEL_EXPORTER_OTLP_METRICS_COMPRESSION', 'OTEL_EXPORTER_OTLP_COMPRESSION', default: 'gzip'), |
| 56 | + timeout: OpenTelemetry::Common::Utilities.config_opt('OTEL_EXPORTER_OTLP_METRICS_TIMEOUT', 'OTEL_EXPORTER_OTLP_TIMEOUT', default: 10)) |
| 57 | + raise ArgumentError, "invalid url for OTLP::MetricsExporter #{endpoint}" unless OpenTelemetry::Common::Utilities.valid_url?(endpoint) |
| 58 | + raise ArgumentError, "unsupported compression key #{compression}" unless compression.nil? || %w[gzip none].include?(compression) |
| 59 | + |
| 60 | + # create the MetricStore object |
| 61 | + super() |
| 62 | + |
| 63 | + @uri = if endpoint == ENV['OTEL_EXPORTER_OTLP_ENDPOINT'] |
| 64 | + URI.join(endpoint, 'v1/metrics') |
| 65 | + else |
| 66 | + URI(endpoint) |
| 67 | + end |
| 68 | + |
| 69 | + @http = http_connection(@uri, ssl_verify_mode, certificate_file) |
| 70 | + |
| 71 | + @path = @uri.path |
| 72 | + @headers = prepare_headers(headers) |
| 73 | + @timeout = timeout.to_f |
| 74 | + @compression = compression |
| 75 | + @mutex = Mutex.new |
| 76 | + @shutdown = false |
| 77 | + end |
| 78 | + |
| 79 | + # consolidate the metrics data into the form of MetricData |
| 80 | + # |
| 81 | + # return MetricData |
| 82 | + def pull |
| 83 | + export(collect) |
| 84 | + end |
| 85 | + |
| 86 | + # metrics Array[MetricData] |
| 87 | + def export(metrics, timeout: nil) |
| 88 | + @mutex.synchronize do |
| 89 | + send_bytes(encode(metrics), timeout: timeout) |
| 90 | + end |
| 91 | + end |
| 92 | + |
| 93 | + def send_bytes(bytes, timeout:) |
| 94 | + return FAILURE if bytes.nil? |
| 95 | + |
| 96 | + request = Net::HTTP::Post.new(@path) |
| 97 | + |
| 98 | + if @compression == 'gzip' |
| 99 | + request.add_field('Content-Encoding', 'gzip') |
| 100 | + body = Zlib.gzip(bytes) |
| 101 | + else |
| 102 | + body = bytes |
| 103 | + end |
| 104 | + |
| 105 | + request.body = body |
| 106 | + request.add_field('Content-Type', 'application/x-protobuf') |
| 107 | + @headers.each { |key, value| request.add_field(key, value) } |
| 108 | + |
| 109 | + retry_count = 0 |
| 110 | + timeout ||= @timeout |
| 111 | + start_time = OpenTelemetry::Common::Utilities.timeout_timestamp |
| 112 | + |
| 113 | + around_request do |
| 114 | + remaining_timeout = OpenTelemetry::Common::Utilities.maybe_timeout(timeout, start_time) |
| 115 | + return FAILURE if remaining_timeout.zero? |
| 116 | + |
| 117 | + @http.open_timeout = remaining_timeout |
| 118 | + @http.read_timeout = remaining_timeout |
| 119 | + @http.write_timeout = remaining_timeout if WRITE_TIMEOUT_SUPPORTED |
| 120 | + @http.start unless @http.started? |
| 121 | + response = measure_request_duration { @http.request(request) } |
| 122 | + case response |
| 123 | + when Net::HTTPOK |
| 124 | + response.body # Read and discard body |
| 125 | + SUCCESS |
| 126 | + when Net::HTTPServiceUnavailable, Net::HTTPTooManyRequests |
| 127 | + response.body # Read and discard body |
| 128 | + redo if backoff?(retry_after: response['Retry-After'], retry_count: retry_count += 1, reason: response.code) |
| 129 | + OpenTelemetry.logger.warn('Net::HTTPServiceUnavailable/Net::HTTPTooManyRequests in MetricsExporter#send_bytes') |
| 130 | + FAILURE |
| 131 | + when Net::HTTPRequestTimeOut, Net::HTTPGatewayTimeOut, Net::HTTPBadGateway |
| 132 | + response.body # Read and discard body |
| 133 | + redo if backoff?(retry_count: retry_count += 1, reason: response.code) |
| 134 | + OpenTelemetry.logger.warn('Net::HTTPRequestTimeOut/Net::HTTPGatewayTimeOut/Net::HTTPBadGateway in MetricsExporter#send_bytes') |
| 135 | + FAILURE |
| 136 | + when Net::HTTPNotFound |
| 137 | + OpenTelemetry.handle_error(message: "OTLP metrics_exporter received http.code=404 for uri: '#{@path}'") |
| 138 | + FAILURE |
| 139 | + when Net::HTTPBadRequest, Net::HTTPClientError, Net::HTTPServerError |
| 140 | + log_status(response.body) |
| 141 | + OpenTelemetry.logger.warn('Net::HTTPBadRequest/Net::HTTPClientError/Net::HTTPServerError in MetricsExporter#send_bytes') |
| 142 | + FAILURE |
| 143 | + when Net::HTTPRedirection |
| 144 | + @http.finish |
| 145 | + handle_redirect(response['location']) |
| 146 | + redo if backoff?(retry_after: 0, retry_count: retry_count += 1, reason: response.code) |
| 147 | + else |
| 148 | + @http.finish |
| 149 | + OpenTelemetry.logger.warn("Unexpected error in OTLP::MetricsExporter#send_bytes - #{response.message}") |
| 150 | + FAILURE |
| 151 | + end |
| 152 | + rescue Net::OpenTimeout, Net::ReadTimeout |
| 153 | + retry if backoff?(retry_count: retry_count += 1, reason: 'timeout') |
| 154 | + OpenTelemetry.logger.warn('Net::OpenTimeout/Net::ReadTimeout in MetricsExporter#send_bytes') |
| 155 | + return FAILURE |
| 156 | + rescue OpenSSL::SSL::SSLError |
| 157 | + retry if backoff?(retry_count: retry_count += 1, reason: 'openssl_error') |
| 158 | + OpenTelemetry.logger.warn('OpenSSL::SSL::SSLError in MetricsExporter#send_bytes') |
| 159 | + return FAILURE |
| 160 | + rescue SocketError |
| 161 | + retry if backoff?(retry_count: retry_count += 1, reason: 'socket_error') |
| 162 | + OpenTelemetry.logger.warn('SocketError in MetricsExporter#send_bytes') |
| 163 | + return FAILURE |
| 164 | + rescue SystemCallError => e |
| 165 | + retry if backoff?(retry_count: retry_count += 1, reason: e.class.name) |
| 166 | + OpenTelemetry.logger.warn('SystemCallError in MetricsExporter#send_bytes') |
| 167 | + return FAILURE |
| 168 | + rescue EOFError |
| 169 | + retry if backoff?(retry_count: retry_count += 1, reason: 'eof_error') |
| 170 | + OpenTelemetry.logger.warn('EOFError in MetricsExporter#send_bytes') |
| 171 | + return FAILURE |
| 172 | + rescue Zlib::DataError |
| 173 | + retry if backoff?(retry_count: retry_count += 1, reason: 'zlib_error') |
| 174 | + OpenTelemetry.logger.warn('Zlib::DataError in MetricsExporter#send_bytes') |
| 175 | + return FAILURE |
| 176 | + rescue StandardError => e |
| 177 | + OpenTelemetry.handle_error(exception: e, message: 'unexpected error in OTLP::MetricsExporter#send_bytes') |
| 178 | + return FAILURE |
| 179 | + end |
| 180 | + ensure |
| 181 | + # Reset timeouts to defaults for the next call. |
| 182 | + @http.open_timeout = @timeout |
| 183 | + @http.read_timeout = @timeout |
| 184 | + @http.write_timeout = @timeout if WRITE_TIMEOUT_SUPPORTED |
| 185 | + end |
| 186 | + |
| 187 | + def encode(metrics_data) |
| 188 | + Opentelemetry::Proto::Collector::Metrics::V1::ExportMetricsServiceRequest.encode( |
| 189 | + Opentelemetry::Proto::Collector::Metrics::V1::ExportMetricsServiceRequest.new( |
| 190 | + resource_metrics: metrics_data |
| 191 | + .group_by(&:resource) |
| 192 | + .map do |resource, scope_metrics| |
| 193 | + Opentelemetry::Proto::Metrics::V1::ResourceMetrics.new( |
| 194 | + resource: Opentelemetry::Proto::Resource::V1::Resource.new( |
| 195 | + attributes: resource.attribute_enumerator.map { |key, value| as_otlp_key_value(key, value) } |
| 196 | + ), |
| 197 | + scope_metrics: scope_metrics |
| 198 | + .group_by(&:instrumentation_scope) |
| 199 | + .map do |instrumentation_scope, metrics| |
| 200 | + Opentelemetry::Proto::Metrics::V1::ScopeMetrics.new( |
| 201 | + scope: Opentelemetry::Proto::Common::V1::InstrumentationScope.new( |
| 202 | + name: instrumentation_scope.name, |
| 203 | + version: instrumentation_scope.version |
| 204 | + ), |
| 205 | + metrics: metrics.map { |sd| as_otlp_metrics(sd) } |
| 206 | + ) |
| 207 | + end |
| 208 | + ) |
| 209 | + end |
| 210 | + ) |
| 211 | + ) |
| 212 | + rescue StandardError => e |
| 213 | + OpenTelemetry.handle_error(exception: e, message: 'unexpected error in OTLP::MetricsExporter#encode') |
| 214 | + nil |
| 215 | + end |
| 216 | + |
| 217 | + # metrics_pb has following type of data: :gauge, :sum, :histogram, :exponential_histogram, :summary |
| 218 | + # current metric sdk only implements instrument: :counter -> :sum, :histogram -> :histogram |
| 219 | + # |
| 220 | + # metrics [MetricData] |
| 221 | + def as_otlp_metrics(metrics) |
| 222 | + case metrics.instrument_kind |
| 223 | + when :observable_gauge |
| 224 | + Opentelemetry::Proto::Metrics::V1::Metric.new( |
| 225 | + name: metrics.name, |
| 226 | + description: metrics.description, |
| 227 | + unit: metrics.unit, |
| 228 | + gauge: Opentelemetry::Proto::Metrics::V1::Gauge.new( |
| 229 | + aggregation_temporality: as_otlp_aggregation_temporality(metrics.aggregation_temporality), |
| 230 | + data_points: metrics.data_points.map do |ndp| |
| 231 | + number_data_point(ndp) |
| 232 | + end |
| 233 | + ) |
| 234 | + ) |
| 235 | + |
| 236 | + when :counter, :up_down_counter |
| 237 | + Opentelemetry::Proto::Metrics::V1::Metric.new( |
| 238 | + name: metrics.name, |
| 239 | + description: metrics.description, |
| 240 | + unit: metrics.unit, |
| 241 | + sum: Opentelemetry::Proto::Metrics::V1::Sum.new( |
| 242 | + aggregation_temporality: as_otlp_aggregation_temporality(metrics.aggregation_temporality), |
| 243 | + data_points: metrics.data_points.map do |ndp| |
| 244 | + number_data_point(ndp) |
| 245 | + end |
| 246 | + ) |
| 247 | + ) |
| 248 | + |
| 249 | + when :histogram |
| 250 | + Opentelemetry::Proto::Metrics::V1::Metric.new( |
| 251 | + name: metrics.name, |
| 252 | + description: metrics.description, |
| 253 | + unit: metrics.unit, |
| 254 | + histogram: Opentelemetry::Proto::Metrics::V1::Histogram.new( |
| 255 | + aggregation_temporality: as_otlp_aggregation_temporality(metrics.aggregation_temporality), |
| 256 | + data_points: metrics.data_points.map do |hdp| |
| 257 | + histogram_data_point(hdp) |
| 258 | + end |
| 259 | + ) |
| 260 | + ) |
| 261 | + end |
| 262 | + end |
| 263 | + |
| 264 | + def as_otlp_aggregation_temporality(type) |
| 265 | + case type |
| 266 | + when :delta then Opentelemetry::Proto::Metrics::V1::AggregationTemporality::AGGREGATION_TEMPORALITY_DELTA |
| 267 | + when :cumulative then Opentelemetry::Proto::Metrics::V1::AggregationTemporality::AGGREGATION_TEMPORALITY_CUMULATIVE |
| 268 | + else Opentelemetry::Proto::Metrics::V1::AggregationTemporality::AGGREGATION_TEMPORALITY_UNSPECIFIED |
| 269 | + end |
| 270 | + end |
| 271 | + |
| 272 | + def histogram_data_point(hdp) |
| 273 | + Opentelemetry::Proto::Metrics::V1::HistogramDataPoint.new( |
| 274 | + attributes: hdp.attributes.map { |k, v| as_otlp_key_value(k, v) }, |
| 275 | + start_time_unix_nano: hdp.start_time_unix_nano, |
| 276 | + time_unix_nano: hdp.time_unix_nano, |
| 277 | + count: hdp.count, |
| 278 | + sum: hdp.sum, |
| 279 | + bucket_counts: hdp.bucket_counts, |
| 280 | + explicit_bounds: hdp.explicit_bounds, |
| 281 | + exemplars: hdp.exemplars, |
| 282 | + min: hdp.min, |
| 283 | + max: hdp.max |
| 284 | + ) |
| 285 | + end |
| 286 | + |
| 287 | + def number_data_point(ndp) |
| 288 | + Opentelemetry::Proto::Metrics::V1::NumberDataPoint.new( |
| 289 | + attributes: ndp.attributes.map { |k, v| as_otlp_key_value(k, v) }, |
| 290 | + as_int: ndp.value, |
| 291 | + start_time_unix_nano: ndp.start_time_unix_nano, |
| 292 | + time_unix_nano: ndp.time_unix_nano, |
| 293 | + exemplars: ndp.exemplars # exemplars not implemented yet from metrics sdk |
| 294 | + ) |
| 295 | + end |
| 296 | + |
| 297 | + # may not need this |
| 298 | + def reset |
| 299 | + SUCCESS |
| 300 | + end |
| 301 | + |
| 302 | + def shutdown(timeout: nil) |
| 303 | + @shutdown = true |
| 304 | + SUCCESS |
| 305 | + end |
| 306 | + end |
| 307 | + end |
| 308 | + end |
| 309 | +end |
0 commit comments