-
Notifications
You must be signed in to change notification settings - Fork 452
feat(bigtable): add AttemptLatency2 metric and populate peer info labels #16095
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| #include "google/cloud/bigtable/internal/metrics.h" | ||
| #include "google/cloud/bigtable/version.h" | ||
| #include "absl/strings/charconv.h" | ||
| #include "absl/strings/escaping.h" | ||
| #include "absl/strings/match.h" | ||
| #include "absl/strings/numbers.h" | ||
| #include "absl/strings/str_split.h" | ||
|
|
@@ -38,13 +39,23 @@ auto constexpr kMeterInstrumentationScopeVersion = "v1"; | |
| // to the map should be more performant than performing a set_difference every | ||
| // time. | ||
| LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d, | ||
| std::set<std::string> const& filtered_data_labels) { | ||
| std::set<std::string> const& filtered_data_labels, | ||
| std::optional<PeerInfoLabels> const& peer_info_labels) { | ||
| LabelMap labels = { | ||
| {"project_id", r.project_id}, | ||
| {"instance", r.instance}, | ||
| {"table", r.table}, | ||
| {"cluster", r.cluster.empty() ? "<unspecified>" : r.cluster}, | ||
| {"zone", r.zone.empty() ? "global" : r.zone}}; | ||
|
|
||
| if (peer_info_labels) { | ||
| labels.insert({ | ||
| {"transport_type", peer_info_labels->transport_type}, | ||
| {"transport_region", peer_info_labels->transport_region}, | ||
| {"transport_subzone", peer_info_labels->transport_subzone}, | ||
| }); | ||
| } | ||
|
|
||
| std::map<std::string, std::string> data = {{ | ||
| {"method", d.method}, | ||
| {"streaming", d.streaming}, | ||
|
|
@@ -74,6 +85,7 @@ LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d, | |
| std::set_difference(data.begin(), data.end(), filtered_data_labels.begin(), | ||
| filtered_data_labels.end(), | ||
| std::inserter(labels, labels.begin()), Compare()); | ||
|
|
||
| return labels; | ||
| } | ||
|
|
||
|
|
@@ -103,6 +115,47 @@ GetResponseParamsFromTrailingMetadata( | |
| return absl::nullopt; | ||
| } | ||
|
|
||
| // Looks up the `bigtable-peer-info` entry in the server trailing metadata of | ||
| // `client_context`, Base64-decodes its value, and parses the result as a | ||
| // `google::bigtable::v2::PeerInfo` proto. Returns `absl::nullopt` when the | ||
| // entry is absent, the value does not decode, or the bytes do not parse. | ||
| absl::optional<google::bigtable::v2::PeerInfo> GetPeerInfoFromTrailingMetadata( | ||
| grpc::ClientContext const& client_context) { | ||
| // Bind by reference; copying the whole metadata multimap is unnecessary. | ||
| auto const& metadata = client_context.GetServerTrailingMetadata(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. Let's avoid making a copy if we don't have to. |
||
| // Base64 encoded peer info header key defined by the server. | ||
| auto iter = metadata.find("bigtable-peer-info"); | ||
| if (iter == metadata.end()) return absl::nullopt; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| std::string decoded; | ||
| if (!absl::Base64Unescape( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check if we need [WebSafeBase64Unescape] |
||
| absl::string_view{iter->second.data(), iter->second.size()}, | ||
| &decoded)) { | ||
| return absl::nullopt; | ||
| } | ||
| google::bigtable::v2::PeerInfo p; | ||
| if (p.ParseFromString(decoded)) return p; | ||
| return absl::nullopt; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
|
|
||
| // Maps a `PeerInfo::TransportType` value to the lowercase string used as the | ||
| // `transport_type` label. Values without a dedicated case map to | ||
| // "transport_type_unknown". | ||
| std::string TransportTypeToString( | ||
| google::bigtable::v2::PeerInfo::TransportType type) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can use implict proto conversion here. absl::AsciiStrToLower(google::bigtable::v2::PeerInfo::TransportType_Name(type)); |
||
| using PeerInfo = google::bigtable::v2::PeerInfo; | ||
| switch (type) { | ||
| case PeerInfo::TRANSPORT_TYPE_EXTERNAL: | ||
| return "transport_type_external"; | ||
| case PeerInfo::TRANSPORT_TYPE_CLOUD_PATH: | ||
| return "transport_type_cloud_path"; | ||
| case PeerInfo::TRANSPORT_TYPE_DIRECT_ACCESS: | ||
| return "transport_type_direct_access"; | ||
| case PeerInfo::TRANSPORT_TYPE_SESSION_UNKNOWN: | ||
| return "transport_type_session_unknown"; | ||
| case PeerInfo::TRANSPORT_TYPE_SESSION_EXTERNAL: | ||
| return "transport_type_session_external"; | ||
| case PeerInfo::TRANSPORT_TYPE_SESSION_CLOUD_PATH: | ||
| return "transport_type_session_cloud_path"; | ||
| case PeerInfo::TRANSPORT_TYPE_SESSION_DIRECT_ACCESS: | ||
| return "transport_type_session_direct_access"; | ||
| // The explicit unknown value and any unlisted value share one result. | ||
| case PeerInfo::TRANSPORT_TYPE_UNKNOWN: | ||
| default: | ||
| return "transport_type_unknown"; | ||
| } | ||
| } | ||
|
|
||
| absl::optional<double> GetServerLatencyFromInitialMetadata( | ||
| grpc::ClientContext const& client_context) { | ||
| auto const& initial_metadata = client_context.GetServerInitialMetadata(); | ||
|
|
@@ -225,6 +278,54 @@ std::unique_ptr<Metric> AttemptLatency::clone(ResourceLabels resource_labels, | |
| return m; | ||
| } | ||
|
|
||
| // Obtains the meter for `instrumentation_scope` (at | ||
| // `kMeterInstrumentationScopeVersion`) from `provider` and creates the | ||
| // "attempt_latencies2" double histogram that `PostCall` records into. | ||
| AttemptLatency2::AttemptLatency2( | ||
| std::string const& instrumentation_scope, | ||
| opentelemetry::nostd::shared_ptr< | ||
| opentelemetry::metrics::MeterProvider> const& provider) | ||
| : attempt_latencies2_(provider | ||
| ->GetMeter(instrumentation_scope, | ||
| kMeterInstrumentationScopeVersion) | ||
| ->CreateDoubleHistogram("attempt_latencies2")) {} | ||
|
|
||
| // Remembers when the attempt started so `PostCall` can compute its latency. | ||
| void AttemptLatency2::PreCall(opentelemetry::context::Context const&, | ||
| PreCallParams const& p) { | ||
| // `p` is a const reference, so `std::move` here could only ever copy. | ||
| attempt_start_ = p.attempt_start; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
|
|
||
| // Records the elapsed time of one attempt in the `attempt_latencies2` | ||
| // histogram. The cluster and zone labels are refreshed from the response | ||
| // params in the trailing metadata (when present), the transport labels from | ||
| // the peer info (when present), and the status label from `p.attempt_status`. | ||
| void AttemptLatency2::PostCall(opentelemetry::context::Context const& context, | ||
| grpc::ClientContext const& client_context, | ||
| PostCallParams const& p) { | ||
| auto response_params = GetResponseParamsFromTrailingMetadata(client_context); | ||
| if (response_params) { | ||
| resource_labels_.cluster = response_params->cluster_id(); | ||
| resource_labels_.zone = response_params->zone_id(); | ||
| } | ||
| auto peer_info = GetPeerInfoFromTrailingMetadata(client_context); | ||
| if (peer_info) { | ||
| peer_info_labels_.transport_type = | ||
| TransportTypeToString(peer_info->transport_type()); | ||
| peer_info_labels_.transport_region = | ||
| peer_info->application_frontend_region(); | ||
| peer_info_labels_.transport_subzone = | ||
| peer_info->application_frontend_subzone(); | ||
| } else { | ||
| // `peer_info_labels_` is a member, so values written by an earlier call | ||
| // would otherwise persist. Reset all three fields together so an unknown | ||
| // transport type is never paired with a stale region or subzone. | ||
| peer_info_labels_.transport_type = TransportTypeToString( | ||
| google::bigtable::v2::PeerInfo::TRANSPORT_TYPE_UNKNOWN); | ||
| peer_info_labels_.transport_region.clear(); | ||
| peer_info_labels_.transport_subzone.clear(); | ||
| } | ||
|
|
||
| data_labels_.status = StatusCodeToString(p.attempt_status.code()); | ||
| auto attempt_elapsed = std::chrono::duration_cast<LatencyDuration>( | ||
| p.attempt_end - attempt_start_); | ||
| auto m = IntoLabelMap(resource_labels_, data_labels_, {}, peer_info_labels_); | ||
| attempt_latencies2_->Record(attempt_elapsed.count(), std::move(m), context); | ||
| } | ||
|
|
||
| // Returns a copy of this metric whose resource and data labels are replaced | ||
| // by the supplied ones; every other member is copied from `*this`. | ||
| std::unique_ptr<Metric> AttemptLatency2::clone(ResourceLabels resource_labels, | ||
| DataLabels data_labels) const { | ||
| auto copy = std::make_unique<AttemptLatency2>(*this); | ||
| copy->data_labels_ = std::move(data_labels); | ||
| copy->resource_labels_ = std::move(resource_labels); | ||
| return copy; | ||
| } | ||
|
|
||
| RetryCount::RetryCount( | ||
| std::string const& instrumentation_scope, | ||
| opentelemetry::nostd::shared_ptr< | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| #include "google/cloud/bigtable/internal/operation_context.h" | ||
| #include "google/cloud/bigtable/version.h" | ||
| #include "google/cloud/status.h" | ||
| #include "google/bigtable/v2/peer_info.pb.h" | ||
| #include "google/bigtable/v2/response_params.pb.h" | ||
| #include <grpcpp/grpcpp.h> | ||
| #include <opentelemetry/context/context.h> | ||
|
|
@@ -52,16 +53,30 @@ struct DataLabels { | |
| std::string status; | ||
| }; | ||
|
|
||
| // Labels populated from the peer info metadata. `AttemptLatency2::PostCall` fills `transport_type` via `TransportTypeToString()`, and fills `transport_region` / `transport_subzone` from the peer info's `application_frontend_region()` / `application_frontend_subzone()`. | ||
| struct PeerInfoLabels { | ||
| std::string transport_type; | ||
| std::string transport_region; | ||
| std::string transport_subzone; | ||
| }; | ||
|
|
||
| using LabelMap = std::unordered_map<std::string, std::string>; | ||
| LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d, | ||
| std::set<std::string> const& filtered_data_labels = {}); | ||
| // `peer_info_labels` is optional because only AttemptLatency2 populates it. | ||
| LabelMap IntoLabelMap( | ||
| ResourceLabels const& r, DataLabels const& d, | ||
| std::set<std::string> const& filtered_data_labels = {}, | ||
| std::optional<PeerInfoLabels> const& peer_info_labels = std::nullopt); | ||
|
|
||
| bool HasServerTiming(grpc::ClientContext const& client_context); | ||
| bool IsConnectivityError(google::cloud::Status const& status, | ||
| grpc::ClientContext const& client_context); | ||
| absl::optional<google::bigtable::v2::ResponseParams> | ||
| GetResponseParamsFromTrailingMetadata( | ||
| grpc::ClientContext const& client_context); | ||
| absl::optional<google::bigtable::v2::PeerInfo> GetPeerInfoFromTrailingMetadata( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's use |
||
| grpc::ClientContext const& client_context); | ||
| std::string TransportTypeToString( | ||
| google::bigtable::v2::PeerInfo::TransportType type); | ||
|
|
||
| absl::optional<double> GetServerLatencyFromInitialMetadata( | ||
| grpc::ClientContext const& client_context); | ||
|
|
@@ -154,6 +169,29 @@ class AttemptLatency : public Metric { | |
| OperationContext::Clock::time_point attempt_start_; | ||
| }; | ||
|
|
||
| // Similar to AttemptLatency and also populates the peer info. `PreCall` stores the attempt start time; `PostCall` records the elapsed attempt time in the "attempt_latencies2" histogram with resource, data, and peer info labels; `clone` copies the metric and replaces its resource and data labels. | ||
| class AttemptLatency2 : public Metric { | ||
| public: | ||
| AttemptLatency2(std::string const& instrumentation_scope, | ||
| opentelemetry::nostd::shared_ptr< | ||
| opentelemetry::metrics::MeterProvider> const& provider); | ||
| void PreCall(opentelemetry::context::Context const&, | ||
| PreCallParams const& p) override; | ||
| void PostCall(opentelemetry::context::Context const& context, | ||
| grpc::ClientContext const& client_context, | ||
| PostCallParams const& p) override; | ||
| std::unique_ptr<Metric> clone(ResourceLabels resource_labels, | ||
| DataLabels data_labels) const override; | ||
|
|
||
| private: | ||
| ResourceLabels resource_labels_; | ||
| DataLabels data_labels_; | ||
| PeerInfoLabels peer_info_labels_; | ||
| opentelemetry::nostd::shared_ptr<opentelemetry::metrics::Histogram<double>> | ||
| attempt_latencies2_; | ||
| OperationContext::Clock::time_point attempt_start_; | ||
| }; | ||
|
|
||
| class RetryCount : public Metric { | ||
| public: | ||
| RetryCount(std::string const& instrumentation_scope, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`bigtable-peer-info` is sent as initial metadata, so I would check for it in the initial metadata first and, if it is not present there, check for it in `GetServerTrailingMetadata`.