Skip to content

Commit

Permalink
Metrics + OpenTelemetry Tracing (#131)
Browse files Browse the repository at this point in the history
Co-authored-by: Felix Gündling <felix.guendling@gmail.com>
  • Loading branch information
pablohoch and felixguendling authored Sep 10, 2024
1 parent e3f8fce commit d6e1c8f
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 7 deletions.
4 changes: 4 additions & 0 deletions .pkg
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@
url=git@github.com:felixguendling/oh.git
branch=master
commit=d21c30f40e52a83d6dc09bcffd0067598b5ec069
[opentelemetry-cpp]
url=git@github.com:motis-project/opentelemetry-cpp.git
branch=main
commit=ec4aef6b17b697052edef5417825ad71947b2ed1
9 changes: 7 additions & 2 deletions .pkg.lock
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
14526600618413782245
8342248202595136248
cista f52a62c4d83377acd398227ab4fcd6c946bdbd70
PEGTL 1c1aa6e650e4d26f10fa398f148ec0cdc5f0808d
res 7d97784ba785ce8a2677ea77164040fde484fb04
Expand All @@ -7,12 +7,17 @@ googletest 7b64fca6ea0833628d6f86255a81424365f7cc0c
fmt dc10f83be70ac2873d5f8d1ce317596f1fd318a2
utl 77aac494c45d2b070e65fe712abc34ac74a91d0f
oh d21c30f40e52a83d6dc09bcffd0067598b5ec069
zlib fe8e13ffca867612951bc6baf114e5ac8b00f305
zlib ee0742244d93c4237154ae16c3db42b5f284b442
boost 60cae66449fa3c9599b2b7d3d5d44c65301ed3a3
doctest 70e8f76437b76dd5e9c0a2eb9b907df190ab71a0
geo 0a14addf42e91b267906a156c9c2564935c03eaf
miniz 1edbdece9d71dc65c6ff405572ee37cbdcef7af4
libressl 24acd9e710fbe842e863572da9d738715fbc74b8
curl 3358dac21192864ef2ba47c88704d3b8d8203804
json 410c74782230daaa15054d6ee0975c0607091cb3
opentelemetry-proto 1624689398a3226c45994d70cb544a1e781dc032
abseil-cpp ba5240842d352b4b67a32092453a2fe5fe53a62e
protobuf d8136b9c6a62db6ce09900ecdeb82bb793096cbd
opentelemetry-cpp ec4aef6b17b697052edef5417825ad71947b2ed1
unordered_dense 77e91016354e6d8cba24a86c5abb807de2534c02
wyhash 1e012b57fc2227a9e583a57e2eacb3da99816d99
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ endif ()
file(GLOB_RECURSE nigiri-files src/*.cc)
add_library(nigiri ${nigiri-files})
target_include_directories(nigiri PUBLIC include)
target_link_libraries(nigiri PUBLIC cista geo utl fmt date miniz date-tz wyhash unordered_dense gtfsrt oh)
target_link_libraries(nigiri PUBLIC cista geo utl fmt date miniz date-tz wyhash unordered_dense gtfsrt oh opentelemetry_api)
target_compile_features(nigiri PUBLIC cxx_std_23)
target_compile_options(nigiri PRIVATE ${nigiri-compile-options})

Expand Down
17 changes: 17 additions & 0 deletions include/nigiri/get_otel_tracer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "opentelemetry/trace/provider.h"
#include "opentelemetry/trace/tracer.h"

#include "opentelemetry/trace/scope.h"
#include "opentelemetry/trace/span.h"

namespace nigiri {

// Returns the tracer registered under the name "nigiri", obtained from the
// tracer provider that opentelemetry::trace::Provider::GetTracerProvider()
// currently hands out.
inline opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>
get_otel_tracer() {
  auto provider = opentelemetry::trace::Provider::GetTracerProvider();
  return provider->GetTracer("nigiri");
}

} // namespace nigiri
22 changes: 22 additions & 0 deletions include/nigiri/routing/search.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#pragma once

#include "fmt/format.h"

#include "utl/enumerate.h"
#include "utl/equal_ranges_linear.h"
#include "utl/erase_if.h"
#include "utl/timing.h"
#include "utl/to_vec.h"

#include "nigiri/for_each_meta.h"
#include "nigiri/get_otel_tracer.h"
#include "nigiri/logging.h"
#include "nigiri/routing/dijkstra.h"
#include "nigiri/routing/get_fastest_direct.h"
Expand Down Expand Up @@ -63,6 +66,9 @@ struct search {
bool const require_bikes_allowed,
transfer_time_settings& tts,
algo_state_t& algo_state) {
auto span = get_otel_tracer()->StartSpan("search::init");
auto scope = opentelemetry::trace::Scope{span};

stats_.fastest_direct_ =
static_cast<std::uint64_t>(fastest_direct_.count());

Expand All @@ -83,6 +89,8 @@ struct search {
}

if constexpr (Algo::kUseLowerBounds) {
auto lb_span = get_otel_tracer()->StartSpan("lower bounds");
auto lb_scope = opentelemetry::trace::Scope{lb_span};
UTL_START_TIMING(lb);
dijkstra(tt_, q_,
kFwd ? tt_.fwd_search_lb_graph_ : tt_.bwd_search_lb_graph_,
Expand Down Expand Up @@ -167,6 +175,9 @@ struct search {
}

routing_result<algo_stats_t> execute() {
auto span = get_otel_tracer()->StartSpan("search::execute");
auto scope = opentelemetry::trace::Scope{span};

state_.results_.clear();

if (start_dest_overlap()) {
Expand Down Expand Up @@ -219,6 +230,8 @@ struct search {
q_.start_time_),
search_interval_, tt_.external_interval(), n_results_in_interval(),
is_timeout_reached());
span->SetAttribute("nigiri.search.timeout_reached",
is_timeout_reached());
break;
} else {
trace(
Expand Down Expand Up @@ -367,6 +380,9 @@ struct search {
}

void search_interval() {
auto span = get_otel_tracer()->StartSpan("search::search_interval");
auto scope = opentelemetry::trace::Scope{span};

utl::equal_ranges_linear(
state_.starts_,
[](start const& a, start const& b) {
Expand Down Expand Up @@ -397,6 +413,12 @@ struct search {
j.error_ = true;
log(log_lvl::error, "search", "reconstruct failed: {}",
e.what());
span->SetStatus(opentelemetry::trace::StatusCode::kError,
"exception");
span->AddEvent(
"exception",
{{"exception.message",
fmt::format("reconstruct failed: {}", e.what())}});
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions include/nigiri/rt/gtfsrt_update.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#endif
#include "gtfsrt/gtfs-realtime.pb.h"

#include "date/date.h"

#include "nigiri/types.h"

namespace nigiri {
Expand All @@ -30,6 +32,7 @@ struct statistics {
int trip_update_without_trip_{0};
int trip_resolve_error_{0};
int unsupported_schedule_relationship_{0};
date::sys_seconds feed_timestamp_{};
};

statistics gtfsrt_update_msg(timetable const&,
Expand Down
70 changes: 70 additions & 0 deletions src/routing/raptor_search.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
#include "nigiri/routing/raptor_search.h"

#include <string>
#include <string_view>
#include <utility>

#include "date/date.h"

#include "fmt/format.h"
#include "fmt/ranges.h"

#include "utl/overloaded.h"
#include "utl/to_vec.h"
#include "utl/verify.h"

#include "nigiri/get_otel_tracer.h"
#include "nigiri/routing/sanitize_via_stops.h"

namespace nigiri::routing {
Expand Down Expand Up @@ -62,6 +72,17 @@ routing_result<raptor_stats> raptor_search_with_dir(
std::unreachable();
}

// Returns the fixed lowercase name for the given location_match_mode
// enumerator ("exact", "only_children", "equivalent" or "intermodal").
// Every enumerator handled below returns directly from the switch; falling
// out of the switch is marked as unreachable.
std::string_view location_match_mode_str(location_match_mode const mode) {
  switch (mode) {
    case location_match_mode::kExact:
      return std::string_view{"exact"};
    case location_match_mode::kOnlyChildren:
      return std::string_view{"only_children"};
    case location_match_mode::kEquivalent:
      return std::string_view{"equivalent"};
    case location_match_mode::kIntermodal:
      return std::string_view{"intermodal"};
  }
  std::unreachable();
}

} // namespace

routing_result<raptor_stats> raptor_search(
Expand All @@ -72,6 +93,55 @@ routing_result<raptor_stats> raptor_search(
query q,
direction const search_dir,
std::optional<std::chrono::seconds> const timeout) {
auto span = get_otel_tracer()->StartSpan("raptor_search");
auto scope = opentelemetry::trace::Scope{span};
if (span->IsRecording()) {
std::visit(utl::overloaded{
[&](interval<unixtime_t> const& interval) {
span->SetAttribute("nigiri.query.start_time_interval.from",
date::format("%FT%RZ", interval.from_));
span->SetAttribute("nigiri.query.start_time_interval.to",
date::format("%FT%RZ", interval.to_));
},
[&](unixtime_t const& t) {
span->SetAttribute("nigiri.query.start_time",
date::format("%FT%RZ", t));
}},
q.start_time_);
span->SetAttribute("nigiri.query.start_match_mode",
location_match_mode_str(q.start_match_mode_));
span->SetAttribute("nigiri.query.destination_match_mode",
location_match_mode_str(q.dest_match_mode_));
span->SetAttribute("nigiri.query.use_start_footpaths",
q.use_start_footpaths_);
span->SetAttribute("nigiri.query.start_count", q.start_.size());
span->SetAttribute("nigiri.query.destination_count", q.destination_.size());
span->SetAttribute("nigiri.query.td_start_count", q.td_start_.size());
span->SetAttribute("nigiri.query.td_destination_count", q.td_dest_.size());
span->SetAttribute("nigiri.query.max_start_offset",
q.max_start_offset_.count());
span->SetAttribute("nigiri.query.max_transfers", q.max_transfers_);
span->SetAttribute("nigiri.query.min_connection_count",
q.min_connection_count_);
span->SetAttribute("nigiri.query.extend_interval_earlier",
q.extend_interval_earlier_);
span->SetAttribute("nigiri.query.extend_interval_later",
q.extend_interval_later_);
span->SetAttribute("nigiri.query.prf_idx", q.prf_idx_);
span->SetAttribute("nigiri.query.allowed_classes", q.allowed_claszes_);
span->SetAttribute("nigiri.query.require_bike_transport",
q.require_bike_transport_);
span->SetAttribute("nigiri.query.transfer_time_settings.default",
q.transfer_time_settings_.default_);
span->SetAttribute("nigiri.query.via_stops_count", q.via_stops_.size());
span->SetAttribute(
"nigiri.query.search_direction",
search_dir == direction::kForward ? "forward" : "backward");
if (timeout) {
span->SetAttribute("nigiri.query.timeout", timeout.value().count());
}
}

if (search_dir == direction::kForward) {
return raptor_search_with_dir<direction::kForward>(
tt, rtt, s_state, r_state, std::move(q), timeout);
Expand Down
39 changes: 35 additions & 4 deletions src/rt/gtfsrt_update.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "utl/pairwise.h"

#include "nigiri/loader/gtfs/stop_seq_number_encoding.h"
#include "nigiri/get_otel_tracer.h"
#include "nigiri/logging.h"
#include "nigiri/rt/frun.h"
#include "nigiri/rt/gtfsrt_resolve_run.h"
Expand Down Expand Up @@ -247,15 +248,25 @@ statistics gtfsrt_update_msg(timetable const& tt,
source_idx_t const src,
std::string_view tag,
gtfsrt::FeedMessage const& msg) {
auto span = get_otel_tracer()->StartSpan("gtfsrt_update_msg", {{"tag", tag}});
auto scope = opentelemetry::trace::Scope{span};

if (!msg.has_header()) {
span->SetStatus(opentelemetry::trace::StatusCode::kError, "missing header");
return {.no_header_ = true};
}

auto stats = statistics{.total_entities_ = msg.entity_size()};
auto const message_time =
date::sys_seconds{std::chrono::seconds{msg.header().timestamp()}};
auto const today =
std::chrono::time_point_cast<date::sys_days::duration>(message_time);
auto stats = statistics{.total_entities_ = msg.entity_size(),
.feed_timestamp_ = message_time};

span->SetAttribute("nigiri.gtfsrt.header.timestamp",
msg.header().timestamp());
span->SetAttribute("nigiri.gtfsrt.total_entities", msg.entity_size());

for (auto const& entity : msg.entity()) {
if (entity.has_is_deleted() && entity.is_deleted()) {
log(log_lvl::error, "rt.gtfs.unsupported",
Expand Down Expand Up @@ -307,7 +318,23 @@ statistics gtfsrt_update_msg(timetable const& tt,

if (!r.valid()) {
log(log_lvl::error, "rt.gtfs.resolve", "could not resolve (tag={}) {}",
tag, remove_nl(entity.trip_update().trip().DebugString()));
tag, remove_nl(td.DebugString()));
span->AddEvent(
"unresolved trip",
{
{"entity.id", entity.id()},
{"trip.trip_id", td.has_trip_id() ? td.trip_id() : ""},
{"trip.route_id", td.has_route_id() ? td.route_id() : ""},
{"trip.direction_id", td.direction_id()},
{"trip.start_time", td.has_start_time() ? td.start_time() : ""},
{"trip.start_date", td.has_start_date() ? td.start_date() : ""},
{"trip.schedule_relationship",
td.has_schedule_relationship()
? TripDescriptor_ScheduleRelationship_Name(
td.schedule_relationship())
: ""},
{"trip.str", remove_nl(td.DebugString())},
});
++stats.trip_resolve_error_;
continue;
}
Expand All @@ -323,9 +350,13 @@ statistics gtfsrt_update_msg(timetable const& tt,
} catch (const std::exception& e) {
++stats.total_entities_fail_;
log(log_lvl::error, "rt.gtfs",
"GTFS-RT error (tag={}): time={}, entitiy={}, message={}, error={}",
"GTFS-RT error (tag={}): time={}, entity={}, message={}, error={}",
tag, date::format("%T", message_time), entity.id(),
remove_nl(entity.DebugString()), e.what());
span->AddEvent("exception",
{{"exception.message", e.what()},
{"entity.id", entity.id()},
{"message", remove_nl(entity.DebugString())}});
}
}

Expand All @@ -352,4 +383,4 @@ statistics gtfsrt_update_buf(timetable const& tt,
return gtfsrt_update_msg(tt, rtt, src, tag, msg);
}

} // namespace nigiri::rt
} // namespace nigiri::rt

0 comments on commit d6e1c8f

Please sign in to comment.