Merge pull request #1173 from DeterminateSystems/queue-runner-exporter

hydra-queue-runner metrics
This commit is contained in:
Graham Christensen
2022-04-07 12:27:33 -04:00
committed by GitHub
9 changed files with 175 additions and 23 deletions

View File

@ -6,6 +6,8 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <prometheus/exposer.h>
#include "state.hh"
#include "hydra-build-result.hh"
#include "store-api.hh"
@ -36,8 +38,55 @@ std::string getEnvOrDie(const std::string & key)
return *value;
}
State::PromMetrics::PromMetrics()
: registry(std::make_shared<prometheus::Registry>())
, queue_checks_started(
prometheus::BuildCounter()
.Name("hydraqueuerunner_queue_checks_started_total")
.Help("Number of times State::getQueuedBuilds() was started")
.Register(*registry)
.Add({})
)
, queue_build_loads(
prometheus::BuildCounter()
.Name("hydraqueuerunner_queue_build_loads_total")
.Help("Number of builds loaded")
.Register(*registry)
.Add({})
)
, queue_steps_created(
prometheus::BuildCounter()
.Name("hydraqueuerunner_queue_steps_created_total")
.Help("Number of steps created")
.Register(*registry)
.Add({})
)
, queue_checks_early_exits(
prometheus::BuildCounter()
.Name("hydraqueuerunner_queue_checks_early_exits_total")
.Help("Number of times State::getQueuedBuilds() yielded to potential bumps")
.Register(*registry)
.Add({})
)
, queue_checks_finished(
prometheus::BuildCounter()
.Name("hydraqueuerunner_queue_checks_finished_total")
.Help("Number of times State::getQueuedBuilds() was completed")
.Register(*registry)
.Add({})
)
, queue_max_id(
prometheus::BuildGauge()
.Name("hydraqueuerunner_queue_max_build_id_info")
.Help("Maximum build record ID in the queue")
.Register(*registry)
.Add({})
)
{
State::State()
}
State::State(std::optional<std::string> metricsAddrOpt)
: config(std::make_unique<HydraConfig>())
, maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0))
, dbPool(config->getIntOption("max_db_connections", 128))
@ -45,11 +94,16 @@ State::State()
, maxLogSize(config->getIntOption("max_log_size", 64ULL << 20))
, uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false))
, rootsDir(config->getStrOption("gc_roots_dir", fmt("%s/gcroots/per-user/%s/hydra-roots", settings.nixStateDir, getEnvOrDie("LOGNAME"))))
, metricsAddr(config->getStrOption("queue_runner_metrics_address", std::string{"127.0.0.1:9198"}))
{
hydraData = getEnvOrDie("HYDRA_DATA");
logDir = canonPath(hydraData + "/build-logs");
if (metricsAddrOpt.has_value()) {
metricsAddr = metricsAddrOpt.value();
}
/* handle deprecated store specification */
if (config->getStrOption("store_mode") != "")
throw Error("store_mode in hydra.conf is deprecated, please use store_uri");
@ -754,6 +808,18 @@ void State::run(BuildID buildOne)
if (!lock)
throw Error("hydra-queue-runner is already running");
std::cout << "Starting the Prometheus exporter on " << metricsAddr << std::endl;
/* Set up simple exporter, to show that we're still alive. */
prometheus::Exposer promExposer{metricsAddr};
auto exposerPort = promExposer.GetListeningPorts().front();
promExposer.RegisterCollectable(prom.registry);
std::cout << "Started the Prometheus exporter, listening on "
<< metricsAddr << "/metrics (port " << exposerPort << ")"
<< std::endl;
Store::Params localParams;
localParams["max-connections"] = "16";
localParams["max-connection-age"] = "600";
@ -864,6 +930,7 @@ int main(int argc, char * * argv)
bool unlock = false;
bool status = false;
BuildID buildOne = 0;
std::optional<std::string> metricsAddrOpt = std::nullopt;
parseCmdLine(argc, argv, [&](Strings::iterator & arg, const Strings::iterator & end) {
if (*arg == "--unlock")
@ -875,6 +942,8 @@ int main(int argc, char * * argv)
buildOne = *b;
else
throw Error("--build-one requires a build ID");
} else if (*arg == "--prometheus-address") {
metricsAddrOpt = getArg(*arg, arg, end);
} else
return false;
return true;
@ -883,7 +952,7 @@ int main(int argc, char * * argv)
settings.verboseBuild = true;
settings.lockCPU = false;
State state;
State state{metricsAddrOpt};
if (status)
state.showStatus();
else if (unlock)