From 847a8ae6cdf3e8ab63679ce74479549db2024303 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 3 Mar 2025 10:10:04 -0500 Subject: [PATCH 01/36] Revert "Use `LegacySSHStore`" There were some hangs caused by this. Need to fix them, ideally reproducing the issue in a test, before trying this again. This reverts commit 4a4a0f901c70676ee47f830d2ff6a72789ba1baf. --- src/hydra-queue-runner/build-remote.cc | 174 ++++++++++++++++--------- src/hydra-queue-runner/state.hh | 8 +- 2 files changed, 116 insertions(+), 66 deletions(-) diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index 39970bd3..77bde2c4 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -9,10 +9,13 @@ #include "path.hh" #include "legacy-ssh-store.hh" #include "serve-protocol.hh" +#include "serve-protocol-impl.hh" #include "state.hh" #include "current-process.hh" #include "processes.hh" #include "util.hh" +#include "serve-protocol.hh" +#include "serve-protocol-impl.hh" #include "ssh.hh" #include "finally.hh" #include "url.hh" @@ -36,6 +39,38 @@ bool ::Machine::isLocalhost() const namespace nix::build_remote { +static std::unique_ptr openConnection( + ::Machine::ptr machine, SSHMaster & master) +{ + Strings command = {"nix-store", "--serve", "--write"}; + if (machine->isLocalhost()) { + command.push_back("--builders"); + command.push_back(""); + } else { + auto remoteStore = machine->storeUri.params.find("remote-store"); + if (remoteStore != machine->storeUri.params.end()) { + command.push_back("--store"); + command.push_back(shellEscape(remoteStore->second)); + } + } + + auto ret = master.startCommand(std::move(command), { + "-a", "-oBatchMode=yes", "-oConnectTimeout=60", "-oTCPKeepAlive=yes" + }); + + // XXX: determine the actual max value we can use from /proc. + + // FIXME: Should this be upstreamed into `startCommand` in Nix? + + int pipesize = 1024 * 1024; + + fcntl(ret->in.get(), F_SETPIPE_SZ, &pipesize); + fcntl(ret->out.get(), F_SETPIPE_SZ, &pipesize); + + return ret; +} + + static void copyClosureTo( ::Machine::Connection & conn, Store & destStore, @@ -52,8 +87,8 @@ static void copyClosureTo( // FIXME: substitute output pollutes our build log /* Get back the set of paths that are already valid on the remote host. */ - auto present = conn.store->queryValidPaths( - closure, true, useSubstitutes); + auto present = conn.queryValidPaths( + destStore, true, closure, useSubstitutes); if (present.size() == closure.size()) return; @@ -68,7 +103,12 @@ static void copyClosureTo( std::unique_lock sendLock(conn.machine->state->sendLock, std::chrono::seconds(600)); - conn.store->addMultipleToStoreLegacy(destStore, missing); + conn.to << ServeProto::Command::ImportPaths; + destStore.exportPaths(missing, conn.to); + conn.to.flush(); + + if (readInt(conn.from) != 1) + throw Error("remote machine failed to import closure"); } @@ -188,7 +228,7 @@ static BuildResult performBuild( counter & nrStepsBuilding ) { - auto kont = conn.store->buildDerivationAsync(drvPath, drv, options); + conn.putBuildDerivationRequest(localStore, drvPath, drv, options); BuildResult result; @@ -197,10 +237,7 @@ static BuildResult performBuild( startTime = time(0); { MaintainCount mc(nrStepsBuilding); - result = kont(); - // Without proper call-once functions, we need to manually - // delete after calling. - kont = {}; + result = ServeProto::Serialise::read(localStore, conn); } stopTime = time(0); @@ -216,7 +253,7 @@ static BuildResult performBuild( // If the protocol was too old to give us `builtOutputs`, initialize // it manually by introspecting the derivation. - if (GET_PROTOCOL_MINOR(conn.store->getProtocol()) < 6) + if (GET_PROTOCOL_MINOR(conn.remoteVersion) < 6) { // If the remote is too old to handle CA derivations, we can’t get this // far anyways @@ -249,25 +286,26 @@ static void copyPathFromRemote( const ValidPathInfo & info ) { - /* Receive the NAR from the remote and add it to the - destination store. Meanwhile, extract all the info from the - NAR that getBuildOutput() needs. */ - auto source2 = sinkToSource([&](Sink & sink) - { - /* Note: we should only send the command to dump the store - path to the remote if the NAR is actually going to get read - by the destination store, which won't happen if this path - is already valid on the destination store. Since this - lambda function only gets executed if someone tries to read - from source2, we will send the command from here rather - than outside the lambda. */ - conn.store->narFromPath(info.path, [&](Source & source) { - TeeSource tee{source, sink}; - extractNarData(tee, conn.store->printStorePath(info.path), narMembers); - }); - }); + /* Receive the NAR from the remote and add it to the + destination store. Meanwhile, extract all the info from the + NAR that getBuildOutput() needs. */ + auto source2 = sinkToSource([&](Sink & sink) + { + /* Note: we should only send the command to dump the store + path to the remote if the NAR is actually going to get read + by the destination store, which won't happen if this path + is already valid on the destination store. Since this + lambda function only gets executed if someone tries to read + from source2, we will send the command from here rather + than outside the lambda. */ + conn.to << ServeProto::Command::DumpStorePath << localStore.printStorePath(info.path); + conn.to.flush(); - destStore.addToStore(info, *source2, NoRepair, NoCheckSigs); + TeeSource tee(conn.from, sink); + extractNarData(tee, localStore.printStorePath(info.path), narMembers); + }); + + destStore.addToStore(info, *source2, NoRepair, NoCheckSigs); } static void copyPathsFromRemote( @@ -366,39 +404,30 @@ void State::buildRemote(ref destStore, updateStep(ssConnecting); - // FIXME: rewrite to use Store. - ::Machine::Connection conn { - .machine = machine, - .store = [&]{ - auto * pSpecified = std::get_if(&machine->storeUri.variant); - if (!pSpecified || pSpecified->scheme != "ssh") { - throw Error("Currently, only (legacy-)ssh stores are supported!"); - } + auto storeRef = machine->completeStoreReference(); - auto remoteStore = machine->openStore().dynamic_pointer_cast(); - assert(remoteStore); + auto * pSpecified = std::get_if(&storeRef.variant); + if (!pSpecified || pSpecified->scheme != "ssh") { + throw Error("Currently, only (legacy-)ssh stores are supported!"); + } - remoteStore->connPipeSize = 1024 * 1024; - - if (machine->isLocalhost()) { - auto rp_new = remoteStore->remoteProgram.get(); - rp_new.push_back("--builders"); - rp_new.push_back(""); - const_cast &>(remoteStore->remoteProgram).assign(rp_new); - } - remoteStore->extraSshArgs = { - "-a", "-oBatchMode=yes", "-oConnectTimeout=60", "-oTCPKeepAlive=yes" - }; - const_cast &>(remoteStore->logFD).assign(logFD.get()); - - return nix::ref{remoteStore}; - }(), + LegacySSHStoreConfig storeConfig { + pSpecified->scheme, + pSpecified->authority, + storeRef.params }; + auto master = storeConfig.createSSHMaster( + false, // no SSH master yet + logFD.get()); + + // FIXME: rewrite to use Store. + auto child = build_remote::openConnection(machine, master); + { auto activeStepState(activeStep->state_.lock()); if (activeStepState->cancelled) throw Error("step cancelled"); - activeStepState->pid = conn.store->getConnectionPid(); + activeStepState->pid = child->sshPid; } Finally clearPid([&]() { @@ -413,12 +442,35 @@ void State::buildRemote(ref destStore, process. Meh. */ }); + ::Machine::Connection conn { + { + .to = child->in.get(), + .from = child->out.get(), + /* Handshake. */ + .remoteVersion = 0xdadbeef, // FIXME avoid dummy initialize + }, + /*.machine =*/ machine, + }; + Finally updateStats([&]() { - auto stats = conn.store->getConnectionStats(); - bytesReceived += stats.bytesReceived; - bytesSent += stats.bytesSent; + bytesReceived += conn.from.read; + bytesSent += conn.to.written; }); + constexpr ServeProto::Version our_version = 0x206; + + try { + conn.remoteVersion = decltype(conn)::handshake( + conn.to, + conn.from, + our_version, + machine->storeUri.render()); + } catch (EndOfFile & e) { + child->sshPid.wait(); + std::string s = chomp(readFile(result.logFile)); + throw Error("cannot connect to ‘%1%’: %2%", machine->storeUri.render(), s); + } + { auto info(machine->state->connectInfo.lock()); info->consecutiveFailures = 0; @@ -487,7 +539,7 @@ void State::buildRemote(ref destStore, auto now1 = std::chrono::steady_clock::now(); - auto infos = conn.store->queryPathInfosUncached(outputs); + auto infos = conn.queryPathInfos(*localStore, outputs); size_t totalNarSize = 0; for (auto & [_, info] : infos) totalNarSize += info.narSize; @@ -522,11 +574,9 @@ void State::buildRemote(ref destStore, } } - /* Shut down the connection done by RAII. - - Only difference is kill() instead of wait() (i.e. send signal - then wait()) - */ + /* Shut down the connection. */ + child->in = -1; + child->sshPid.wait(); } catch (Error & e) { /* Disable this machine until a certain period of time has diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index e2d31434..30e01c74 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -20,7 +20,9 @@ #include "store-api.hh" #include "sync.hh" #include "nar-extractor.hh" -#include "legacy-ssh-store.hh" +#include "serve-protocol.hh" +#include "serve-protocol-impl.hh" +#include "serve-protocol-connection.hh" #include "machines.hh" @@ -290,11 +292,9 @@ struct Machine : nix::Machine bool isLocalhost() const; // A connection to a machine - struct Connection { + struct Connection : nix::ServeProto::BasicClientConnection { // Backpointer to the machine ptr machine; - // Opened store - nix::ref store; }; }; -- 2.48.1 From 55349930f1b5bbb38cf1809aefa207bc26032ca2 Mon Sep 17 00:00:00 2001 From: Martin Weinelt Date: Sun, 2 Mar 2025 03:08:26 +0100 Subject: [PATCH 02/36] Fix race condition in hydra-compress-logs --- nixos-modules/hydra.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nixos-modules/hydra.nix b/nixos-modules/hydra.nix index 4fc2d311..79d639e6 100644 --- a/nixos-modules/hydra.nix +++ b/nixos-modules/hydra.nix @@ -468,7 +468,7 @@ in elif [[ $compression == zstd ]]; then compression="zstd --rm" fi - find ${baseDir}/build-logs -type f -name "*.drv" -mtime +3 -size +0c | xargs -r "$compression" --force --quiet + find ${baseDir}/build-logs -ignore_readdir_race -type f -name "*.drv" -mtime +3 -size +0c | xargs -r "$compression" --force --quiet ''; startAt = "Sun 01:45"; }; -- 2.48.1 From ebefdb0a3d8b603b8a11ee1b0ab21c3012e1040e Mon Sep 17 00:00:00 2001 From: zowoq <59103226+zowoq@users.noreply.github.com> Date: Fri, 28 Mar 2025 11:11:26 +1000 Subject: [PATCH 03/36] hydraTest: remove outdated postgresql version error: postgresql_12 has been removed since it reached its EOL upstream --- nixos-modules/default.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/nixos-modules/default.nix b/nixos-modules/default.nix index 62b18406..d12d8338 100644 --- a/nixos-modules/default.nix +++ b/nixos-modules/default.nix @@ -15,7 +15,6 @@ systemd.services.hydra-send-stats.enable = false; services.postgresql.enable = true; - services.postgresql.package = pkgs.postgresql_12; # The following is to work around the following error from hydra-server: # [error] Caught exception in engine "Cannot determine local time zone" -- 2.48.1 From eb17619ee549614cd064db091029d217e515af8a Mon Sep 17 00:00:00 2001 From: zowoq <59103226+zowoq@users.noreply.github.com> Date: Fri, 28 Mar 2025 11:07:25 +1000 Subject: [PATCH 04/36] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'nix-eval-jobs': 'github:nix-community/nix-eval-jobs/4b392b284877d203ae262e16af269f702df036bc?narHash=sha256-3wIReAqdTALv39gkWXLMZQvHyBOc3yPkWT2ZsItxedY%3D' (2025-02-14) → 'github:nix-community/nix-eval-jobs/f7418fc1fa45b96d37baa95ff3c016dd5be3876b?narHash=sha256-Lo4KFBNcY8tmBuCmEr2XV0IUZtxXHmbXPNLkov/QSU0%3D' (2025-03-26) --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index c47a3b88..6f1b1347 100644 --- a/flake.lock +++ b/flake.lock @@ -29,11 +29,11 @@ "nix-eval-jobs": { "flake": false, "locked": { - "lastModified": 1739500569, - "narHash": "sha256-3wIReAqdTALv39gkWXLMZQvHyBOc3yPkWT2ZsItxedY=", + "lastModified": 1743008255, + "narHash": "sha256-Lo4KFBNcY8tmBuCmEr2XV0IUZtxXHmbXPNLkov/QSU0=", "owner": "nix-community", "repo": "nix-eval-jobs", - "rev": "4b392b284877d203ae262e16af269f702df036bc", + "rev": "f7418fc1fa45b96d37baa95ff3c016dd5be3876b", "type": "github" }, "original": { -- 2.48.1 From 90a8a0d94a1fe97211147c6b0ea3315cdf305712 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Thu, 16 Jan 2025 15:50:00 +0100 Subject: [PATCH 05/36] Reimplement (named) constituent jobs (+globbing) based on nix-eval-jobs Depends on https://github.com/nix-community/nix-eval-jobs/pull/349 & #1421. Almost equivalent to #1425, but with a small change: when having e.g. an aggregate job with a glob that matches nothing, the jobset evaluation is failed now. This was the intended behavior before (hydra-eval-jobset fails hard if an aggregate is broken), the code-path was never reached however since the aggregate was never marked as broken in this case before. --- t/evaluator/evaluate-constituents-broken.t | 68 ++++++--- t/evaluator/evaluate-constituents-globbing.t | 138 +++++++++++++++++++ t/jobs/config.nix | 14 ++ t/jobs/constituents-cycle-glob.nix | 34 +++++ t/jobs/constituents-cycle.nix | 21 +++ t/jobs/constituents-glob-all.nix | 22 +++ t/jobs/constituents-glob.nix | 31 +++++ t/jobs/constituents-no-matches.nix | 20 +++ t/jobs/declarative/project.json | 24 ++++ t/queue-runner/constituents.t | 4 +- 10 files changed, 354 insertions(+), 22 deletions(-) create mode 100644 t/evaluator/evaluate-constituents-globbing.t create mode 100644 t/jobs/config.nix create mode 100644 t/jobs/constituents-cycle-glob.nix create mode 100644 t/jobs/constituents-cycle.nix create mode 100644 t/jobs/constituents-glob-all.nix create mode 100644 t/jobs/constituents-glob.nix create mode 100644 t/jobs/constituents-no-matches.nix create mode 100644 t/jobs/declarative/project.json diff --git a/t/evaluator/evaluate-constituents-broken.t b/t/evaluator/evaluate-constituents-broken.t index 0e5960bf..1391b618 100644 --- a/t/evaluator/evaluate-constituents-broken.t +++ b/t/evaluator/evaluate-constituents-broken.t @@ -6,27 +6,55 @@ use Hydra::Helper::Exec; my $ctx = test_context(); -my $jobsetCtx = $ctx->makeJobset( - expression => 'constituents-broken.nix', -); -my $jobset = $jobsetCtx->{"jobset"}; +subtest "broken constituents expression" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-broken.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; -my ($res, $stdout, $stderr) = captureStdoutStderr(60, - ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) -); -isnt($res, 0, "hydra-eval-jobset exits non-zero"); -ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); -like( - $stderr, - qr/aggregate job ‘mixed_aggregate’ failed with the error: "constituentA": does not exist/, - "The stderr record includes a relevant error message" -); + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); + isnt($res, 0, "hydra-eval-jobset exits non-zero"); + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + like( + $stderr, + qr/aggregate job 'mixed_aggregate' references non-existent job 'constituentA'/, + "The stderr record includes a relevant error message" + ); -$jobset->discard_changes({ '+columns' => {'errormsg' => 'errormsg'} }); # refresh from DB -like( - $jobset->errormsg, - qr/aggregate job ‘mixed_aggregate’ failed with the error: "constituentA": does not exist/, - "The jobset records a relevant error message" -); + $jobset->discard_changes({ '+columns' => {'errormsg' => 'errormsg'} }); # refresh from DB + like( + $jobset->errormsg, + qr/aggregate job ‘mixed_aggregate’ failed with the error: constituentA: does not exist/, + "The jobset records a relevant error message" + ); +}; + +subtest "no matches" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-no-matches.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); + isnt($res, 0, "hydra-eval-jobset exits non-zero"); + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + like( + $stderr, + qr/aggregate job 'non_match_aggregate' references constituent glob pattern 'tests\.\*' with no matches/, + "The stderr record includes a relevant error message" + ); + + $jobset->discard_changes({ '+columns' => {'errormsg' => 'errormsg'} }); # refresh from DB + like( + $jobset->errormsg, + qr/aggregate job ‘non_match_aggregate’ failed with the error: tests\.\*: constituent glob pattern had no matches/, + qr/in job ‘non_match_aggregate’:\ntests\.\*: constituent glob pattern had no matches/, + "The jobset records a relevant error message" + ); +}; done_testing; diff --git a/t/evaluator/evaluate-constituents-globbing.t b/t/evaluator/evaluate-constituents-globbing.t new file mode 100644 index 00000000..49315d58 --- /dev/null +++ b/t/evaluator/evaluate-constituents-globbing.t @@ -0,0 +1,138 @@ +use strict; +use warnings; +use Setup; +use Test2::V0; +use Hydra::Helper::Exec; +use Data::Dumper; + +my $ctx = test_context(); + +subtest "general glob testing" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-glob.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); + is($res, 0, "hydra-eval-jobset exits zero"); + + my $builds = {}; + for my $build ($jobset->builds) { + $builds->{$build->job} = $build; + } + + subtest "basic globbing works" => sub { + ok(defined $builds->{"ok_aggregate"}, "'ok_aggregate' is part of the jobset evaluation"); + my @constituents = $builds->{"ok_aggregate"}->constituents->all; + is(2, scalar @constituents, "'ok_aggregate' has two constituents"); + + my @sortedConstituentNames = sort (map { $_->nixname } @constituents); + + is($sortedConstituentNames[0], "empty-dir-A", "first constituent of 'ok_aggregate' is 'empty-dir-A'"); + is($sortedConstituentNames[1], "empty-dir-B", "second constituent of 'ok_aggregate' is 'empty-dir-B'"); + }; + + subtest "transitivity is OK" => sub { + ok(defined $builds->{"indirect_aggregate"}, "'indirect_aggregate' is part of the jobset evaluation"); + my @constituents = $builds->{"indirect_aggregate"}->constituents->all; + is(1, scalar @constituents, "'indirect_aggregate' has one constituent"); + is($constituents[0]->nixname, "direct_aggregate", "'indirect_aggregate' has 'direct_aggregate' as single constituent"); + }; +}; + +subtest "* selects all except current aggregate" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-glob-all.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); + + subtest "no eval errors" => sub { + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + ok( + $stderr !~ "aggregate job ‘ok_aggregate’ has a constituent .* that doesn't correspond to a Hydra build", + "Catchall wildcard must not select itself as constituent" + ); + + $jobset->discard_changes; # refresh from DB + is( + $jobset->errormsg, + "", + "eval-errors non-empty" + ); + }; + + my $builds = {}; + for my $build ($jobset->builds) { + $builds->{$build->job} = $build; + } + + subtest "two constituents" => sub { + ok(defined $builds->{"ok_aggregate"}, "'ok_aggregate' is part of the jobset evaluation"); + my @constituents = $builds->{"ok_aggregate"}->constituents->all; + is(2, scalar @constituents, "'ok_aggregate' has two constituents"); + + my @sortedConstituentNames = sort (map { $_->nixname } @constituents); + + is($sortedConstituentNames[0], "empty-dir-A", "first constituent of 'ok_aggregate' is 'empty-dir-A'"); + is($sortedConstituentNames[1], "empty-dir-B", "second constituent of 'ok_aggregate' is 'empty-dir-B'"); + }; +}; + +subtest "trivial cycle check" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-cycle.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); + + ok( + $stderr =~ "Found dependency cycle between jobs 'indirect_aggregate' and 'ok_aggregate'", + "Dependency cycle error is on stderr" + ); + + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + + $jobset->discard_changes; # refresh from DB + like( + $jobset->errormsg, + qr/Dependency cycle: indirect_aggregate <-> ok_aggregate/, + "eval-errors non-empty" + ); + + is(0, $jobset->builds->count, "No builds should be scheduled"); +}; + +subtest "cycle check with globbing" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-cycle-glob.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); + + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + + $jobset->discard_changes; # refresh from DB + like( + $jobset->errormsg, + qr/aggregate job ‘indirect_aggregate’ failed with the error: Dependency cycle: indirect_aggregate <-> packages.constituentA/, + "packages.constituentA error missing" + ); + + # on this branch of Hydra, hydra-eval-jobset fails hard if an aggregate + # job is broken. + is(0, $jobset->builds->count, "Zero jobs are scheduled"); +}; + +done_testing; diff --git a/t/jobs/config.nix b/t/jobs/config.nix new file mode 100644 index 00000000..91fd1d1a --- /dev/null +++ b/t/jobs/config.nix @@ -0,0 +1,14 @@ +rec { + path = "/nix/store/l9mg93sgx50y88p5rr6x1vib6j1rjsds-coreutils-9.1/bin"; + + mkDerivation = args: + derivation ({ + system = builtins.currentSystem; + PATH = path; + } // args); + mkContentAddressedDerivation = args: mkDerivation ({ + __contentAddressed = true; + outputHashMode = "recursive"; + outputHashAlgo = "sha256"; + } // args); +} diff --git a/t/jobs/constituents-cycle-glob.nix b/t/jobs/constituents-cycle-glob.nix new file mode 100644 index 00000000..efc152ce --- /dev/null +++ b/t/jobs/constituents-cycle-glob.nix @@ -0,0 +1,34 @@ +with import ./config.nix; +{ + packages.constituentA = mkDerivation { + name = "empty-dir-A"; + builder = ./empty-dir-builder.sh; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ "*_aggregate" ]; + }; + + packages.constituentB = mkDerivation { + name = "empty-dir-B"; + builder = ./empty-dir-builder.sh; + }; + + ok_aggregate = mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "packages.*" + ]; + builder = ./empty-dir-builder.sh; + }; + + indirect_aggregate = mkDerivation { + name = "indirect_aggregate"; + _hydraAggregate = true; + constituents = [ + "ok_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; +} diff --git a/t/jobs/constituents-cycle.nix b/t/jobs/constituents-cycle.nix new file mode 100644 index 00000000..7e086aa1 --- /dev/null +++ b/t/jobs/constituents-cycle.nix @@ -0,0 +1,21 @@ +with import ./config.nix; +{ + ok_aggregate = mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "indirect_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; + + indirect_aggregate = mkDerivation { + name = "indirect_aggregate"; + _hydraAggregate = true; + constituents = [ + "ok_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; +} diff --git a/t/jobs/constituents-glob-all.nix b/t/jobs/constituents-glob-all.nix new file mode 100644 index 00000000..d671fd70 --- /dev/null +++ b/t/jobs/constituents-glob-all.nix @@ -0,0 +1,22 @@ +with import ./config.nix; +{ + packages.constituentA = mkDerivation { + name = "empty-dir-A"; + builder = ./empty-dir-builder.sh; + }; + + packages.constituentB = mkDerivation { + name = "empty-dir-B"; + builder = ./empty-dir-builder.sh; + }; + + ok_aggregate = mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "*" + ]; + builder = ./empty-dir-builder.sh; + }; +} diff --git a/t/jobs/constituents-glob.nix b/t/jobs/constituents-glob.nix new file mode 100644 index 00000000..f566dbfd --- /dev/null +++ b/t/jobs/constituents-glob.nix @@ -0,0 +1,31 @@ +with import ./config.nix; +{ + packages.constituentA = mkDerivation { + name = "empty-dir-A"; + builder = ./empty-dir-builder.sh; + }; + + packages.constituentB = mkDerivation { + name = "empty-dir-B"; + builder = ./empty-dir-builder.sh; + }; + + ok_aggregate = mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "packages.*" + ]; + builder = ./empty-dir-builder.sh; + }; + + indirect_aggregate = mkDerivation { + name = "indirect_aggregate"; + _hydraAggregate = true; + constituents = [ + "ok_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; +} diff --git a/t/jobs/constituents-no-matches.nix b/t/jobs/constituents-no-matches.nix new file mode 100644 index 00000000..699cad91 --- /dev/null +++ b/t/jobs/constituents-no-matches.nix @@ -0,0 +1,20 @@ +with import ./config.nix; +{ + non_match_aggregate = mkDerivation { + name = "mixed_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "tests.*" + ]; + builder = ./empty-dir-builder.sh; + }; + + # Without a second job no jobset is attempted to be created + # (the only job would be broken) + # and thus the constituent validation is never reached. + dummy = mkDerivation { + name = "dummy"; + builder = ./empty-dir-builder.sh; + }; +} diff --git a/t/jobs/declarative/project.json b/t/jobs/declarative/project.json new file mode 100644 index 00000000..47d6ecf2 --- /dev/null +++ b/t/jobs/declarative/project.json @@ -0,0 +1,24 @@ +{ + "enabled": 1, + "hidden": false, + "description": "declarative-jobset-example", + "nixexprinput": "src", + "nixexprpath": "declarative/generator.nix", + "checkinterval": 300, + "schedulingshares": 100, + "enableemail": false, + "emailoverride": "", + "keepnr": 3, + "inputs": { + "src": { + "type": "path", + "value": "/home/ma27/Projects/hydra-cppnix/t/jobs", + "emailresponsible": false + }, + "jobspath": { + "type": "string", + "value": "/home/ma27/Projects/hydra-cppnix/t/jobs", + "emailresponsible": false + } + } +} diff --git a/t/queue-runner/constituents.t b/t/queue-runner/constituents.t index e1b8d733..8e048a73 100644 --- a/t/queue-runner/constituents.t +++ b/t/queue-runner/constituents.t @@ -22,11 +22,11 @@ is(nrQueuedBuildsForJobset($jobset), 0, "Evaluating jobs/broken-constituent.nix like( $jobset->errormsg, - qr/^"does-not-exist": does not exist$/m, + qr/^does-not-exist: does not exist$/m, "Evaluating jobs/broken-constituent.nix should log an error for does-not-exist"); like( $jobset->errormsg, - qr/^"does-not-evaluate": "error: assertion 'false' failed/m, + qr/^does-not-evaluate: error: assertion 'false' failed/m, "Evaluating jobs/broken-constituent.nix should log an error for does-not-evaluate"); done_testing; -- 2.48.1 From 590e8d85119667b54264417950456feeec0524c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20Kemetm=C3=BCller?= Date: Wed, 26 Feb 2025 16:32:42 +0100 Subject: [PATCH 06/36] Fix rendering of metrics with special characters My main motivation here is to get metrics with brackets to work in order to support "pytest" test names: - test_foo.py::test_bar[1] - test_foo.py::test_bar[2] I couldn't find an "HTML escape"-style function that would generate valid html `id` attribute names from random strings, so I went with a hash digest instead. --- src/lib/Hydra/View/TT.pm | 7 +++++++ src/root/job-metrics-tab.tt | 3 +-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lib/Hydra/View/TT.pm b/src/lib/Hydra/View/TT.pm index 84fcf3e9..241787e0 100644 --- a/src/lib/Hydra/View/TT.pm +++ b/src/lib/Hydra/View/TT.pm @@ -6,6 +6,7 @@ use base 'Catalyst::View::TT'; use Template::Plugin::HTML; use Hydra::Helper::Nix; use Time::Seconds; +use Digest::SHA qw(sha1_hex); __PACKAGE__->config( TEMPLATE_EXTENSION => '.tt', @@ -25,8 +26,14 @@ __PACKAGE__->config( makeNameTextForJobset relativeDuration stripSSHUser + metricDivId /]); +sub metricDivId { + my ($self, $c, $text) = @_; + return "metric-" . sha1_hex($text); +} + sub buildLogExists { my ($self, $c, $build) = @_; return 1 if defined $c->config->{log_prefix}; diff --git a/src/root/job-metrics-tab.tt b/src/root/job-metrics-tab.tt index 23d8ffa3..123a00f1 100644 --- a/src/root/job-metrics-tab.tt +++ b/src/root/job-metrics-tab.tt @@ -18,8 +18,7 @@

Metric: c.uri_for('/job' project.name jobset.name job 'metric' metric.name)) %]>[%HTML.escape(metric.name)%]

- [% id = "metric-" _ metric.name; - id = id.replace('\.', '_'); + [% id = metricDivId(metric.name); INCLUDE createChart dataUrl=c.uri_for('/job' project.name jobset.name job 'metric' metric.name); %] [% END %] -- 2.48.1 From 485aa93f2dedc8b868e7c30e6c0ba8293853c923 Mon Sep 17 00:00:00 2001 From: Robin Stumm Date: Wed, 26 Mar 2025 20:23:26 +0100 Subject: [PATCH 07/36] hydra-eval-jobset: do not wait on n-e-j inside transaction fixes #1429 --- src/script/hydra-eval-jobset | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/script/hydra-eval-jobset b/src/script/hydra-eval-jobset index 5b4026a5..cf3fa294 100755 --- a/src/script/hydra-eval-jobset +++ b/src/script/hydra-eval-jobset @@ -773,6 +773,9 @@ sub checkJobsetWrapped { my $jobsetChanged = 0; my %buildMap; + my @jobs; + push @jobs, $_ while defined($_ = $jobsIter->()); + $db->txn_do(sub { my $prevEval = getPrevJobsetEval($db, $jobset, 1); @@ -796,7 +799,7 @@ sub checkJobsetWrapped { my @jobsWithConstituents; - while (defined(my $job = $jobsIter->())) { + foreach my $job (@jobs) { if ($jobsetsJobset) { die "The .jobsets jobset must only have a single job named 'jobsets'" unless $job->{attr} eq "jobsets"; -- 2.48.1 From d799742057fcc4c5beef0123c710b5b8c32bc29f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Sat, 29 Mar 2025 11:09:32 +0000 Subject: [PATCH 08/36] fix development workflow after switching to meson-based build --- README.md | 27 ++++++++++++++------------- doc/manual/src/hacking.md | 27 +++++++-------------------- package.nix | 2 +- 3 files changed, 22 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 8ac18ac9..54b95549 100644 --- a/README.md +++ b/README.md @@ -72,17 +72,16 @@ Make sure **State** at the top of the page is set to "_Enabled_" and click on "_ You can build Hydra via `nix-build` using the provided [default.nix](./default.nix): ``` -$ nix-build +$ nix build ``` ### Development Environment You can use the provided shell.nix to get a working development environment: ``` -$ nix-shell -$ autoreconfPhase -$ configurePhase # NOTE: not ./configure -$ make +$ nix develop +$ mesonConfigurePhase +$ ninja ``` ### Executing Hydra During Development @@ -91,9 +90,9 @@ When working on new features or bug fixes you need to be able to run Hydra from can be done using [foreman](https://github.com/ddollar/foreman): ``` -$ nix-shell +$ nix develop $ # hack hack -$ make +$ ninja -C build $ foreman start ``` @@ -115,22 +114,24 @@ Start by following the steps in [Development Environment](#development-environme Then, you can run the tests and the perlcritic linter together with: ```console -$ nix-shell -$ make check +$ nix develop +$ ninja -C build test ``` You can run a single test with: ``` -$ nix-shell -$ yath test ./t/foo/bar.t +$ nix develop +$ cd build +$ meson test --test-args=../t/Hydra/Event.t testsuite ``` And you can run just perlcritic with: ``` -$ nix-shell -$ make perlcritic +$ nix develop +$ cd build +$ meson test perlcritic ``` ### JSON API diff --git a/doc/manual/src/hacking.md b/doc/manual/src/hacking.md index ec96b8c6..8b2b13ba 100644 --- a/doc/manual/src/hacking.md +++ b/doc/manual/src/hacking.md @@ -11,12 +11,6 @@ $ cd hydra To enter a shell in which all environment variables (such as `PERL5LIB`) and dependencies can be found: -```console -$ nix-shell -``` - -of when flakes are enabled: - ```console $ nix develop ``` @@ -24,15 +18,15 @@ $ nix develop To build Hydra, you should then do: ```console -[nix-shell]$ autoreconfPhase -[nix-shell]$ configurePhase -[nix-shell]$ make -j$(nproc) +$ mesonConfigurePhase +$ ninja ``` You start a local database, the webserver, and other components with foreman: ```console +$ ninja -C build $ foreman start ``` @@ -47,18 +41,11 @@ $ ./src/script/hydra-server You can run Hydra's test suite with the following: ```console -[nix-shell]$ make check -[nix-shell]$ # to run as many tests as you have cores: -[nix-shell]$ make check YATH_JOB_COUNT=$NIX_BUILD_CORES -[nix-shell]$ # or run yath directly: -[nix-shell]$ yath test -[nix-shell]$ # to run as many tests as you have cores: -[nix-shell]$ yath test -j $NIX_BUILD_CORES +$ meson test +# to run as many tests as you have cores: +$ YATH_JOB_COUNT=$NIX_BUILD_CORES meson test ``` -When using `yath` instead of `make check`, ensure you have run `make` -in the root of the repository at least once. - **Warning**: Currently, the tests can fail if run with high parallelism [due to an issue in `Test::PostgreSQL`](https://github.com/TJC/Test-postgresql/issues/40) @@ -75,7 +62,7 @@ will reload the page every time you save. To build Hydra and its dependencies: ```console -$ nix-build release.nix -A build.x86_64-linux +$ nix build .#packages.x86_64-linux.default ``` ## Development Tasks diff --git a/package.nix b/package.nix index 12fac1d8..07701bc9 100644 --- a/package.nix +++ b/package.nix @@ -241,7 +241,7 @@ stdenv.mkDerivation (finalAttrs: { shellHook = '' pushd $(git rev-parse --show-toplevel) >/dev/null - PATH=$(pwd)/src/hydra-evaluator:$(pwd)/src/script:$(pwd)/src/hydra-queue-runner:$PATH + PATH=$(pwd)/build/src/hydra-evaluator:$(pwd)/build/src/script:$(pwd)/build/src/hydra-queue-runner:$PATH PERL5LIB=$(pwd)/src/lib:$PERL5LIB export HYDRA_HOME="$(pwd)/src/" mkdir -p .hydra-data -- 2.48.1 From 2d4232475c1ace1339d73e2e6226aaa9f7675e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Sat, 29 Mar 2025 11:10:17 +0000 Subject: [PATCH 09/36] gitignore hydra-data as created by foreman --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ddcbadc4..12df926f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /src/sql/hydra-postgresql.sql /src/sql/hydra-sqlite.sql /src/sql/tmp.sqlite +.hydra-data result result-* outputs -- 2.48.1 From 10225140270b42a4e3631de983449604ec807075 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 7 Apr 2025 10:58:22 -0400 Subject: [PATCH 10/36] flake.lock: Update to nix and nix-eval-jobs 2.27 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'nix': 'github:NixOS/nix/e310c19a1aeb1ce1ed4d41d5ab2d02db596e0918?narHash=sha256-q/RgA4bB7zWai4oPySq9mch7qH14IEeom2P64SXdqHs%3D' (2025-02-18) → 'github:NixOS/nix/d0f98c76f962147610489e84c10033ca92e9c532?narHash=sha256-u6RhBWQ1XohTZ4Ub5ml1PTcaxQgtqFNng6Sohy1rojw%3D' (2025-04-07) • Updated input 'nix-eval-jobs': 'github:nix-community/nix-eval-jobs/f7418fc1fa45b96d37baa95ff3c016dd5be3876b?narHash=sha256-Lo4KFBNcY8tmBuCmEr2XV0IUZtxXHmbXPNLkov/QSU0%3D' (2025-03-26) → 'github:nix-community/nix-eval-jobs/62f9c9e8d00d2ff6ab27a6197ab459a8e0808e59?narHash=sha256-PypQspB7h7EENe4RQQUQj2Ay8J1%2BO49AKNO9JbAU4Ek%3D' (2025-04-07) --- flake.lock | 15 ++++++++------- flake.nix | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/flake.lock b/flake.lock index 6f1b1347..f1744039 100644 --- a/flake.lock +++ b/flake.lock @@ -12,16 +12,16 @@ "nixpkgs-regression": [] }, "locked": { - "lastModified": 1739899400, - "narHash": "sha256-q/RgA4bB7zWai4oPySq9mch7qH14IEeom2P64SXdqHs=", + "lastModified": 1744029599, + "narHash": "sha256-u6RhBWQ1XohTZ4Ub5ml1PTcaxQgtqFNng6Sohy1rojw=", "owner": "NixOS", "repo": "nix", - "rev": "e310c19a1aeb1ce1ed4d41d5ab2d02db596e0918", + "rev": "d0f98c76f962147610489e84c10033ca92e9c532", "type": "github" }, "original": { "owner": "NixOS", - "ref": "2.26-maintenance", + "ref": "2.27-maintenance", "repo": "nix", "type": "github" } @@ -29,15 +29,16 @@ "nix-eval-jobs": { "flake": false, "locked": { - "lastModified": 1743008255, - "narHash": "sha256-Lo4KFBNcY8tmBuCmEr2XV0IUZtxXHmbXPNLkov/QSU0=", + "lastModified": 1744009874, + "narHash": "sha256-PypQspB7h7EENe4RQQUQj2Ay8J1+O49AKNO9JbAU4Ek=", "owner": "nix-community", "repo": "nix-eval-jobs", - "rev": "f7418fc1fa45b96d37baa95ff3c016dd5be3876b", + "rev": "62f9c9e8d00d2ff6ab27a6197ab459a8e0808e59", "type": "github" }, "original": { "owner": "nix-community", + "ref": "v2.27.0", "repo": "nix-eval-jobs", "type": "github" } diff --git a/flake.nix b/flake.nix index dc3aaf5c..5a305ca0 100644 --- a/flake.nix +++ b/flake.nix @@ -4,7 +4,7 @@ inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11-small"; inputs.nix = { - url = "github:NixOS/nix/2.26-maintenance"; + url = "github:NixOS/nix/2.27-maintenance"; inputs.nixpkgs.follows = "nixpkgs"; # hide nix dev tooling from our lock file @@ -16,7 +16,7 @@ }; inputs.nix-eval-jobs = { - url = "github:nix-community/nix-eval-jobs"; + url = "github:nix-community/nix-eval-jobs/v2.27.0"; # We want to control the deps precisely flake = false; }; -- 2.48.1 From 21c6afa83bb0fbbe2014960bf9f222f957f1cfc5 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 7 Apr 2025 11:12:12 -0400 Subject: [PATCH 11/36] Fix build (due to C++ API changes) --- src/hydra-queue-runner/queue-monitor.cc | 10 +++++++--- src/hydra-queue-runner/state.hh | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index bc956453..7d747d3e 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -1,6 +1,7 @@ #include "state.hh" #include "hydra-build-result.hh" #include "globals.hh" +#include "parsed-derivations.hh" #include @@ -463,14 +464,17 @@ Step::ptr State::createStep(ref destStore, it's not runnable yet, and other threads won't make it runnable while step->created == false. */ step->drv = std::make_unique(localStore->readDerivation(drvPath)); - step->parsedDrv = std::make_unique(drvPath, *step->drv); + { + auto parsedDrv = ParsedDerivation{drvPath, *step->drv}; + step->drvOptions = std::make_unique(DerivationOptions::fromParsedDerivation(parsedDrv)); + } - step->preferLocalBuild = step->parsedDrv->willBuildLocally(*localStore); + step->preferLocalBuild = step->drvOptions->willBuildLocally(*localStore, *step->drv); step->isDeterministic = getOr(step->drv->env, "isDetermistic", "0") == "1"; step->systemType = step->drv->platform; { - StringSet features = step->requiredSystemFeatures = step->parsedDrv->getRequiredSystemFeatures(); + StringSet features = step->requiredSystemFeatures = step->drvOptions->getRequiredSystemFeatures(*step->drv); if (step->preferLocalBuild) features.insert("local"); if (!features.empty()) { diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 30e01c74..e6c5f7df 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -13,7 +13,8 @@ #include "db.hh" -#include "parsed-derivations.hh" +#include "derivations.hh" +#include "derivation-options.hh" #include "pathlocks.hh" #include "pool.hh" #include "build-result.hh" @@ -167,7 +168,7 @@ struct Step nix::StorePath drvPath; std::unique_ptr drv; - std::unique_ptr parsedDrv; + std::unique_ptr drvOptions; std::set requiredSystemFeatures; bool preferLocalBuild; bool isDeterministic; -- 2.48.1 From 0769853decfc6d3c3848726e14930707e3cf356b Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 7 Apr 2025 11:15:55 -0400 Subject: [PATCH 12/36] flake.lock: Update to nix and nix-eval-jobs 2.28 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'nix': 'github:NixOS/nix/d0f98c76f962147610489e84c10033ca92e9c532?narHash=sha256-u6RhBWQ1XohTZ4Ub5ml1PTcaxQgtqFNng6Sohy1rojw%3D' (2025-04-07) → 'github:NixOS/nix/a4962f73b5fc874d4b16baef47921daf349addfc?narHash=sha256-r%2BpsCOW77vTSTNbxTVrYHeh6OgB0QukbnyUVDwg8s4I%3D' (2025-04-07) • Updated input 'nix-eval-jobs': 'github:nix-community/nix-eval-jobs/62f9c9e8d00d2ff6ab27a6197ab459a8e0808e59?narHash=sha256-PypQspB7h7EENe4RQQUQj2Ay8J1%2BO49AKNO9JbAU4Ek%3D' (2025-04-07) → 'github:nix-community/nix-eval-jobs/cba718bafe5dc1607c2b6761ecf53c641a6f3b21?narHash=sha256-v5n6t49X7MOpqS9j0FtI6TWOXvxuZMmGsp2OfUK5QfA%3D' (2025-04-07) --- flake.lock | 15 +++++++-------- flake.nix | 4 ++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/flake.lock b/flake.lock index f1744039..3a7a1672 100644 --- a/flake.lock +++ b/flake.lock @@ -12,16 +12,16 @@ "nixpkgs-regression": [] }, "locked": { - "lastModified": 1744029599, - "narHash": "sha256-u6RhBWQ1XohTZ4Ub5ml1PTcaxQgtqFNng6Sohy1rojw=", + "lastModified": 1744030329, + "narHash": "sha256-r+psCOW77vTSTNbxTVrYHeh6OgB0QukbnyUVDwg8s4I=", "owner": "NixOS", "repo": "nix", - "rev": "d0f98c76f962147610489e84c10033ca92e9c532", + "rev": "a4962f73b5fc874d4b16baef47921daf349addfc", "type": "github" }, "original": { "owner": "NixOS", - "ref": "2.27-maintenance", + "ref": "2.28-maintenance", "repo": "nix", "type": "github" } @@ -29,16 +29,15 @@ "nix-eval-jobs": { "flake": false, "locked": { - "lastModified": 1744009874, - "narHash": "sha256-PypQspB7h7EENe4RQQUQj2Ay8J1+O49AKNO9JbAU4Ek=", + "lastModified": 1744018595, + "narHash": "sha256-v5n6t49X7MOpqS9j0FtI6TWOXvxuZMmGsp2OfUK5QfA=", "owner": "nix-community", "repo": "nix-eval-jobs", - "rev": "62f9c9e8d00d2ff6ab27a6197ab459a8e0808e59", + "rev": "cba718bafe5dc1607c2b6761ecf53c641a6f3b21", "type": "github" }, "original": { "owner": "nix-community", - "ref": "v2.27.0", "repo": "nix-eval-jobs", "type": "github" } diff --git a/flake.nix b/flake.nix index 5a305ca0..e208c40b 100644 --- a/flake.nix +++ b/flake.nix @@ -4,7 +4,7 @@ inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11-small"; inputs.nix = { - url = "github:NixOS/nix/2.27-maintenance"; + url = "github:NixOS/nix/2.28-maintenance"; inputs.nixpkgs.follows = "nixpkgs"; # hide nix dev tooling from our lock file @@ -16,7 +16,7 @@ }; inputs.nix-eval-jobs = { - url = "github:nix-community/nix-eval-jobs/v2.27.0"; + url = "github:nix-community/nix-eval-jobs"; # We want to control the deps precisely flake = false; }; -- 2.48.1 From f58a75241974cb1f77e78a522190eea8409e69d1 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 7 Apr 2025 11:27:40 -0400 Subject: [PATCH 13/36] Fix Nix code Can now at least enter dev shell, but build is still broken. --- flake.nix | 27 +++++++++++++++++---------- package.nix | 22 +++++++++------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/flake.nix b/flake.nix index e208c40b..dc1d1b8a 100644 --- a/flake.nix +++ b/flake.nix @@ -34,7 +34,6 @@ hydra = final.callPackage ./package.nix { inherit (nixpkgs.lib) fileset; rawSrc = self; - nix-perl-bindings = final.nixComponents.nix-perl-bindings; }; }; @@ -73,21 +72,29 @@ validate-openapi = hydraJobs.tests.validate-openapi.${system}; }); - packages = forEachSystem (system: { - nix-eval-jobs = nixpkgs.legacyPackages.${system}.callPackage nix-eval-jobs { - nix = nix.packages.${system}.nix; - }; - hydra = nixpkgs.legacyPackages.${system}.callPackage ./package.nix { - inherit (nixpkgs.lib) fileset; - inherit (self.packages.${system}) nix-eval-jobs; - rawSrc = self; + packages = forEachSystem (system: let + nixComponents = { inherit (nix.packages.${system}) nix-util nix-store + nix-expr + nix-fetchers + nix-flake nix-main + nix-cmd nix-cli + nix-perl-bindings ; - nix-perl-bindings = nix.hydraJobs.perlBindings.${system}; + }; + in { + nix-eval-jobs = nixpkgs.legacyPackages.${system}.callPackage nix-eval-jobs { + inherit nixComponents; + }; + hydra = nixpkgs.legacyPackages.${system}.callPackage ./package.nix { + inherit (nixpkgs.lib) fileset; + inherit nixComponents; + inherit (self.packages.${system}) nix-eval-jobs; + rawSrc = self; }; default = self.packages.${system}.hydra; }); diff --git a/package.nix b/package.nix index 07701bc9..8bf6a199 100644 --- a/package.nix +++ b/package.nix @@ -8,11 +8,7 @@ , perlPackages -, nix-util -, nix-store -, nix-main -, nix-cli -, nix-perl-bindings +, nixComponents , git , makeWrapper @@ -65,7 +61,7 @@ let name = "hydra-perl-deps"; paths = lib.closePropagation ([ - nix-perl-bindings + nixComponents.nix-perl-bindings git ] ++ (with perlPackages; [ AuthenSASL @@ -165,7 +161,7 @@ stdenv.mkDerivation (finalAttrs: { nukeReferences pkg-config mdbook - nix-cli + nixComponents.nix-cli perlDeps perl unzip @@ -175,9 +171,9 @@ stdenv.mkDerivation (finalAttrs: { libpqxx openssl libxslt - nix-util - nix-store - nix-main + nixComponents.nix-util + nixComponents.nix-store + nixComponents.nix-main perlDeps perl boost @@ -204,14 +200,14 @@ stdenv.mkDerivation (finalAttrs: { glibcLocales libressl.nc python3 - nix-cli + nixComponents.nix-cli ]; hydraPath = lib.makeBinPath ( [ subversion openssh - nix-cli + nixComponents.nix-cli coreutils findutils pixz @@ -272,7 +268,7 @@ stdenv.mkDerivation (finalAttrs: { --prefix PATH ':' $out/bin:$hydraPath \ --set HYDRA_RELEASE ${version} \ --set HYDRA_HOME $out/libexec/hydra \ - --set NIX_RELEASE ${nix-cli.name or "unknown"} \ + --set NIX_RELEASE ${nixComponents.nix-cli.name or "unknown"} \ --set NIX_EVAL_JOBS_RELEASE ${nix-eval-jobs.name or "unknown"} done ''; -- 2.48.1 From 9c022848cffb4a1ddf4843f691244c7b9a636710 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 7 Apr 2025 11:36:59 -0400 Subject: [PATCH 14/36] Fix the build --- meson.build | 14 ----------- src/hydra-evaluator/hydra-evaluator.cc | 6 ++--- src/hydra-evaluator/meson.build | 3 ++- src/hydra-queue-runner/build-remote.cc | 26 ++++++++++---------- src/hydra-queue-runner/build-result.cc | 6 ++--- src/hydra-queue-runner/builder.cc | 4 +-- src/hydra-queue-runner/hydra-build-result.hh | 6 ++--- src/hydra-queue-runner/hydra-queue-runner.cc | 12 ++++----- src/hydra-queue-runner/meson.build | 4 ++- src/hydra-queue-runner/nar-extractor.cc | 2 +- src/hydra-queue-runner/nar-extractor.hh | 8 +++--- src/hydra-queue-runner/queue-monitor.cc | 4 +-- src/hydra-queue-runner/state.hh | 22 ++++++++--------- src/libhydra/db.hh | 4 +-- src/libhydra/hydra-config.hh | 4 +-- 15 files changed, 57 insertions(+), 68 deletions(-) diff --git a/meson.build b/meson.build index 9c9c09a1..c1eb577b 100644 --- a/meson.build +++ b/meson.build @@ -12,20 +12,6 @@ nix_util_dep = dependency('nix-util', required: true) nix_store_dep = dependency('nix-store', required: true) nix_main_dep = dependency('nix-main', required: true) -# Nix need extra flags not provided in its pkg-config files. -nix_dep = declare_dependency( - dependencies: [ - nix_util_dep, - nix_store_dep, - nix_main_dep, - ], - compile_args: [ - '-include', 'nix/config-util.hh', - '-include', 'nix/config-store.hh', - '-include', 'nix/config-main.hh', - ], -) - pqxx_dep = dependency('libpqxx', required: true) prom_cpp_core_dep = dependency('prometheus-cpp-core', required: true) diff --git a/src/hydra-evaluator/hydra-evaluator.cc b/src/hydra-evaluator/hydra-evaluator.cc index 9312d085..52664188 100644 --- a/src/hydra-evaluator/hydra-evaluator.cc +++ b/src/hydra-evaluator/hydra-evaluator.cc @@ -1,8 +1,8 @@ #include "db.hh" #include "hydra-config.hh" -#include "pool.hh" -#include "shared.hh" -#include "signals.hh" +#include +#include +#include #include #include diff --git a/src/hydra-evaluator/meson.build b/src/hydra-evaluator/meson.build index b3dc542e..53ddc354 100644 --- a/src/hydra-evaluator/meson.build +++ b/src/hydra-evaluator/meson.build @@ -2,7 +2,8 @@ hydra_evaluator = executable('hydra-evaluator', 'hydra-evaluator.cc', dependencies: [ libhydra_dep, - nix_dep, + nix_util_dep, + nix_main_dep, pqxx_dep, ], install: true, diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index 77bde2c4..79c32d46 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -5,20 +5,20 @@ #include #include -#include "build-result.hh" -#include "path.hh" -#include "legacy-ssh-store.hh" -#include "serve-protocol.hh" -#include "serve-protocol-impl.hh" +#include +#include +#include +#include +#include #include "state.hh" -#include "current-process.hh" -#include "processes.hh" -#include "util.hh" -#include "serve-protocol.hh" -#include "serve-protocol-impl.hh" -#include "ssh.hh" -#include "finally.hh" -#include "url.hh" +#include +#include +#include +#include +#include +#include +#include +#include using namespace nix; diff --git a/src/hydra-queue-runner/build-result.cc b/src/hydra-queue-runner/build-result.cc index ffdc37b7..b0695e8b 100644 --- a/src/hydra-queue-runner/build-result.cc +++ b/src/hydra-queue-runner/build-result.cc @@ -1,7 +1,7 @@ #include "hydra-build-result.hh" -#include "store-api.hh" -#include "util.hh" -#include "source-accessor.hh" +#include +#include +#include #include diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index 4bc00f0c..018215a5 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -2,8 +2,8 @@ #include "state.hh" #include "hydra-build-result.hh" -#include "finally.hh" -#include "binary-cache-store.hh" +#include +#include using namespace nix; diff --git a/src/hydra-queue-runner/hydra-build-result.hh b/src/hydra-queue-runner/hydra-build-result.hh index 7d47f67c..654bf1be 100644 --- a/src/hydra-queue-runner/hydra-build-result.hh +++ b/src/hydra-queue-runner/hydra-build-result.hh @@ -2,9 +2,9 @@ #include -#include "hash.hh" -#include "derivations.hh" -#include "store-api.hh" +#include +#include +#include #include "nar-extractor.hh" struct BuildProduct diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 99411f9f..64a98797 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -11,16 +11,16 @@ #include -#include "signals.hh" +#include #include "state.hh" #include "hydra-build-result.hh" -#include "store-api.hh" -#include "remote-store.hh" +#include +#include -#include "globals.hh" +#include #include "hydra-config.hh" -#include "s3-binary-cache-store.hh" -#include "shared.hh" +#include +#include using namespace nix; using nlohmann::json; diff --git a/src/hydra-queue-runner/meson.build b/src/hydra-queue-runner/meson.build index 1c20299f..27dad2c0 100644 --- a/src/hydra-queue-runner/meson.build +++ b/src/hydra-queue-runner/meson.build @@ -13,7 +13,9 @@ hydra_queue_runner = executable('hydra-queue-runner', srcs, dependencies: [ libhydra_dep, - nix_dep, + nix_util_dep, + nix_store_dep, + nix_main_dep, pqxx_dep, prom_cpp_core_dep, prom_cpp_pull_dep, diff --git a/src/hydra-queue-runner/nar-extractor.cc b/src/hydra-queue-runner/nar-extractor.cc index 8729dd39..3bf06ef3 100644 --- a/src/hydra-queue-runner/nar-extractor.cc +++ b/src/hydra-queue-runner/nar-extractor.cc @@ -1,6 +1,6 @@ #include "nar-extractor.hh" -#include "archive.hh" +#include #include diff --git a/src/hydra-queue-runner/nar-extractor.hh b/src/hydra-queue-runner/nar-extractor.hh index 2634135b..0060efe2 100644 --- a/src/hydra-queue-runner/nar-extractor.hh +++ b/src/hydra-queue-runner/nar-extractor.hh @@ -1,9 +1,9 @@ #pragma once -#include "source-accessor.hh" -#include "types.hh" -#include "serialise.hh" -#include "hash.hh" +#include +#include +#include +#include struct NarMemberData { diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 7d747d3e..81bda873 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -1,7 +1,7 @@ #include "state.hh" #include "hydra-build-result.hh" -#include "globals.hh" -#include "parsed-derivations.hh" +#include +#include #include diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index e6c5f7df..8933720d 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -13,18 +13,18 @@ #include "db.hh" -#include "derivations.hh" -#include "derivation-options.hh" -#include "pathlocks.hh" -#include "pool.hh" -#include "build-result.hh" -#include "store-api.hh" -#include "sync.hh" +#include +#include +#include +#include +#include +#include +#include #include "nar-extractor.hh" -#include "serve-protocol.hh" -#include "serve-protocol-impl.hh" -#include "serve-protocol-connection.hh" -#include "machines.hh" +#include +#include +#include +#include typedef unsigned int BuildID; diff --git a/src/libhydra/db.hh b/src/libhydra/db.hh index 1e927573..c664a01d 100644 --- a/src/libhydra/db.hh +++ b/src/libhydra/db.hh @@ -2,8 +2,8 @@ #include -#include "environment-variables.hh" -#include "util.hh" +#include +#include struct Connection : pqxx::connection diff --git a/src/libhydra/hydra-config.hh b/src/libhydra/hydra-config.hh index b1275896..85c58746 100644 --- a/src/libhydra/hydra-config.hh +++ b/src/libhydra/hydra-config.hh @@ -2,8 +2,8 @@ #include -#include "file-system.hh" -#include "util.hh" +#include +#include struct HydraConfig { -- 2.48.1 From 1ca17faed4dccb94f217d054510497be670efae4 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Thu, 11 Apr 2024 17:12:47 +0200 Subject: [PATCH 15/36] web: include current step status on /machines --- src/root/build.tt | 16 +--------------- src/root/common.tt | 19 +++++++++++++++++++ src/root/machine-status.tt | 2 ++ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/root/build.tt b/src/root/build.tt index 93a02e0f..18ff6f01 100644 --- a/src/root/build.tt +++ b/src/root/build.tt @@ -61,21 +61,7 @@ END; [% IF step.busy != 0 || ((step.machine || step.starttime) && (step.status == 0 || step.status == 1 || step.status == 3 || step.status == 4 || step.status == 7)); INCLUDE renderMachineName machine=step.machine; ELSE; "n/a"; END %] [% IF step.busy != 0 %] - [% IF step.busy == 1 %] - Preparing - [% ELSIF step.busy == 10 %] - Connecting - [% ELSIF step.busy == 20 %] - Sending inputs - [% ELSIF step.busy == 30 %] - Building - [% ELSIF step.busy == 40 %] - Receiving outputs - [% ELSIF step.busy == 50 %] - Post-processing - [% ELSE %] - Unknown state - [% END %] + [% INCLUDE renderBusyStatus %] [% ELSIF step.status == 0 %] [% IF step.isnondeterministic %] Succeeded with non-determistic result diff --git a/src/root/common.tt b/src/root/common.tt index 869d8856..814d08ca 100644 --- a/src/root/common.tt +++ b/src/root/common.tt @@ -245,6 +245,25 @@ BLOCK renderBuildStatusIcon; END; +BLOCK renderBusyStatus; + IF step.busy == 1 %] + Preparing + [% ELSIF step.busy == 10 %] + Connecting + [% ELSIF step.busy == 20 %] + Sending inputs + [% ELSIF step.busy == 30 %] + Building + [% ELSIF step.busy == 40 %] + Receiving outputs + [% ELSIF step.busy == 50 %] + Post-processing + [% ELSE %] + Unknown state + [% END; +END; + + BLOCK renderStatus; IF build.finished; buildstatus = build.buildstatus; diff --git a/src/root/machine-status.tt b/src/root/machine-status.tt index 4195c178..3af5073c 100644 --- a/src/root/machine-status.tt +++ b/src/root/machine-status.tt @@ -10,6 +10,7 @@ Build Step What + Status Since @@ -44,6 +45,7 @@ [% step.build %] [% IF step.busy >= 30 %][% step.stepnr %][% ELSE; step.stepnr; END %] [% step.drvpath.match('-(.*)').0 %] + [% INCLUDE renderBusyStatus %] [% INCLUDE renderDuration duration = curTime - step.starttime %] [% END %] -- 2.48.1 From b55bd25581eac900a3c96a8a61aa0f7b0cb04972 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Tue, 12 Mar 2024 22:32:58 +0100 Subject: [PATCH 16/36] Make "timed out" and "log limit exceeded" builds aborted In 73694087a088ed4481b4ab268a03351b1bcaac3c I gave builds that failed because of a timeout or exceeded log limit a stop sign and I stand by that reasoning: with that it's possible to distinguish between actual build failures and rather transient things such as timeouts. Back then I considered it a feature that these are shown in a different tab, but I don't think that's a good idea anymore. When using a jobset to e.g. track the regressions from a mass rebuild (like a compiler or gcc update), "Newly failed builds" should exclusively display regressions (and flaky builds of course, not much I can do about that). Also, when a bunch of builds fail in such a jobset because of e.g. a broken connection to a builder that results in a timeout, I want to be able to restart them all w/o rebuilding actual regressions. To make it clear that we not only have "Aborted" builds in the tab, I renamed the label to "Aborted / Timed out". --- src/lib/Hydra/Helper/BuildDiff.pm | 13 +++++++++++-- src/root/jobset-eval.tt | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/lib/Hydra/Helper/BuildDiff.pm b/src/lib/Hydra/Helper/BuildDiff.pm index cd8c7691..65dad17c 100644 --- a/src/lib/Hydra/Helper/BuildDiff.pm +++ b/src/lib/Hydra/Helper/BuildDiff.pm @@ -37,7 +37,16 @@ sub buildDiff { my $n = 0; foreach my $build (@{$builds}) { - my $aborted = $build->finished != 0 && ($build->buildstatus == 3 || $build->buildstatus == 4); + my $aborted = $build->finished != 0 && ( + # aborted + $build->buildstatus == 3 + # cancelled + || $build->buildstatus == 4 + # timeout + || $build->buildstatus == 7 + # log limit exceeded + || $build->buildstatus == 10 + ); my $d; my $found = 0; while ($n < scalar(@{$builds2})) { @@ -79,4 +88,4 @@ sub buildDiff { return $ret; } -1; \ No newline at end of file +1; diff --git a/src/root/jobset-eval.tt b/src/root/jobset-eval.tt index 8a8d92e4..fd0d4846 100644 --- a/src/root/jobset-eval.tt +++ b/src/root/jobset-eval.tt @@ -65,7 +65,7 @@ c.uri_for(c.controller('JobsetEval').action_for('view'), [% END %] [% IF aborted.size > 0 %] - + [% END %] [% IF nowFail.size > 0 %] -- 2.48.1 From baec2bbb4cf263e6e2e5039f20c1bf4af21394e8 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Sat, 16 Mar 2024 17:53:02 +0100 Subject: [PATCH 17/36] Running builds view: show build step names When using Hydra to build machine configurations, you'll often see "nixosConfigurations.foo" five times, i.e. for each build step being run. This isn't very helpful I think because in such a case, a single build step can also be compiling the Linux kernel. This change also fetches the `drvpath` and `type` from the `buildsteps` relation. We're already joining it, so this doesn't make much difference (confirmed via query logging that this doesn't cause extra SQL queries). Unfortunately build steps don't have a human readable name, so I'm deriving it from the drvpath by stripping away the hash (assuming that it'll never contain a `-` and that `/nix/store/` is used as prefix). I decided against using the Nix bindings for that to avoid too much overhead due to store operations for each build step. --- src/lib/Hydra/Controller/Root.pm | 2 +- src/root/common.tt | 16 +++++++++++++++- src/root/status.tt | 2 +- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/lib/Hydra/Controller/Root.pm b/src/lib/Hydra/Controller/Root.pm index a231d7c0..adb5ad44 100644 --- a/src/lib/Hydra/Controller/Root.pm +++ b/src/lib/Hydra/Controller/Root.pm @@ -162,7 +162,7 @@ sub status_GET { { "buildsteps.busy" => { '!=', 0 } }, { order_by => ["globalpriority DESC", "id"], join => "buildsteps", - columns => [@buildListColumns] + columns => [@buildListColumns, 'buildsteps.drvpath', 'buildsteps.type'] })] ); } diff --git a/src/root/common.tt b/src/root/common.tt index 814d08ca..b96cf99c 100644 --- a/src/root/common.tt +++ b/src/root/common.tt @@ -91,6 +91,15 @@ BLOCK renderDuration; duration % 60 %]s[% END; +BLOCK renderDrvInfo; + drvname = step.drvpath + .substr(11) # strip `/nix/store/` + .split('-').slice(1).join("-") # strip hash part + .substr(0, -4); # strip `.drv` + IF step.type == 0; action = "Build"; ELSE; action = "Substitution"; END; + IF drvname; %] ([% action %] of [% drvname %])[% END; +END; + BLOCK renderBuildListHeader %] @@ -131,7 +140,12 @@ BLOCK renderBuildListBody; [% END %] [% IF !hideJobName %] - + [% END %] diff --git a/src/root/status.tt b/src/root/status.tt index f1ec70b9..e6a07bb7 100644 --- a/src/root/status.tt +++ b/src/root/status.tt @@ -7,7 +7,7 @@ [% ELSE %] - [% INCLUDE renderBuildList builds=resource showSchedulingInfo=1 hideResultInfo=1 busy=1 %] + [% INCLUDE renderBuildList builds=resource showSchedulingInfo=1 hideResultInfo=1 busy=1 showStepName=1 %] [% END %] -- 2.48.1 From d5fb163618e360c5f21ea8fa6821a9b379e776f6 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Sun, 17 Mar 2024 12:57:59 +0100 Subject: [PATCH 18/36] Only show stepname if it doesn't equal the name of the drv When building e.g. nixpkgs, the "Running builds" view will mostly look like this hello.x86_64-linux (Build of hello-X.Y) exa.x86_64-linux (Build of exa-X.Y) ... This doesn't provide any useful information. Showing the step name only makes sense if it's not a child of the job's derivation. With this patch, that information will only be shown if the drv name (i.e. w/o `/nix/store/` prefix, .drv ext & hash) is not equal to the drv name of the job itself (build.nixname). --- src/root/common.tt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/root/common.tt b/src/root/common.tt index b96cf99c..842ad109 100644 --- a/src/root/common.tt +++ b/src/root/common.tt @@ -96,8 +96,10 @@ BLOCK renderDrvInfo; .substr(11) # strip `/nix/store/` .split('-').slice(1).join("-") # strip hash part .substr(0, -4); # strip `.drv` - IF step.type == 0; action = "Build"; ELSE; action = "Substitution"; END; - IF drvname; %] ([% action %] of [% drvname %])[% END; + IF drvname != releasename; + IF step.type == 0; action = "Build"; ELSE; action = "Substitution"; END; + IF drvname; %] ([% action %] of [% drvname %])[% END; + END; END; @@ -143,7 +145,7 @@ BLOCK renderBuildListBody; [% END %] -- 2.48.1 From 17094c8371d1474456553b3e37972fabb3ec15c4 Mon Sep 17 00:00:00 2001 From: ajs124 Date: Fri, 16 Feb 2024 16:50:33 +0100 Subject: [PATCH 19/36] lazy-load evaluation errors Closes #1362 --- src/lib/Hydra/Controller/Jobset.pm | 9 +++++++++ src/lib/Hydra/Controller/JobsetEval.pm | 9 +++++++++ src/root/eval-error.tt | 26 ++++++++++++++++++++++++++ src/root/jobset-eval.tt | 10 +--------- src/root/jobset.tt | 3 +-- src/root/layout.tt | 26 +------------------------- src/root/static/js/common.js | 6 ++++++ src/root/style.tt | 24 ++++++++++++++++++++++++ t/Hydra/Controller/Jobset/evals.t | 5 +++++ t/Hydra/Controller/JobsetEval/fetch.t | 4 ++++ 10 files changed, 86 insertions(+), 36 deletions(-) create mode 100644 src/root/eval-error.tt create mode 100644 src/root/style.tt diff --git a/src/lib/Hydra/Controller/Jobset.pm b/src/lib/Hydra/Controller/Jobset.pm index eeb4232a..20a52f6f 100644 --- a/src/lib/Hydra/Controller/Jobset.pm +++ b/src/lib/Hydra/Controller/Jobset.pm @@ -364,6 +364,15 @@ sub evals_GET { ); } +sub errors :Chained('jobsetChain') :PathPart('errors') :Args(0) :ActionClass('REST') { } + +sub errors_GET { + my ($self, $c) = @_; + + $c->stash->{template} = 'eval-error.tt'; + + $self->status_ok($c, entity => $c->stash->{jobset}); +} # Redirect to the latest finished evaluation of this jobset. sub latest_eval : Chained('jobsetChain') PathPart('latest-eval') { diff --git a/src/lib/Hydra/Controller/JobsetEval.pm b/src/lib/Hydra/Controller/JobsetEval.pm index 30179d49..aca03d72 100644 --- a/src/lib/Hydra/Controller/JobsetEval.pm +++ b/src/lib/Hydra/Controller/JobsetEval.pm @@ -86,6 +86,15 @@ sub view_GET { ); } +sub errors :Chained('evalChain') :PathPart('errors') :Args(0) :ActionClass('REST') { } + +sub errors_GET { + my ($self, $c) = @_; + + $c->stash->{template} = 'eval-error.tt'; + + $self->status_ok($c, entity => $c->stash->{eval}); +} sub create_jobset : Chained('evalChain') PathPart('create-jobset') Args(0) { my ($self, $c) = @_; diff --git a/src/root/eval-error.tt b/src/root/eval-error.tt new file mode 100644 index 00000000..c2ea28ec --- /dev/null +++ b/src/root/eval-error.tt @@ -0,0 +1,26 @@ +[% PROCESS common.tt %] + + + + + + + + [% INCLUDE style.tt %] + + + + +
+
+ [% IF jobset %] +

Errors occurred at [% INCLUDE renderDateTime timestamp=(jobset.errortime || jobset.lastcheckedtime) %].

+
[% HTML.escape(jobset.fetcherrormsg || jobset.errormsg) %]
+ [% ELSIF eval %] +

Errors occurred at [% INCLUDE renderDateTime timestamp=(eval.evaluationerror.errortime || eval.timestamp) %].

+
[% HTML.escape(eval.evaluationerror.errormsg) %]
+ [% END %] +
+
+ + diff --git a/src/root/jobset-eval.tt b/src/root/jobset-eval.tt index fd0d4846..146878f2 100644 --- a/src/root/jobset-eval.tt +++ b/src/root/jobset-eval.tt @@ -108,13 +108,6 @@ c.uri_for(c.controller('JobsetEval').action_for('view'),
- [% IF eval.evaluationerror.errormsg %] -
-

Errors occurred at [% INCLUDE renderDateTime timestamp=(eval.evaluationerror.errortime || eval.timestamp) %].

-
[% HTML.escape(eval.evaluationerror.errormsg) %]
-
- [% END %] -
[% INCLUDE renderSome builds=aborted tabname="#tabs-aborted" %]
@@ -174,8 +167,7 @@ c.uri_for(c.controller('JobsetEval').action_for('view'), [% IF eval.evaluationerror.errormsg %]
-

Errors occurred at [% INCLUDE renderDateTime timestamp=(eval.evaluationerror.errortime || eval.timestamp) %].

-
[% HTML.escape(eval.evaluationerror.errormsg) %]
+
[% END %]
diff --git a/src/root/jobset.tt b/src/root/jobset.tt index 5d8345f9..5afcbfde 100644 --- a/src/root/jobset.tt +++ b/src/root/jobset.tt @@ -119,8 +119,7 @@ [% IF jobset.errormsg || jobset.fetcherrormsg %]
-

Errors occurred at [% INCLUDE renderDateTime timestamp=(jobset.errortime || jobset.lastcheckedtime) %].

-
[% HTML.escape(jobset.fetcherrormsg || jobset.errormsg) %]
+
[% END %] diff --git a/src/root/layout.tt b/src/root/layout.tt index 399962b4..b520b455 100644 --- a/src/root/layout.tt +++ b/src/root/layout.tt @@ -10,31 +10,7 @@ - - - - - - - - - - - - - - - - - - - - - - - + [% INCLUDE style.tt %] [% IF c.config.enable_google_login %] diff --git a/src/root/static/js/common.js b/src/root/static/js/common.js index c51f769a..9f31d1e6 100644 --- a/src/root/static/js/common.js +++ b/src/root/static/js/common.js @@ -129,6 +129,12 @@ $(document).ready(function() { el.addClass("is-local"); } }); + + [...document.getElementsByTagName("iframe")].forEach((element) => { + element.contentWindow.addEventListener("DOMContentLoaded", (_) => { + element.style.height = element.contentWindow.document.body.scrollHeight + 'px'; + }) + }) }); var tabsLoaded = {}; diff --git a/src/root/style.tt b/src/root/style.tt new file mode 100644 index 00000000..4094b7bc --- /dev/null +++ b/src/root/style.tt @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/t/Hydra/Controller/Jobset/evals.t b/t/Hydra/Controller/Jobset/evals.t index 221efb65..25357f8f 100644 --- a/t/Hydra/Controller/Jobset/evals.t +++ b/t/Hydra/Controller/Jobset/evals.t @@ -32,4 +32,9 @@ subtest "/jobset/PROJECT/JOBSET/evals" => sub { ok($jobsetevals->is_success, "The page showing the jobset evals returns 200."); }; +subtest "/jobset/PROJECT/JOBSET/errors" => sub { + my $jobsetevals = request(GET '/jobset/' . $project->name . '/' . $jobset->name . '/errors'); + ok($jobsetevals->is_success, "The page showing the jobset eval errors returns 200."); +}; + done_testing; diff --git a/t/Hydra/Controller/JobsetEval/fetch.t b/t/Hydra/Controller/JobsetEval/fetch.t index 14169c39..609e9224 100644 --- a/t/Hydra/Controller/JobsetEval/fetch.t +++ b/t/Hydra/Controller/JobsetEval/fetch.t @@ -35,6 +35,10 @@ subtest "Fetching the eval's overview" => sub { is($fetch->code, 200, "channel page is 200"); }; +subtest "Fetching the eval's overview" => sub { + my $fetch = request(GET '/eval/' . $eval->id, '/errors'); + is($fetch->code, 200, "errors page is 200"); +}; done_testing; -- 2.48.1 From 5cb82812f25f759a2705e7d20e3414929df883b8 Mon Sep 17 00:00:00 2001 From: "git@71rd.net" Date: Sat, 3 Aug 2024 10:36:10 +0000 Subject: [PATCH 20/36] Stream files from store instead of buffering them When an artifact is requested from hydra the output is first copied from the nix store into memory and then sent as a response, delaying the download and taking up significant amounts of memory. As reported in https://github.com/NixOS/hydra/issues/1357 Instead of calling a command and blocking while reading in the entire output, this adds read_into_socket(). the function takes a command, starting a subprocess with that command, returning a file descriptor attached to stdout. This file descriptor is then by responsebuilder of Catalyst to steam the output directly (cherry picked from commit 459aa0a5983a0bd546399c08231468d6e9282f54) --- src/lib/Hydra/Controller/Build.pm | 2 +- src/lib/Hydra/Helper/Nix.pm | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/lib/Hydra/Controller/Build.pm b/src/lib/Hydra/Controller/Build.pm index de2c204d..5e7b6f24 100644 --- a/src/lib/Hydra/Controller/Build.pm +++ b/src/lib/Hydra/Controller/Build.pm @@ -238,7 +238,7 @@ sub serveFile { # XSS hole. $c->response->header('Content-Security-Policy' => 'sandbox allow-scripts'); - $c->stash->{'plain'} = { data => grab(cmd => ["nix", "--experimental-features", "nix-command", + $c->stash->{'plain'} = { data => readIntoSocket(cmd => ["nix", "--experimental-features", "nix-command", "store", "cat", "--store", getStoreUri(), "$path"]) }; # Detect MIME type. diff --git a/src/lib/Hydra/Helper/Nix.pm b/src/lib/Hydra/Helper/Nix.pm index bff7a5ed..f87946d5 100644 --- a/src/lib/Hydra/Helper/Nix.pm +++ b/src/lib/Hydra/Helper/Nix.pm @@ -36,6 +36,7 @@ our @EXPORT = qw( jobsetOverview jobsetOverview_ pathIsInsidePrefix + readIntoSocket readNixFile registerRoot restartBuilds @@ -417,6 +418,17 @@ sub pathIsInsidePrefix { return $cur; } +sub readIntoSocket{ + my (%args) = @_; + my $sock; + + eval { + my $x= join(" ", @{$args{cmd}}); + open($sock, "-|", $x) or die q(failed to open socket from command:\n $x); + }; + + return $sock; +} -- 2.48.1 From 80f917d8fae5ff8ce1847ac56331a19bda70bd7e Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Sun, 18 Aug 2024 10:40:42 +0200 Subject: [PATCH 21/36] readIntoSocket: fix with store URIs containing an `&` The third argument to `open()` in `-|` mode is passed to a shell if it's a string. In my case the store URI contains `?secret-key=${signingKey.directory}/secret&compression=zstd` For the `nix store cat` case this means that * until `&` the process will be started in the background. This fails immediately because no path to cat is specified. * `compression=zstd` is a variable assignment * the `$path` argument to `store cat` is attempted to be executed as another command Passing just the list solves the problem. (cherry picked from commit 3ee51dbe589458cc54ff753317bbc6db530bddc0) --- src/lib/Hydra/Helper/Nix.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/Hydra/Helper/Nix.pm b/src/lib/Hydra/Helper/Nix.pm index f87946d5..88fbdd6d 100644 --- a/src/lib/Hydra/Helper/Nix.pm +++ b/src/lib/Hydra/Helper/Nix.pm @@ -423,8 +423,7 @@ sub readIntoSocket{ my $sock; eval { - my $x= join(" ", @{$args{cmd}}); - open($sock, "-|", $x) or die q(failed to open socket from command:\n $x); + open($sock, "-|", @{$args{cmd}}) or die q(failed to open socket from command:\n $x); }; return $sock; -- 2.48.1 From eddc234915a47d9b603a5f35940f6f4523e0f02d Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 7 Apr 2025 12:31:02 -0400 Subject: [PATCH 22/36] Fix evaluation of NixOS tests, avoid `with` --- nixos-tests.nix | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/nixos-tests.nix b/nixos-tests.nix index 948359b0..e82f2858 100644 --- a/nixos-tests.nix +++ b/nixos-tests.nix @@ -27,8 +27,7 @@ in { install = forEachSystem (system: - with import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; }; - simpleTest { + (import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; }).simpleTest { name = "hydra-install"; nodes.machine = hydraServer; testScript = @@ -43,8 +42,7 @@ in }); notifications = forEachSystem (system: - with import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; }; - simpleTest { + (import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; }).simpleTest { name = "hydra-notifications"; nodes.machine = { imports = [ hydraServer ]; @@ -56,7 +54,7 @@ in ''; services.influxdb.enable = true; }; - testScript = '' + testScript = { nodes, ... }: '' machine.wait_for_job("hydra-init") # Create an admin account and some other state. @@ -87,7 +85,7 @@ in # Setup the project and jobset machine.succeed( - "su - hydra -c 'perl -I ${config.services.hydra-dev.package.perlDeps}/lib/perl5/site_perl ${./t/setup-notifications-jobset.pl}' >&2" + "su - hydra -c 'perl -I ${nodes.machine.services.hydra-dev.package.perlDeps}/lib/perl5/site_perl ${./t/setup-notifications-jobset.pl}' >&2" ) # Wait until hydra has build the job and @@ -101,9 +99,7 @@ in }); gitea = forEachSystem (system: - let pkgs = nixpkgs.legacyPackages.${system}; in - with import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; }; - makeTest { + (import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; }).makeTest { name = "hydra-gitea"; nodes.machine = { pkgs, ... }: { imports = [ hydraServer ]; @@ -129,7 +125,7 @@ in networking.firewall.allowedTCPPorts = [ 3000 ]; }; skipLint = true; - testScript = + testScript = { pkgs, ... }: let scripts.mktoken = pkgs.writeText "token.sql" '' INSERT INTO access_token (id, uid, name, created_unix, updated_unix, token_hash, token_salt, token_last_eight, scope) VALUES (1, 1, 'hydra', 1617107360, 1617107360, 'a930f319ca362d7b49a4040ac0af74521c3a3c3303a86f327b01994430672d33b6ec53e4ea774253208686c712495e12a486', 'XRjWE9YW0g', '31d3a9c7', 'all'); -- 2.48.1 From 29a7ab8009b4c76916105d318a51bb6b923724ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 7 Apr 2025 18:43:12 +0200 Subject: [PATCH 23/36] test/gitea: fix eval --- nixos-tests.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nixos-tests.nix b/nixos-tests.nix index e82f2858..9949db7f 100644 --- a/nixos-tests.nix +++ b/nixos-tests.nix @@ -99,6 +99,9 @@ in }); gitea = forEachSystem (system: + let + pkgs = nixpkgs.legacyPackages.${system}; + in (import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; }).makeTest { name = "hydra-gitea"; nodes.machine = { pkgs, ... }: { @@ -125,7 +128,7 @@ in networking.firewall.allowedTCPPorts = [ 3000 ]; }; skipLint = true; - testScript = { pkgs, ... }: + testScript = let scripts.mktoken = pkgs.writeText "token.sql" '' INSERT INTO access_token (id, uid, name, created_unix, updated_unix, token_hash, token_salt, token_last_eight, scope) VALUES (1, 1, 'hydra', 1617107360, 1617107360, 'a930f319ca362d7b49a4040ac0af74521c3a3c3303a86f327b01994430672d33b6ec53e4ea774253208686c712495e12a486', 'XRjWE9YW0g', '31d3a9c7', 'all'); -- 2.48.1 From 641056bd0ed75895edac56df325610ff3757b4c2 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Thu, 11 Apr 2024 17:12:47 +0200 Subject: [PATCH 24/36] web: Skip System on /machines It is redundant --- src/root/machine-status.tt | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/root/machine-status.tt b/src/root/machine-status.tt index 3af5073c..07a2359d 100644 --- a/src/root/machine-status.tt +++ b/src/root/machine-status.tt @@ -6,7 +6,6 @@
- @@ -41,7 +40,6 @@ [% idle = 0 %] - -- 2.48.1 From 08bf31b71a59c6e7f1dae0fb62d6a4888ad7cc53 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Thu, 11 Apr 2024 15:03:23 +0200 Subject: [PATCH 25/36] queue-runner: limit parallelism of CPU intensive operations My current theory is that running more parallel xz than available CPU cores is reducing our overall throughput by requiring more scheduling overhead and more cache thrashing. --- src/hydra-queue-runner/build-remote.cc | 18 ++++++++++++++++++ src/hydra-queue-runner/hydra-queue-runner.cc | 1 + src/hydra-queue-runner/state.hh | 7 +++++++ src/root/common.tt | 2 ++ 4 files changed, 28 insertions(+) diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index 79c32d46..0c8b3f10 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -386,6 +386,16 @@ void RemoteResult::updateWithBuildResult(const nix::BuildResult & buildResult) } +/* Utility guard object to auto-release a semaphore on destruction. */ +template +class SemaphoreReleaser { +public: + SemaphoreReleaser(T* s) : sem(s) {} + ~SemaphoreReleaser() { sem->release(); } + +private: + T* sem; +}; void State::buildRemote(ref destStore, ::Machine::ptr machine, Step::ptr step, @@ -527,6 +537,14 @@ void State::buildRemote(ref destStore, result.logFile = ""; } + /* Throttle CPU-bound work. Opportunistically skip updating the current + * step, since this requires a DB roundtrip. */ + if (!localWorkThrottler.try_acquire()) { + updateStep(ssWaitingForLocalSlot); + localWorkThrottler.acquire(); + } + SemaphoreReleaser releaser(&localWorkThrottler); + StorePathSet outputs; for (auto & [_, realisation] : buildResult.builtOutputs) outputs.insert(realisation.outPath); diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 64a98797..cf7d4056 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -85,6 +85,7 @@ State::State(std::optional metricsAddrOpt) : config(std::make_unique()) , maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0)) , dbPool(config->getIntOption("max_db_connections", 128)) + , localWorkThrottler(config->getIntOption("max_local_worker_threads", std::min(maxSupportedLocalWorkers, std::max(4u, std::thread::hardware_concurrency()) - 2))) , maxOutputSize(config->getIntOption("max_output_size", 2ULL << 30)) , maxLogSize(config->getIntOption("max_log_size", 64ULL << 20)) , uploadLogsToBinaryCache(config->getBoolOption("upload_logs_to_binary_cache", false)) diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 8933720d..34b7a676 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -58,6 +60,7 @@ typedef enum { ssConnecting = 10, ssSendingInputs = 20, ssBuilding = 30, + ssWaitingForLocalSlot = 35, ssReceivingOutputs = 40, ssPostProcessing = 50, } StepState; @@ -353,6 +356,10 @@ private: typedef std::map Machines; nix::Sync machines; // FIXME: use atomic_shared_ptr + /* Throttler for CPU-bound local work. */ + static constexpr unsigned int maxSupportedLocalWorkers = 1024; + std::counting_semaphore localWorkThrottler; + /* Various stats. */ time_t startedAt; counter nrBuildsRead{0}; diff --git a/src/root/common.tt b/src/root/common.tt index 842ad109..6348bee7 100644 --- a/src/root/common.tt +++ b/src/root/common.tt @@ -270,6 +270,8 @@ BLOCK renderBusyStatus; Sending inputs [% ELSIF step.busy == 30 %] Building + [% ELSIF step.busy == 35 %] + Waiting to receive outputs [% ELSIF step.busy == 40 %] Receiving outputs [% ELSIF step.busy == 50 %] -- 2.48.1 From 478bb01f7f5ad592d948cd54b910a8d4714e297e Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 20 Apr 2024 16:48:03 +0200 Subject: [PATCH 26/36] queue-runner: add prom metrics to allow detecting internal bottlenecks By looking at the ratio of running vs. waiting for the dispatcher and the queue monitor, we should get better visibility into what hydra is currently bottlenecked on. There are other side effects we can try to measure to get to the same result, but having a simple way doesn't cost us much. --- src/hydra-queue-runner/dispatcher.cc | 12 ++++++--- src/hydra-queue-runner/hydra-queue-runner.cc | 28 ++++++++++++++++++++ src/hydra-queue-runner/queue-monitor.cc | 11 ++++++++ src/hydra-queue-runner/state.hh | 6 +++++ 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/hydra-queue-runner/dispatcher.cc b/src/hydra-queue-runner/dispatcher.cc index cbf982bf..11db0071 100644 --- a/src/hydra-queue-runner/dispatcher.cc +++ b/src/hydra-queue-runner/dispatcher.cc @@ -40,13 +40,15 @@ void State::dispatcher() printMsg(lvlDebug, "dispatcher woken up"); nrDispatcherWakeups++; - auto now1 = std::chrono::steady_clock::now(); + auto t_before_work = std::chrono::steady_clock::now(); auto sleepUntil = doDispatch(); - auto now2 = std::chrono::steady_clock::now(); + auto t_after_work = std::chrono::steady_clock::now(); - dispatchTimeMs += std::chrono::duration_cast(now2 - now1).count(); + prom.dispatcher_time_spent_running.Increment( + std::chrono::duration_cast(t_after_work - t_before_work).count()); + dispatchTimeMs += std::chrono::duration_cast(t_after_work - t_before_work).count(); /* Sleep until we're woken up (either because a runnable build is added, or because a build finishes). */ @@ -60,6 +62,10 @@ void State::dispatcher() *dispatcherWakeup_ = false; } + auto t_after_sleep = std::chrono::steady_clock::now(); + prom.dispatcher_time_spent_waiting.Increment( + std::chrono::duration_cast(t_after_sleep - t_after_work).count()); + } catch (std::exception & e) { printError("dispatcher: %s", e.what()); sleep(1); diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index cf7d4056..8123fd39 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -77,6 +77,34 @@ State::PromMetrics::PromMetrics() .Register(*registry) .Add({}) ) + , dispatcher_time_spent_running( + prometheus::BuildCounter() + .Name("hydraqueuerunner_dispatcher_time_spent_running") + .Help("Time (in micros) spent running the dispatcher") + .Register(*registry) + .Add({}) + ) + , dispatcher_time_spent_waiting( + prometheus::BuildCounter() + .Name("hydraqueuerunner_dispatcher_time_spent_waiting") + .Help("Time (in micros) spent waiting for the dispatcher to obtain work") + .Register(*registry) + .Add({}) + ) + , queue_monitor_time_spent_running( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_monitor_time_spent_running") + .Help("Time (in micros) spent running the queue monitor") + .Register(*registry) + .Add({}) + ) + , queue_monitor_time_spent_waiting( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_monitor_time_spent_waiting") + .Help("Time (in micros) spent waiting for the queue monitor to obtain work") + .Register(*registry) + .Add({}) + ) { } diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 81bda873..3af0752a 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -42,12 +42,19 @@ void State::queueMonitorLoop(Connection & conn) bool quit = false; while (!quit) { + auto t_before_work = std::chrono::steady_clock::now(); + localStore->clearPathInfoCache(); bool done = getQueuedBuilds(conn, destStore, lastBuildId); if (buildOne && buildOneDone) quit = true; + auto t_after_work = std::chrono::steady_clock::now(); + + prom.queue_monitor_time_spent_running.Increment( + std::chrono::duration_cast(t_after_work - t_before_work).count()); + /* Sleep until we get notification from the database about an event. */ if (done && !quit) { @@ -72,6 +79,10 @@ void State::queueMonitorLoop(Connection & conn) printMsg(lvlTalkative, "got notification: jobset shares changed"); processJobsetSharesChange(conn); } + + auto t_after_sleep = std::chrono::steady_clock::now(); + prom.queue_monitor_time_spent_waiting.Increment( + std::chrono::duration_cast(t_after_sleep - t_after_work).count()); } exit(0); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 34b7a676..5e05157b 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -459,6 +459,12 @@ private: prometheus::Counter& queue_checks_finished; prometheus::Gauge& queue_max_id; + prometheus::Counter& dispatcher_time_spent_running; + prometheus::Counter& dispatcher_time_spent_waiting; + + prometheus::Counter& queue_monitor_time_spent_running; + prometheus::Counter& queue_monitor_time_spent_waiting; + PromMetrics(); }; PromMetrics prom; -- 2.48.1 From 21d6d805ba60ff44f76c8b8321aa03755cdb4097 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 20 Apr 2024 16:53:52 +0200 Subject: [PATCH 27/36] queue-runner: remove id > X from new builds query Running the query with/without it shows that it makes no difference to postgres, since there's an index on finished=0 already. This allows a few simplifications, but also paves the way towards running multiple parallel monitor threads in the future. --- src/hydra-queue-runner/hydra-queue-runner.cc | 7 ------- src/hydra-queue-runner/queue-monitor.cc | 21 ++++---------------- src/hydra-queue-runner/state.hh | 4 +--- 3 files changed, 5 insertions(+), 27 deletions(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 8123fd39..405c44d1 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -70,13 +70,6 @@ State::PromMetrics::PromMetrics() .Register(*registry) .Add({}) ) - , queue_max_id( - prometheus::BuildGauge() - .Name("hydraqueuerunner_queue_max_build_id_info") - .Help("Maximum build record ID in the queue") - .Register(*registry) - .Add({}) - ) , dispatcher_time_spent_running( prometheus::BuildCounter() .Name("hydraqueuerunner_dispatcher_time_spent_running") diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 3af0752a..a9d386d0 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -38,15 +38,13 @@ void State::queueMonitorLoop(Connection & conn) auto destStore = getDestStore(); - unsigned int lastBuildId = 0; - bool quit = false; while (!quit) { auto t_before_work = std::chrono::steady_clock::now(); localStore->clearPathInfoCache(); - bool done = getQueuedBuilds(conn, destStore, lastBuildId); + bool done = getQueuedBuilds(conn, destStore); if (buildOne && buildOneDone) quit = true; @@ -64,12 +62,10 @@ void State::queueMonitorLoop(Connection & conn) conn.get_notifs(); if (auto lowestId = buildsAdded.get()) { - lastBuildId = std::min(lastBuildId, static_cast(std::stoul(*lowestId) - 1)); printMsg(lvlTalkative, "got notification: new builds added to the queue"); } if (buildsRestarted.get()) { printMsg(lvlTalkative, "got notification: builds restarted"); - lastBuildId = 0; // check all builds } if (buildsCancelled.get() || buildsDeleted.get() || buildsBumped.get()) { printMsg(lvlTalkative, "got notification: builds cancelled or bumped"); @@ -96,11 +92,11 @@ struct PreviousFailure : public std::exception { bool State::getQueuedBuilds(Connection & conn, - ref destStore, unsigned int & lastBuildId) + ref destStore) { prom.queue_checks_started.Increment(); - printInfo("checking the queue for builds > %d...", lastBuildId); + printInfo("checking the queue for builds..."); /* Grab the queued builds from the database, but don't process them yet (since we don't want a long-running transaction). */ @@ -108,8 +104,6 @@ bool State::getQueuedBuilds(Connection & conn, std::map newBuildsByID; std::multimap newBuildsByPath; - unsigned int newLastBuildId = lastBuildId; - { pqxx::work txn(conn); @@ -118,17 +112,12 @@ bool State::getQueuedBuilds(Connection & conn, "jobsets.name as jobset, job, drvPath, maxsilent, timeout, timestamp, " "globalPriority, priority from Builds " "inner join jobsets on builds.jobset_id = jobsets.id " - "where builds.id > $1 and finished = 0 order by globalPriority desc, builds.id", - lastBuildId); + "where finished = 0 order by globalPriority desc, builds.id"); for (auto const & row : res) { auto builds_(builds.lock()); BuildID id = row["id"].as(); if (buildOne && id != buildOne) continue; - if (id > newLastBuildId) { - newLastBuildId = id; - prom.queue_max_id.Set(id); - } if (builds_->count(id)) continue; auto build = std::make_shared( @@ -337,8 +326,6 @@ bool State::getQueuedBuilds(Connection & conn, } prom.queue_checks_finished.Increment(); - - lastBuildId = newBuildsByID.empty() ? newLastBuildId : newBuildsByID.begin()->first - 1; return newBuildsByID.empty(); } diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 5e05157b..4cb295e7 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -457,7 +457,6 @@ private: prometheus::Counter& queue_steps_created; prometheus::Counter& queue_checks_early_exits; prometheus::Counter& queue_checks_finished; - prometheus::Gauge& queue_max_id; prometheus::Counter& dispatcher_time_spent_running; prometheus::Counter& dispatcher_time_spent_waiting; @@ -507,8 +506,7 @@ private: void queueMonitorLoop(Connection & conn); /* Check the queue for new builds. */ - bool getQueuedBuilds(Connection & conn, - nix::ref destStore, unsigned int & lastBuildId); + bool getQueuedBuilds(Connection & conn, nix::ref destStore); /* Handle cancellation, deletion and priority bumps. */ void processQueueChange(Connection & conn); -- 2.48.1 From 2050b2c3243531e9d8266c3bfba34ae0e0a4fcba Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 20 Apr 2024 16:58:10 +0200 Subject: [PATCH 28/36] queue-runner: reduce the time between queue monitor restarts This will induce more DB queries (though these are fairly cheap), but at the benefit of processing bumps within 1m instead of within 10m. --- src/hydra-queue-runner/queue-monitor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index a9d386d0..2049a6c1 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -319,7 +319,7 @@ bool State::getQueuedBuilds(Connection & conn, /* Stop after a certain time to allow priority bumps to be processed. */ - if (std::chrono::system_clock::now() > start + std::chrono::seconds(600)) { + if (std::chrono::system_clock::now() > start + std::chrono::seconds(60)) { prom.queue_checks_early_exits.Increment(); break; } -- 2.48.1 From 28da0a705f4082d50db824bf2318937acef48ba0 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 20 Apr 2024 22:18:13 +0200 Subject: [PATCH 29/36] queue runner: introduce some parallelism for remote paths lookup Each output for a given step being ingested is looked up in parallel, which should basically multiply the speed of builds ingestion by the average number of outputs per derivation. --- src/hydra-queue-runner/queue-monitor.cc | 40 +++++++++++++++++++++---- src/hydra-queue-runner/state.hh | 6 ++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 2049a6c1..9eab6e90 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -2,6 +2,7 @@ #include "hydra-build-result.hh" #include #include +#include #include @@ -404,6 +405,34 @@ void State::processQueueChange(Connection & conn) } +std::map> State::getMissingRemotePaths( + ref destStore, + const std::map> & paths) +{ + Sync>> missing_; + ThreadPool tp; + + for (auto & [output, maybeOutputPath] : paths) { + if (!maybeOutputPath) { + auto missing(missing_.lock()); + missing->insert({output, maybeOutputPath}); + } else { + tp.enqueue([&] { + if (!destStore->isValidPath(*maybeOutputPath)) { + auto missing(missing_.lock()); + missing->insert({output, maybeOutputPath}); + } + }); + } + } + + tp.process(); + + auto missing(missing_.lock()); + return *missing; +} + + Step::ptr State::createStep(ref destStore, Connection & conn, Build::ptr build, const StorePath & drvPath, Build::ptr referringBuild, Step::ptr referringStep, std::set & finishedDrvs, @@ -487,16 +516,15 @@ Step::ptr State::createStep(ref destStore, /* Are all outputs valid? */ auto outputHashes = staticOutputHashes(*localStore, *(step->drv)); - bool valid = true; - std::map> missing; + std::map> paths; for (auto & [outputName, maybeOutputPath] : destStore->queryPartialDerivationOutputMap(drvPath, &*localStore)) { auto outputHash = outputHashes.at(outputName); - if (maybeOutputPath && destStore->isValidPath(*maybeOutputPath)) - continue; - valid = false; - missing.insert({{outputHash, outputName}, maybeOutputPath}); + paths.insert({{outputHash, outputName}, maybeOutputPath}); } + auto missing = getMissingRemotePaths(destStore, paths); + bool valid = missing.empty(); + /* Try to copy the missing paths from the local store or from substitutes. */ if (!missing.empty()) { diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 4cb295e7..18101a0a 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -514,6 +514,12 @@ private: BuildOutput getBuildOutputCached(Connection & conn, nix::ref destStore, const nix::StorePath & drvPath); + /* Returns paths missing from the remote store. Paths are processed in + * parallel to work around the possible latency of remote stores. */ + std::map> getMissingRemotePaths( + nix::ref destStore, + const std::map> & paths); + Step::ptr createStep(nix::ref store, Connection & conn, Build::ptr build, const nix::StorePath & drvPath, Build::ptr referringBuild, Step::ptr referringStep, std::set & finishedDrvs, -- 2.48.1 From 1e3929e75f1efa86aca543bbadf4a525e0598d40 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 20 Apr 2024 22:49:24 +0200 Subject: [PATCH 30/36] queue-runner: switch to pseudorandom ordering of builds processing We don't rely on sequential / monotonic build IDs processing anymore, so randomizing actually has the advantage of mixing builds for different systems together, to avoid only one chunk of builds for a single system getting processed while builders for other systems are starved. --- src/hydra-queue-runner/queue-monitor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hydra-queue-runner/queue-monitor.cc b/src/hydra-queue-runner/queue-monitor.cc index 9eab6e90..bb15ac04 100644 --- a/src/hydra-queue-runner/queue-monitor.cc +++ b/src/hydra-queue-runner/queue-monitor.cc @@ -102,7 +102,7 @@ bool State::getQueuedBuilds(Connection & conn, /* Grab the queued builds from the database, but don't process them yet (since we don't want a long-running transaction). */ std::vector newIDs; - std::map newBuildsByID; + std::unordered_map newBuildsByID; std::multimap newBuildsByPath; { @@ -113,7 +113,7 @@ bool State::getQueuedBuilds(Connection & conn, "jobsets.name as jobset, job, drvPath, maxsilent, timeout, timestamp, " "globalPriority, priority from Builds " "inner join jobsets on builds.jobset_id = jobsets.id " - "where finished = 0 order by globalPriority desc, builds.id"); + "where finished = 0 order by globalPriority desc, random()"); for (auto const & row : res) { auto builds_(builds.lock()); -- 2.48.1 From 37744c7018ea6144f20acc6f5be4a218c6f9b0ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 7 Apr 2025 18:54:39 +0200 Subject: [PATCH 31/36] don't build hydra twice in a pull request + enable merge queue --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 42cb6843..9c05d752 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,7 +1,10 @@ name: "Test" on: pull_request: + merge_group: push: + branches: + - master jobs: tests: runs-on: ubuntu-latest -- 2.48.1 From e15070c6c2fc0f0e24d00fcb82f741fba8db7840 Mon Sep 17 00:00:00 2001 From: K900 Date: Tue, 1 Oct 2024 19:14:24 +0300 Subject: [PATCH 32/36] Add metric for builds waiting for download slot (cherry picked from commit f23ec71227911891807706b6b978836e4d80edde) --- src/hydra-queue-runner/build-remote.cc | 1 + src/hydra-queue-runner/hydra-queue-runner.cc | 1 + src/hydra-queue-runner/state.hh | 1 + 3 files changed, 3 insertions(+) diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index 0c8b3f10..e25da38d 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -540,6 +540,7 @@ void State::buildRemote(ref destStore, /* Throttle CPU-bound work. Opportunistically skip updating the current * step, since this requires a DB roundtrip. */ if (!localWorkThrottler.try_acquire()) { + MaintainCount mc(nrStepsWaitingForDownloadSlot); updateStep(ssWaitingForLocalSlot); localWorkThrottler.acquire(); } diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index 405c44d1..adc903d8 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -573,6 +573,7 @@ void State::dumpStatus(Connection & conn) {"nrActiveSteps", activeSteps_.lock()->size()}, {"nrStepsBuilding", nrStepsBuilding.load()}, {"nrStepsCopyingTo", nrStepsCopyingTo.load()}, + {"nrStepsWaitingForDownloadSlot", nrStepsWaitingForDownloadSlot.load()}, {"nrStepsCopyingFrom", nrStepsCopyingFrom.load()}, {"nrStepsWaiting", nrStepsWaiting.load()}, {"nrUnsupportedSteps", nrUnsupportedSteps.load()}, diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 18101a0a..12aead40 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -369,6 +369,7 @@ private: counter nrStepsDone{0}; counter nrStepsBuilding{0}; counter nrStepsCopyingTo{0}; + counter nrStepsWaitingForDownloadSlot{0}; counter nrStepsCopyingFrom{0}; counter nrStepsWaiting{0}; counter nrUnsupportedSteps{0}; -- 2.48.1 From 5b9c22dd18ac7ae526c2c10d5ec39acf646427fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 7 Apr 2025 18:43:56 +0200 Subject: [PATCH 33/36] bump nixpkgs --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index 3a7a1672..ddff55ec 100644 --- a/flake.lock +++ b/flake.lock @@ -44,11 +44,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1739461644, - "narHash": "sha256-1o1qR0KYozYGRrnqytSpAhVBYLNBHX+Lv6I39zGRzKM=", + "lastModified": 1743987495, + "narHash": "sha256-46T2vMZ4/AfCK0Y2OjlFzJPxmdpP8GtsuEqSSJv3oe4=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "97a719c9f0a07923c957cf51b20b329f9fb9d43f", + "rev": "db8f4fe18ce772a9c8f3adf321416981c8fe9371", "type": "github" }, "original": { -- 2.48.1 From 06ba54fca707d8dbbd3ae019b0844dc0716138f8 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sun, 21 Apr 2024 01:46:41 +0200 Subject: [PATCH 34/36] queue-runner: release machine reservation while copying outputs This allows for better builder usage when the queue runner is busy. To avoid running into uncontrollable imbalances between builder/queue runner, we only release the machine reservation after the local throttler has found a slot to start copying the outputs for that build. As opposed to asserting uniqueness to understand resource utilization, we just switch to using `std::unique_ptr`. --- src/hydra-queue-runner/build-remote.cc | 9 +++++++++ src/hydra-queue-runner/builder.cc | 24 +++++++++++------------- src/hydra-queue-runner/dispatcher.cc | 2 +- src/hydra-queue-runner/state.hh | 6 +++--- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index e25da38d..7e307c75 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -398,6 +398,7 @@ private: }; void State::buildRemote(ref destStore, + std::unique_ptr reservation, ::Machine::ptr machine, Step::ptr step, const ServeProto::BuildOptions & buildOptions, RemoteResult & result, std::shared_ptr activeStep, @@ -546,6 +547,14 @@ void State::buildRemote(ref destStore, } SemaphoreReleaser releaser(&localWorkThrottler); + /* Once we've started copying outputs, release the machine reservation + * so further builds can happen. We do not release the machine earlier + * to avoid situations where the queue runner is bottlenecked on + * copying outputs and we end up building too many things that we + * haven't been able to allow copy slots for. */ + reservation.reset(); + wakeDispatcher(); + StorePathSet outputs; for (auto & [_, realisation] : buildResult.builtOutputs) outputs.insert(realisation.outPath); diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index 018215a5..ff0634b1 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -16,7 +16,7 @@ void setThreadName(const std::string & name) } -void State::builder(MachineReservation::ptr reservation) +void State::builder(std::unique_ptr reservation) { setThreadName("bld~" + std::string(reservation->step->drvPath.to_string())); @@ -35,22 +35,20 @@ void State::builder(MachineReservation::ptr reservation) activeSteps_.lock()->erase(activeStep); }); + std::string machine = reservation->machine->storeUri.render(); + try { auto destStore = getDestStore(); - res = doBuildStep(destStore, reservation, activeStep); + // Might release the reservation. + res = doBuildStep(destStore, std::move(reservation), activeStep); } catch (std::exception & e) { printMsg(lvlError, "uncaught exception building ‘%s’ on ‘%s’: %s", - localStore->printStorePath(reservation->step->drvPath), - reservation->machine->storeUri.render(), + localStore->printStorePath(activeStep->step->drvPath), + machine, e.what()); } } - /* Release the machine and wake up the dispatcher. */ - assert(reservation.unique()); - reservation = 0; - wakeDispatcher(); - /* If there was a temporary failure, retry the step after an exponentially increasing interval. */ Step::ptr step = wstep.lock(); @@ -72,11 +70,11 @@ void State::builder(MachineReservation::ptr reservation) State::StepResult State::doBuildStep(nix::ref destStore, - MachineReservation::ptr reservation, + std::unique_ptr reservation, std::shared_ptr activeStep) { - auto & step(reservation->step); - auto & machine(reservation->machine); + auto step(reservation->step); + auto machine(reservation->machine); { auto step_(step->state.lock()); @@ -211,7 +209,7 @@ State::StepResult State::doBuildStep(nix::ref destStore, try { /* FIXME: referring builds may have conflicting timeouts. */ - buildRemote(destStore, machine, step, buildOptions, result, activeStep, updateStep, narMembers); + buildRemote(destStore, std::move(reservation), machine, step, buildOptions, result, activeStep, updateStep, narMembers); } catch (Error & e) { if (activeStep->state_.lock()->cancelled) { printInfo("marking step %d of build %d as cancelled", stepNr, buildId); diff --git a/src/hydra-queue-runner/dispatcher.cc b/src/hydra-queue-runner/dispatcher.cc index 11db0071..d3e145de 100644 --- a/src/hydra-queue-runner/dispatcher.cc +++ b/src/hydra-queue-runner/dispatcher.cc @@ -288,7 +288,7 @@ system_time State::doDispatch() /* Make a slot reservation and start a thread to do the build. */ auto builderThread = std::thread(&State::builder, this, - std::make_shared(*this, step, mi.machine)); + std::make_unique(*this, step, mi.machine)); builderThread.detach(); // FIXME? keepGoing = true; diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 12aead40..edfad4fb 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -400,7 +400,6 @@ private: struct MachineReservation { - typedef std::shared_ptr ptr; State & state; Step::ptr step; Machine::ptr machine; @@ -550,16 +549,17 @@ private: void abortUnsupported(); - void builder(MachineReservation::ptr reservation); + void builder(std::unique_ptr reservation); /* Perform the given build step. Return true if the step is to be retried. */ enum StepResult { sDone, sRetry, sMaybeCancelled }; StepResult doBuildStep(nix::ref destStore, - MachineReservation::ptr reservation, + std::unique_ptr reservation, std::shared_ptr activeStep); void buildRemote(nix::ref destStore, + std::unique_ptr reservation, Machine::ptr machine, Step::ptr step, const nix::ServeProto::BuildOptions & buildOptions, RemoteResult & result, std::shared_ptr activeStep, -- 2.48.1 From 65618fd590e39e389a296f25892e076c115386c8 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Fri, 12 Apr 2024 17:33:27 +0200 Subject: [PATCH 35/36] web: replace 'errormsg' with 'errormsg IS NULL' in most cases This is implement in an extremely hacky way due to poor DBIx feature support. Ideally, what we'd need is a way to tell DBIx to ignore the errormsg column unless explicitly requested, and to automatically add a computed 'errormsg IS NULL' column in others. Since it does not support that, this commit instead hacks some support via method overrides while taking care to not break anything obvious. --- package.nix | 1 + src/lib/Hydra/Controller/Jobset.pm | 6 ++++ src/lib/Hydra/Controller/JobsetEval.pm | 2 ++ src/lib/Hydra/Helper/Nix.pm | 3 +- .../Hydra/Schema/Result/EvaluationErrors.pm | 2 ++ src/lib/Hydra/Schema/Result/Jobsets.pm | 2 ++ .../Schema/ResultSet/EvaluationErrors.pm | 30 +++++++++++++++++++ src/lib/Hydra/Schema/ResultSet/Jobsets.pm | 30 +++++++++++++++++++ src/root/common.tt | 4 +-- src/root/jobset-eval.tt | 4 +-- src/root/jobset.tt | 6 ++-- t/evaluator/evaluate-constituents-globbing.t | 8 ++--- t/lib/CliRunners.pm | 4 +-- t/queue-runner/direct-indirect-constituents.t | 2 +- 14 files changed, 88 insertions(+), 16 deletions(-) create mode 100644 src/lib/Hydra/Schema/ResultSet/EvaluationErrors.pm create mode 100644 src/lib/Hydra/Schema/ResultSet/Jobsets.pm diff --git a/package.nix b/package.nix index 8bf6a199..4a7840c1 100644 --- a/package.nix +++ b/package.nix @@ -89,6 +89,7 @@ let DateTime DBDPg DBDSQLite + DBIxClassHelpers DigestSHA1 EmailMIME EmailSender diff --git a/src/lib/Hydra/Controller/Jobset.pm b/src/lib/Hydra/Controller/Jobset.pm index 20a52f6f..bc7d7444 100644 --- a/src/lib/Hydra/Controller/Jobset.pm +++ b/src/lib/Hydra/Controller/Jobset.pm @@ -371,6 +371,12 @@ sub errors_GET { $c->stash->{template} = 'eval-error.tt'; + my $jobsetName = $c->stash->{params}->{name}; + $c->stash->{jobset} = $c->stash->{project}->jobsets->find( + { name => $jobsetName }, + { '+columns' => { 'errormsg' => 'errormsg' } } + ); + $self->status_ok($c, entity => $c->stash->{jobset}); } diff --git a/src/lib/Hydra/Controller/JobsetEval.pm b/src/lib/Hydra/Controller/JobsetEval.pm index aca03d72..643a516c 100644 --- a/src/lib/Hydra/Controller/JobsetEval.pm +++ b/src/lib/Hydra/Controller/JobsetEval.pm @@ -93,6 +93,8 @@ sub errors_GET { $c->stash->{template} = 'eval-error.tt'; + $c->stash->{eval} = $c->model('DB::JobsetEvals')->find($c->stash->{eval}->id, { prefetch => 'evaluationerror' }); + $self->status_ok($c, entity => $c->stash->{eval}); } diff --git a/src/lib/Hydra/Helper/Nix.pm b/src/lib/Hydra/Helper/Nix.pm index 88fbdd6d..134b8b7e 100644 --- a/src/lib/Hydra/Helper/Nix.pm +++ b/src/lib/Hydra/Helper/Nix.pm @@ -297,8 +297,7 @@ sub getEvals { my @evals = $evals_result_set->search( { hasnewbuilds => 1 }, - { order_by => "$me.id DESC", rows => $rows, offset => $offset - , prefetch => { evaluationerror => [ ] } }); + { order_by => "$me.id DESC", rows => $rows, offset => $offset }); my @res = (); my $cache = {}; diff --git a/src/lib/Hydra/Schema/Result/EvaluationErrors.pm b/src/lib/Hydra/Schema/Result/EvaluationErrors.pm index 7033fa5e..f6cc48db 100644 --- a/src/lib/Hydra/Schema/Result/EvaluationErrors.pm +++ b/src/lib/Hydra/Schema/Result/EvaluationErrors.pm @@ -105,4 +105,6 @@ __PACKAGE__->add_column( "+id" => { retrieve_on_insert => 1 } ); +__PACKAGE__->mk_group_accessors('column' => 'has_error'); + 1; diff --git a/src/lib/Hydra/Schema/Result/Jobsets.pm b/src/lib/Hydra/Schema/Result/Jobsets.pm index cd704ac8..aee87e00 100644 --- a/src/lib/Hydra/Schema/Result/Jobsets.pm +++ b/src/lib/Hydra/Schema/Result/Jobsets.pm @@ -386,6 +386,8 @@ __PACKAGE__->add_column( "+id" => { retrieve_on_insert => 1 } ); +__PACKAGE__->mk_group_accessors('column' => 'has_error'); + sub supportsDynamicRunCommand { my ($self) = @_; diff --git a/src/lib/Hydra/Schema/ResultSet/EvaluationErrors.pm b/src/lib/Hydra/Schema/ResultSet/EvaluationErrors.pm new file mode 100644 index 00000000..a4c6d955 --- /dev/null +++ b/src/lib/Hydra/Schema/ResultSet/EvaluationErrors.pm @@ -0,0 +1,30 @@ +package Hydra::Schema::ResultSet::EvaluationErrors; + +use strict; +use utf8; +use warnings; + +use parent 'DBIx::Class::ResultSet'; + +use Storable qw(dclone); + +__PACKAGE__->load_components('Helper::ResultSet::RemoveColumns'); + +# Exclude expensive error message values unless explicitly requested, and +# replace them with a summary field describing their presence/absence. +sub search_rs { + my ( $class, $query, $attrs ) = @_; + + if ($attrs) { + $attrs = dclone($attrs); + } + + unless (exists $attrs->{'select'} || exists $attrs->{'columns'}) { + $attrs->{'+columns'}->{'has_error'} = "errormsg != ''"; + } + unless (exists $attrs->{'+columns'}->{'errormsg'}) { + push @{ $attrs->{'remove_columns'} }, 'errormsg'; + } + + return $class->next::method($query, $attrs); +} diff --git a/src/lib/Hydra/Schema/ResultSet/Jobsets.pm b/src/lib/Hydra/Schema/ResultSet/Jobsets.pm new file mode 100644 index 00000000..1b2a12e3 --- /dev/null +++ b/src/lib/Hydra/Schema/ResultSet/Jobsets.pm @@ -0,0 +1,30 @@ +package Hydra::Schema::ResultSet::Jobsets; + +use strict; +use utf8; +use warnings; + +use parent 'DBIx::Class::ResultSet'; + +use Storable qw(dclone); + +__PACKAGE__->load_components('Helper::ResultSet::RemoveColumns'); + +# Exclude expensive error message values unless explicitly requested, and +# replace them with a summary field describing their presence/absence. +sub search_rs { + my ( $class, $query, $attrs ) = @_; + + if ($attrs) { + $attrs = dclone($attrs); + } + + unless (exists $attrs->{'select'} || exists $attrs->{'columns'}) { + $attrs->{'+columns'}->{'has_error'} = "errormsg != ''"; + } + unless (exists $attrs->{'+columns'}->{'errormsg'}) { + push @{ $attrs->{'remove_columns'} }, 'errormsg'; + } + + return $class->next::method($query, $attrs); +} diff --git a/src/root/common.tt b/src/root/common.tt index 6348bee7..86335a74 100644 --- a/src/root/common.tt +++ b/src/root/common.tt @@ -513,7 +513,7 @@ BLOCK renderEvals %] ELSE %] - [% END %] - [% IF eval.evaluationerror.errormsg %] + [% IF eval.evaluationerror.has_error %] Eval Errors [% END %] @@ -639,7 +639,7 @@ BLOCK renderJobsetOverview %] [% IF j.get_column('nrtotal') > 0 %] diff --git a/src/root/jobset-eval.tt b/src/root/jobset-eval.tt index 146878f2..f0b92f97 100644 --- a/src/root/jobset-eval.tt +++ b/src/root/jobset-eval.tt @@ -90,7 +90,7 @@ c.uri_for(c.controller('JobsetEval').action_for('view'), [% END %] - [% IF eval.evaluationerror.errormsg %] + [% IF eval.evaluationerror.has_error %] [% END %] @@ -165,7 +165,7 @@ c.uri_for(c.controller('JobsetEval').action_for('view'), [% END %] - [% IF eval.evaluationerror.errormsg %] + [% IF eval.evaluationerror.has_error %]
diff --git a/src/root/jobset.tt b/src/root/jobset.tt index 5afcbfde..3e594756 100644 --- a/src/root/jobset.tt +++ b/src/root/jobset.tt @@ -61,7 +61,7 @@ [% END %] - [% IF jobset.errormsg || jobset.fetcherrormsg %] + [% IF jobset.has_error || jobset.fetcherrormsg %] [% END %] @@ -79,7 +79,7 @@
[% build.id %][% IF !hideJobsetName %][%build.jobset.get_column("project")%]:[%build.jobset.get_column("name")%]:[% END %][%build.get_column("job")%] + [% IF !hideJobsetName %][%build.jobset.get_column("project")%]:[%build.jobset.get_column("name")%]:[% END %][%build.get_column("job")%] + [% IF showStepName %] + [% INCLUDE renderDrvInfo step=build.buildsteps %] + [% END %] + [% t = showSchedulingInfo ? build.timestamp : build.stoptime; IF t; INCLUDE renderRelativeDate timestamp=(showSchedulingInfo ? build.timestamp : build.stoptime); ELSE; "-"; END %] [% !showSchedulingInfo and build.get_column('releasename') ? build.get_column('releasename') : build.nixname %] [% IF !hideJobsetName %][%build.jobset.get_column("project")%]:[%build.jobset.get_column("name")%]:[% END %][%build.get_column("job")%] [% IF showStepName %] - [% INCLUDE renderDrvInfo step=build.buildsteps %] + [% INCLUDE renderDrvInfo step=build.buildsteps releasename=build.nixname %] [% END %]
JobSystem Build Step What
[% INCLUDE renderFullJobName project=step.project jobset=step.jobset job=step.job %][% step.system %] [% step.build %] [% IF step.busy >= 30 %][% step.stepnr %][% ELSE; step.stepnr; END %] [% step.drvpath.match('-(.*)').0 %][% HTML.escape(j.description) %] [% IF j.lastcheckedtime; INCLUDE renderDateTime timestamp = j.lastcheckedtime; - IF j.errormsg || j.fetcherrormsg; %] Error[% END; + IF j.has_error || j.fetcherrormsg; %] Error[% END; ELSE; "-"; END %]Last checked: [% IF jobset.lastcheckedtime %] - [% INCLUDE renderDateTime timestamp = jobset.lastcheckedtime %], [% IF jobset.errormsg || jobset.fetcherrormsg %]with errors![% ELSE %]no errors[% END %] + [% INCLUDE renderDateTime timestamp = jobset.lastcheckedtime %], [% IF jobset.has_error || jobset.fetcherrormsg %]with errors![% ELSE %]no errors[% END %] [% ELSE %] never [% END %] @@ -117,7 +117,7 @@ - [% IF jobset.errormsg || jobset.fetcherrormsg %] + [% IF jobset.has_error || jobset.fetcherrormsg %]
diff --git a/t/evaluator/evaluate-constituents-globbing.t b/t/evaluator/evaluate-constituents-globbing.t index 49315d58..0ed0ee7c 100644 --- a/t/evaluator/evaluate-constituents-globbing.t +++ b/t/evaluator/evaluate-constituents-globbing.t @@ -61,8 +61,8 @@ subtest "* selects all except current aggregate" => sub { $jobset->discard_changes; # refresh from DB is( - $jobset->errormsg, - "", + $jobset->has_error, + 0, "eval-errors non-empty" ); }; @@ -101,7 +101,7 @@ subtest "trivial cycle check" => sub { ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); - $jobset->discard_changes; # refresh from DB + $jobset->discard_changes({ '+columns' => {'errormsg' => 'errormsg'} }); # refresh from DB like( $jobset->errormsg, qr/Dependency cycle: indirect_aggregate <-> ok_aggregate/, @@ -123,7 +123,7 @@ subtest "cycle check with globbing" => sub { ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); - $jobset->discard_changes; # refresh from DB + $jobset->discard_changes({ '+columns' => {'errormsg' => 'errormsg'} }); # refresh from DB like( $jobset->errormsg, qr/aggregate job ‘indirect_aggregate’ failed with the error: Dependency cycle: indirect_aggregate <-> packages.constituentA/, diff --git a/t/lib/CliRunners.pm b/t/lib/CliRunners.pm index 885f2ae4..8c53b551 100644 --- a/t/lib/CliRunners.pm +++ b/t/lib/CliRunners.pm @@ -14,7 +14,7 @@ our @EXPORT = qw( sub evalSucceeds { my ($jobset) = @_; my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("hydra-eval-jobset", $jobset->project->name, $jobset->name)); - $jobset->discard_changes; # refresh from DB + $jobset->discard_changes({ '+columns' => {'errormsg' => 'errormsg'} }); # refresh from DB if ($res) { chomp $stdout; chomp $stderr; utf8::decode($stdout) or die "Invalid unicode in stdout."; @@ -29,7 +29,7 @@ sub evalSucceeds { sub evalFails { my ($jobset) = @_; my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("hydra-eval-jobset", $jobset->project->name, $jobset->name)); - $jobset->discard_changes; # refresh from DB + $jobset->discard_changes({ '+columns' => {'errormsg' => 'errormsg'} }); # refresh from DB if (!$res) { chomp $stdout; chomp $stderr; utf8::decode($stdout) or die "Invalid unicode in stdout."; diff --git a/t/queue-runner/direct-indirect-constituents.t b/t/queue-runner/direct-indirect-constituents.t index 35370450..a017c76f 100644 --- a/t/queue-runner/direct-indirect-constituents.t +++ b/t/queue-runner/direct-indirect-constituents.t @@ -13,7 +13,7 @@ my $constituentBuildA = $builds->{"constituentA"}; my $constituentBuildB = $builds->{"constituentB"}; my $eval = $constituentBuildA->jobsetevals->first(); -is($eval->evaluationerror->errormsg, ""); +is($eval->evaluationerror->has_error, 0); subtest "Verifying the direct aggregate" => sub { my $aggBuild = $builds->{"direct_aggregate"}; -- 2.48.1 From 33a935e8ef44e07c709328235ed48c4b4de03483 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Tue, 8 Apr 2025 17:38:19 -0400 Subject: [PATCH 36/36] Queue-runner: Always produce a machines JSON object Even if there are no machines, there should at least be an empty object. --- src/hydra-queue-runner/hydra-queue-runner.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index adc903d8..05d7e263 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -615,6 +615,7 @@ void State::dumpStatus(Connection & conn) } { + auto machines_json = json::object(); auto machines_(machines.lock()); for (auto & i : *machines_) { auto & m(i.second); @@ -641,8 +642,9 @@ void State::dumpStatus(Connection & conn) machine["avgStepTime"] = (float) s->totalStepTime / s->nrStepsDone; machine["avgStepBuildTime"] = (float) s->totalStepBuildTime / s->nrStepsDone; } - statusJson["machines"][m->storeUri.render()] = machine; + machines_json[m->storeUri.render()] = machine; } + statusJson["machines"] = machines_json; } { -- 2.48.1