diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index e25da38d..7e307c75 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -398,6 +398,7 @@ private: }; void State::buildRemote(ref destStore, + std::unique_ptr reservation, ::Machine::ptr machine, Step::ptr step, const ServeProto::BuildOptions & buildOptions, RemoteResult & result, std::shared_ptr activeStep, @@ -546,6 +547,14 @@ void State::buildRemote(ref destStore, } SemaphoreReleaser releaser(&localWorkThrottler); + /* Once we've started copying outputs, release the machine reservation + * so further builds can happen. We do not release the machine earlier + * to avoid situations where the queue runner is bottlenecked on + * copying outputs and we end up building too many things that we + * haven't been able to allow copy slots for. */ + reservation.reset(); + wakeDispatcher(); + StorePathSet outputs; for (auto & [_, realisation] : buildResult.builtOutputs) outputs.insert(realisation.outPath); diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index 018215a5..ff0634b1 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -16,7 +16,7 @@ void setThreadName(const std::string & name) } -void State::builder(MachineReservation::ptr reservation) +void State::builder(std::unique_ptr reservation) { setThreadName("bld~" + std::string(reservation->step->drvPath.to_string())); @@ -35,22 +35,20 @@ void State::builder(MachineReservation::ptr reservation) activeSteps_.lock()->erase(activeStep); }); + std::string machine = reservation->machine->storeUri.render(); + try { auto destStore = getDestStore(); - res = doBuildStep(destStore, reservation, activeStep); + // Might release the reservation. + res = doBuildStep(destStore, std::move(reservation), activeStep); } catch (std::exception & e) { printMsg(lvlError, "uncaught exception building ā€˜%s’ on ā€˜%s’: %s", - localStore->printStorePath(reservation->step->drvPath), - reservation->machine->storeUri.render(), + localStore->printStorePath(activeStep->step->drvPath), + machine, e.what()); } } - /* Release the machine and wake up the dispatcher. */ - assert(reservation.unique()); - reservation = 0; - wakeDispatcher(); - /* If there was a temporary failure, retry the step after an exponentially increasing interval. */ Step::ptr step = wstep.lock(); @@ -72,11 +70,11 @@ void State::builder(MachineReservation::ptr reservation) State::StepResult State::doBuildStep(nix::ref destStore, - MachineReservation::ptr reservation, + std::unique_ptr reservation, std::shared_ptr activeStep) { - auto & step(reservation->step); - auto & machine(reservation->machine); + auto step(reservation->step); + auto machine(reservation->machine); { auto step_(step->state.lock()); @@ -211,7 +209,7 @@ State::StepResult State::doBuildStep(nix::ref destStore, try { /* FIXME: referring builds may have conflicting timeouts. */ - buildRemote(destStore, machine, step, buildOptions, result, activeStep, updateStep, narMembers); + buildRemote(destStore, std::move(reservation), machine, step, buildOptions, result, activeStep, updateStep, narMembers); } catch (Error & e) { if (activeStep->state_.lock()->cancelled) { printInfo("marking step %d of build %d as cancelled", stepNr, buildId); diff --git a/src/hydra-queue-runner/dispatcher.cc b/src/hydra-queue-runner/dispatcher.cc index 11db0071..d3e145de 100644 --- a/src/hydra-queue-runner/dispatcher.cc +++ b/src/hydra-queue-runner/dispatcher.cc @@ -288,7 +288,7 @@ system_time State::doDispatch() /* Make a slot reservation and start a thread to do the build. */ auto builderThread = std::thread(&State::builder, this, - std::make_shared(*this, step, mi.machine)); + std::make_unique(*this, step, mi.machine)); builderThread.detach(); // FIXME? keepGoing = true; diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 12aead40..edfad4fb 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -400,7 +400,6 @@ private: struct MachineReservation { - typedef std::shared_ptr ptr; State & state; Step::ptr step; Machine::ptr machine; @@ -550,16 +549,17 @@ private: void abortUnsupported(); - void builder(MachineReservation::ptr reservation); + void builder(std::unique_ptr reservation); /* Perform the given build step. Return true if the step is to be retried. */ enum StepResult { sDone, sRetry, sMaybeCancelled }; StepResult doBuildStep(nix::ref destStore, - MachineReservation::ptr reservation, + std::unique_ptr reservation, std::shared_ptr activeStep); void buildRemote(nix::ref destStore, + std::unique_ptr reservation, Machine::ptr machine, Step::ptr step, const nix::ServeProto::BuildOptions & buildOptions, RemoteResult & result, std::shared_ptr activeStep,