queue-runner: release machine reservation while copying outputs
This allows for better builder utilization when the queue runner is busy. To avoid running into uncontrollable imbalances between the builders and the queue runner, we only release the machine reservation after the local throttler has found a slot to start copying the outputs for that build. Rather than asserting uniqueness of the reservation pointer to reason about resource utilization, we simply switch to using `std::unique_ptr`, which makes the single ownership explicit.
This commit is contained in:
parent
cc4b206d85
commit
143a07bff0
@ -398,6 +398,7 @@ private:
|
||||
};
|
||||
|
||||
void State::buildRemote(ref<Store> destStore,
|
||||
std::unique_ptr<MachineReservation> reservation,
|
||||
::Machine::ptr machine, Step::ptr step,
|
||||
const ServeProto::BuildOptions & buildOptions,
|
||||
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
||||
@ -546,6 +547,14 @@ void State::buildRemote(ref<Store> destStore,
|
||||
}
|
||||
SemaphoreReleaser releaser(&localWorkThrottler);
|
||||
|
||||
/* Once we've started copying outputs, release the machine reservation
|
||||
* so further builds can happen. We do not release the machine earlier
|
||||
* to avoid situations where the queue runner is bottlenecked on
|
||||
* copying outputs and we end up building too many things that we
|
||||
* haven't been able to allow copy slots for. */
|
||||
reservation.reset();
|
||||
wakeDispatcher();
|
||||
|
||||
StorePathSet outputs;
|
||||
for (auto & [_, realisation] : buildResult.builtOutputs)
|
||||
outputs.insert(realisation.outPath);
|
||||
|
@ -16,7 +16,7 @@ void setThreadName(const std::string & name)
|
||||
}
|
||||
|
||||
|
||||
void State::builder(MachineReservation::ptr reservation)
|
||||
void State::builder(std::unique_ptr<MachineReservation> reservation)
|
||||
{
|
||||
setThreadName("bld~" + std::string(reservation->step->drvPath.to_string()));
|
||||
|
||||
@ -35,22 +35,20 @@ void State::builder(MachineReservation::ptr reservation)
|
||||
activeSteps_.lock()->erase(activeStep);
|
||||
});
|
||||
|
||||
std::string machine = reservation->machine->storeUri.render();
|
||||
|
||||
try {
|
||||
auto destStore = getDestStore();
|
||||
res = doBuildStep(destStore, reservation, activeStep);
|
||||
// Might release the reservation.
|
||||
res = doBuildStep(destStore, std::move(reservation), activeStep);
|
||||
} catch (std::exception & e) {
|
||||
printMsg(lvlError, "uncaught exception building ‘%s’ on ‘%s’: %s",
|
||||
localStore->printStorePath(reservation->step->drvPath),
|
||||
reservation->machine->storeUri.render(),
|
||||
localStore->printStorePath(activeStep->step->drvPath),
|
||||
machine,
|
||||
e.what());
|
||||
}
|
||||
}
|
||||
|
||||
/* Release the machine and wake up the dispatcher. */
|
||||
assert(reservation.unique());
|
||||
reservation = 0;
|
||||
wakeDispatcher();
|
||||
|
||||
/* If there was a temporary failure, retry the step after an
|
||||
exponentially increasing interval. */
|
||||
Step::ptr step = wstep.lock();
|
||||
@ -72,11 +70,11 @@ void State::builder(MachineReservation::ptr reservation)
|
||||
|
||||
|
||||
State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||
MachineReservation::ptr reservation,
|
||||
std::unique_ptr<MachineReservation> reservation,
|
||||
std::shared_ptr<ActiveStep> activeStep)
|
||||
{
|
||||
auto & step(reservation->step);
|
||||
auto & machine(reservation->machine);
|
||||
auto step(reservation->step);
|
||||
auto machine(reservation->machine);
|
||||
|
||||
{
|
||||
auto step_(step->state.lock());
|
||||
@ -211,7 +209,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
|
||||
|
||||
try {
|
||||
/* FIXME: referring builds may have conflicting timeouts. */
|
||||
buildRemote(destStore, machine, step, buildOptions, result, activeStep, updateStep, narMembers);
|
||||
buildRemote(destStore, std::move(reservation), machine, step, buildOptions, result, activeStep, updateStep, narMembers);
|
||||
} catch (Error & e) {
|
||||
if (activeStep->state_.lock()->cancelled) {
|
||||
printInfo("marking step %d of build %d as cancelled", stepNr, buildId);
|
||||
|
@ -288,7 +288,7 @@ system_time State::doDispatch()
|
||||
/* Make a slot reservation and start a thread to
|
||||
do the build. */
|
||||
auto builderThread = std::thread(&State::builder, this,
|
||||
std::make_shared<MachineReservation>(*this, step, mi.machine));
|
||||
std::make_unique<MachineReservation>(*this, step, mi.machine));
|
||||
builderThread.detach(); // FIXME?
|
||||
|
||||
keepGoing = true;
|
||||
|
@ -400,7 +400,6 @@ private:
|
||||
|
||||
struct MachineReservation
|
||||
{
|
||||
typedef std::shared_ptr<MachineReservation> ptr;
|
||||
State & state;
|
||||
Step::ptr step;
|
||||
Machine::ptr machine;
|
||||
@ -550,16 +549,17 @@ private:
|
||||
|
||||
void abortUnsupported();
|
||||
|
||||
void builder(MachineReservation::ptr reservation);
|
||||
void builder(std::unique_ptr<MachineReservation> reservation);
|
||||
|
||||
/* Perform the given build step. Returns a StepResult indicating whether
|
||||
the step is done, is to be retried, or may have been cancelled. */
|
||||
enum StepResult { sDone, sRetry, sMaybeCancelled };
|
||||
StepResult doBuildStep(nix::ref<nix::Store> destStore,
|
||||
MachineReservation::ptr reservation,
|
||||
std::unique_ptr<MachineReservation> reservation,
|
||||
std::shared_ptr<ActiveStep> activeStep);
|
||||
|
||||
void buildRemote(nix::ref<nix::Store> destStore,
|
||||
std::unique_ptr<MachineReservation> reservation,
|
||||
Machine::ptr machine, Step::ptr step,
|
||||
const nix::ServeProto::BuildOptions & buildOptions,
|
||||
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
||||
|
Loading…
x
Reference in New Issue
Block a user