Abort unsupported build steps

If we don't see machine that supports a build step for
'max_unsupported_time' seconds, the step is aborted. The default is 0,
which is appropriate for Hydra installations that don't provision
missing machines dynamically.

(cherry picked from commit f5cdbfe21d)
This commit is contained in:
Eelco Dolstra
2020-03-26 15:00:04 +01:00
parent 68a59f34a0
commit 4417f9f260
4 changed files with 208 additions and 92 deletions

View File

@ -68,7 +68,7 @@ struct RemoteResult
std::unique_ptr<nix::TokenServer::Token> tokens;
std::shared_ptr<nix::FSAccessor> accessor;
BuildStatus buildStatus()
BuildStatus buildStatus() const
{
return stepStatus == bsCachedFailure ? bsFailed : stepStatus;
}
@ -198,6 +198,10 @@ struct Step
/* The time at which this step became runnable. */
system_time runnableSince;
/* The time that we last saw a machine that supports this
step. */
system_time lastSupported = std::chrono::system_clock::now();
};
std::atomic_bool finished{false}; // debugging
@ -303,6 +307,9 @@ private:
const float retryBackoff = 3.0;
const unsigned int maxParallelCopyClosure = 4;
/* Time in seconds before unsupported build steps are aborted. */
const unsigned int maxUnsupportedTime = 0;
nix::Path hydraData, logDir;
bool useSubstitutes = false;
@ -483,6 +490,15 @@ private:
Build::ptr referringBuild, Step::ptr referringStep, std::set<nix::StorePath> & finishedDrvs,
std::set<Step::ptr> & newSteps, std::set<Step::ptr> & newRunnable);
void failStep(
Connection & conn,
Step::ptr step,
BuildID buildId,
const RemoteResult & result,
Machine::ptr machine,
bool & stepFinished,
bool & quit);
Jobset::ptr createJobset(pqxx::work & txn,
const std::string & projectName, const std::string & jobsetName);
@ -497,6 +513,8 @@ private:
void wakeDispatcher();
void abortUnsupported();
void builder(MachineReservation::ptr reservation);
/* Perform the given build step. Return true if the step is to be