Split hydra-queue-runner.cc more
This commit is contained in:
369
src/hydra-queue-runner/queue-monitor.cc
Normal file
369
src/hydra-queue-runner/queue-monitor.cc
Normal file
@ -0,0 +1,369 @@
|
||||
#include "state.hh"
|
||||
#include "build-result.hh"
|
||||
|
||||
using namespace nix;
|
||||
|
||||
|
||||
void State::queueMonitor()
|
||||
{
|
||||
while (true) {
|
||||
try {
|
||||
queueMonitorLoop();
|
||||
} catch (std::exception & e) {
|
||||
printMsg(lvlError, format("queue monitor: %1%") % e.what());
|
||||
sleep(10); // probably a DB problem, so don't retry right away
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void State::queueMonitorLoop()
|
||||
{
|
||||
auto conn(dbPool.get());
|
||||
|
||||
receiver buildsAdded(*conn, "builds_added");
|
||||
receiver buildsRestarted(*conn, "builds_restarted");
|
||||
receiver buildsCancelled(*conn, "builds_cancelled");
|
||||
receiver buildsDeleted(*conn, "builds_deleted");
|
||||
|
||||
auto store = openStore(); // FIXME: pool
|
||||
|
||||
unsigned int lastBuildId = 0;
|
||||
|
||||
while (true) {
|
||||
getQueuedBuilds(*conn, store, lastBuildId);
|
||||
|
||||
/* Sleep until we get notification from the database about an
|
||||
event. */
|
||||
conn->await_notification();
|
||||
nrQueueWakeups++;
|
||||
|
||||
if (buildsAdded.get())
|
||||
printMsg(lvlTalkative, "got notification: new builds added to the queue");
|
||||
if (buildsRestarted.get()) {
|
||||
printMsg(lvlTalkative, "got notification: builds restarted");
|
||||
lastBuildId = 0; // check all builds
|
||||
}
|
||||
if (buildsCancelled.get() || buildsDeleted.get()) {
|
||||
printMsg(lvlTalkative, "got notification: builds cancelled");
|
||||
removeCancelledBuilds(*conn);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void State::getQueuedBuilds(Connection & conn, std::shared_ptr<StoreAPI> store, unsigned int & lastBuildId)
|
||||
{
|
||||
printMsg(lvlInfo, format("checking the queue for builds > %1%...") % lastBuildId);
|
||||
|
||||
/* Grab the queued builds from the database, but don't process
|
||||
them yet (since we don't want a long-running transaction). */
|
||||
std::multimap<Path, Build::ptr> newBuilds;
|
||||
|
||||
{
|
||||
pqxx::work txn(conn);
|
||||
|
||||
auto res = txn.parameterized("select id, project, jobset, job, drvPath, maxsilent, timeout from Builds where id > $1 and finished = 0 order by id")(lastBuildId).exec();
|
||||
|
||||
for (auto const & row : res) {
|
||||
auto builds_(builds.lock());
|
||||
BuildID id = row["id"].as<BuildID>();
|
||||
if (buildOne && id != buildOne) continue;
|
||||
if (id > lastBuildId) lastBuildId = id;
|
||||
if (has(*builds_, id)) continue;
|
||||
|
||||
auto build = std::make_shared<Build>();
|
||||
build->id = id;
|
||||
build->drvPath = row["drvPath"].as<string>();
|
||||
build->fullJobName = row["project"].as<string>() + ":" + row["jobset"].as<string>() + ":" + row["job"].as<string>();
|
||||
build->maxSilentTime = row["maxsilent"].as<int>();
|
||||
build->buildTimeout = row["timeout"].as<int>();
|
||||
|
||||
newBuilds.emplace(std::make_pair(build->drvPath, build));
|
||||
}
|
||||
}
|
||||
|
||||
std::set<Step::ptr> newRunnable;
|
||||
unsigned int nrAdded;
|
||||
std::function<void(Build::ptr)> createBuild;
|
||||
|
||||
createBuild = [&](Build::ptr build) {
|
||||
printMsg(lvlTalkative, format("loading build %1% (%2%)") % build->id % build->fullJobName);
|
||||
nrAdded++;
|
||||
|
||||
if (!store->isValidPath(build->drvPath)) {
|
||||
/* Derivation has been GC'ed prematurely. */
|
||||
printMsg(lvlError, format("aborting GC'ed build %1%") % build->id);
|
||||
if (!build->finishedInDB) {
|
||||
pqxx::work txn(conn);
|
||||
txn.parameterized
|
||||
("update Builds set finished = 1, busy = 0, buildStatus = $2, startTime = $3, stopTime = $3, errorMsg = $4 where id = $1 and finished = 0")
|
||||
(build->id)
|
||||
((int) bsAborted)
|
||||
(time(0))
|
||||
("derivation was garbage-collected prior to build").exec();
|
||||
txn.commit();
|
||||
build->finishedInDB = true;
|
||||
nrBuildsDone++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
std::set<Step::ptr> newSteps;
|
||||
std::set<Path> finishedDrvs; // FIXME: re-use?
|
||||
Step::ptr step = createStep(store, build->drvPath, build, 0, finishedDrvs, newSteps, newRunnable);
|
||||
|
||||
/* Some of the new steps may be the top level of builds that
|
||||
we haven't processed yet. So do them now. This ensures that
|
||||
if build A depends on build B with top-level step X, then X
|
||||
will be "accounted" to B in doBuildStep(). */
|
||||
for (auto & r : newSteps) {
|
||||
while (true) {
|
||||
auto i = newBuilds.find(r->drvPath);
|
||||
if (i == newBuilds.end()) break;
|
||||
Build::ptr b = i->second;
|
||||
newBuilds.erase(i);
|
||||
createBuild(b);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't get a step, it means the step's outputs are
|
||||
all valid. So we mark this as a finished, cached build. */
|
||||
if (!step) {
|
||||
Derivation drv = readDerivation(build->drvPath);
|
||||
BuildOutput res = getBuildOutput(store, drv);
|
||||
|
||||
pqxx::work txn(conn);
|
||||
time_t now = time(0);
|
||||
markSucceededBuild(txn, build, res, true, now, now);
|
||||
txn.commit();
|
||||
|
||||
build->finishedInDB = true;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* If any step has an unsupported system type or has a
|
||||
previously failed output path, then fail the build right
|
||||
away. */
|
||||
bool badStep = false;
|
||||
for (auto & r : newSteps) {
|
||||
BuildStatus buildStatus = bsSuccess;
|
||||
BuildStepStatus buildStepStatus = bssFailed;
|
||||
|
||||
if (checkCachedFailure(r, conn)) {
|
||||
printMsg(lvlError, format("marking build %1% as cached failure") % build->id);
|
||||
buildStatus = step == r ? bsFailed : bsDepFailed;
|
||||
buildStepStatus = bssFailed;
|
||||
}
|
||||
|
||||
if (buildStatus == bsSuccess) {
|
||||
bool supported = false;
|
||||
{
|
||||
auto machines_(machines.lock()); // FIXME: use shared_mutex
|
||||
for (auto & m : *machines_)
|
||||
if (m.second->supportsStep(r)) { supported = true; break; }
|
||||
}
|
||||
|
||||
if (!supported) {
|
||||
printMsg(lvlError, format("aborting unsupported build %1%") % build->id);
|
||||
buildStatus = bsUnsupported;
|
||||
buildStepStatus = bssUnsupported;
|
||||
}
|
||||
}
|
||||
|
||||
if (buildStatus != bsSuccess) {
|
||||
time_t now = time(0);
|
||||
if (!build->finishedInDB) {
|
||||
pqxx::work txn(conn);
|
||||
createBuildStep(txn, 0, build, r, "", buildStepStatus);
|
||||
txn.parameterized
|
||||
("update Builds set finished = 1, busy = 0, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = $4 where id = $1 and finished = 0")
|
||||
(build->id)
|
||||
((int) buildStatus)
|
||||
(now)
|
||||
(buildStatus != bsUnsupported ? 1 : 0).exec();
|
||||
txn.commit();
|
||||
build->finishedInDB = true;
|
||||
nrBuildsDone++;
|
||||
}
|
||||
badStep = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (badStep) return;
|
||||
|
||||
/* Note: if we exit this scope prior to this, the build and
|
||||
all newly created steps are destroyed. */
|
||||
|
||||
{
|
||||
auto builds_(builds.lock());
|
||||
if (!build->finishedInDB) // FIXME: can this happen?
|
||||
(*builds_)[build->id] = build;
|
||||
build->toplevel = step;
|
||||
}
|
||||
|
||||
printMsg(lvlChatty, format("added build %1% (top-level step %2%, %3% new steps)")
|
||||
% build->id % step->drvPath % newSteps.size());
|
||||
};
|
||||
|
||||
/* Now instantiate build steps for each new build. The builder
|
||||
threads can start building the runnable build steps right away,
|
||||
even while we're still processing other new builds. */
|
||||
while (!newBuilds.empty()) {
|
||||
auto build = newBuilds.begin()->second;
|
||||
newBuilds.erase(newBuilds.begin());
|
||||
|
||||
newRunnable.clear();
|
||||
nrAdded = 0;
|
||||
try {
|
||||
createBuild(build);
|
||||
} catch (Error & e) {
|
||||
e.addPrefix(format("while loading build %1%: ") % build->id);
|
||||
throw;
|
||||
}
|
||||
|
||||
/* Add the new runnable build steps to ‘runnable’ and wake up
|
||||
the builder threads. */
|
||||
printMsg(lvlChatty, format("got %1% new runnable steps from %2% new builds") % newRunnable.size() % nrAdded);
|
||||
for (auto & r : newRunnable)
|
||||
makeRunnable(r);
|
||||
|
||||
nrBuildsRead += nrAdded;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void State::removeCancelledBuilds(Connection & conn)
|
||||
{
|
||||
/* Get the current set of queued builds. */
|
||||
std::set<BuildID> currentIds;
|
||||
{
|
||||
pqxx::work txn(conn);
|
||||
auto res = txn.exec("select id from Builds where finished = 0");
|
||||
for (auto const & row : res)
|
||||
currentIds.insert(row["id"].as<BuildID>());
|
||||
}
|
||||
|
||||
auto builds_(builds.lock());
|
||||
|
||||
for (auto i = builds_->begin(); i != builds_->end(); ) {
|
||||
if (currentIds.find(i->first) == currentIds.end()) {
|
||||
printMsg(lvlInfo, format("discarding cancelled build %1%") % i->first);
|
||||
i = builds_->erase(i);
|
||||
// FIXME: ideally we would interrupt active build steps here.
|
||||
} else
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Step::ptr State::createStep(std::shared_ptr<StoreAPI> store, const Path & drvPath,
|
||||
Build::ptr referringBuild, Step::ptr referringStep, std::set<Path> & finishedDrvs,
|
||||
std::set<Step::ptr> & newSteps, std::set<Step::ptr> & newRunnable)
|
||||
{
|
||||
if (finishedDrvs.find(drvPath) != finishedDrvs.end()) return 0;
|
||||
|
||||
/* Check if the requested step already exists. If not, create a
|
||||
new step. In any case, make the step reachable from
|
||||
referringBuild or referringStep. This is done atomically (with
|
||||
‘steps’ locked), to ensure that this step can never become
|
||||
reachable from a new build after doBuildStep has removed it
|
||||
from ‘steps’. */
|
||||
Step::ptr step;
|
||||
bool isNew = false;
|
||||
{
|
||||
auto steps_(steps.lock());
|
||||
|
||||
/* See if the step already exists in ‘steps’ and is not
|
||||
stale. */
|
||||
auto prev = steps_->find(drvPath);
|
||||
if (prev != steps_->end()) {
|
||||
step = prev->second.lock();
|
||||
/* Since ‘step’ is a strong pointer, the referred Step
|
||||
object won't be deleted after this. */
|
||||
if (!step) steps_->erase(drvPath); // remove stale entry
|
||||
}
|
||||
|
||||
/* If it doesn't exist, create it. */
|
||||
if (!step) {
|
||||
step = std::make_shared<Step>();
|
||||
step->drvPath = drvPath;
|
||||
isNew = true;
|
||||
}
|
||||
|
||||
auto step_(step->state.lock());
|
||||
|
||||
assert(step_->created != isNew);
|
||||
|
||||
if (referringBuild)
|
||||
step_->builds.push_back(referringBuild);
|
||||
|
||||
if (referringStep)
|
||||
step_->rdeps.push_back(referringStep);
|
||||
|
||||
(*steps_)[drvPath] = step;
|
||||
}
|
||||
|
||||
if (!isNew) return step;
|
||||
|
||||
printMsg(lvlDebug, format("considering derivation ‘%1%’") % drvPath);
|
||||
|
||||
/* Initialize the step. Note that the step may be visible in
|
||||
‘steps’ before this point, but that doesn't matter because
|
||||
it's not runnable yet, and other threads won't make it
|
||||
runnable while step->created == false. */
|
||||
step->drv = readDerivation(drvPath);
|
||||
{
|
||||
auto i = step->drv.env.find("requiredSystemFeatures");
|
||||
if (i != step->drv.env.end())
|
||||
step->requiredSystemFeatures = tokenizeString<std::set<std::string>>(i->second);
|
||||
}
|
||||
|
||||
auto attr = step->drv.env.find("preferLocalBuild");
|
||||
step->preferLocalBuild =
|
||||
attr != step->drv.env.end() && attr->second == "1"
|
||||
&& has(localPlatforms, step->drv.platform);
|
||||
|
||||
/* Are all outputs valid? */
|
||||
bool valid = true;
|
||||
for (auto & i : step->drv.outputs) {
|
||||
if (!store->isValidPath(i.second.path)) {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: check whether all outputs are in the binary cache.
|
||||
if (valid) {
|
||||
finishedDrvs.insert(drvPath);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* No, we need to build. */
|
||||
printMsg(lvlDebug, format("creating build step ‘%1%’") % drvPath);
|
||||
newSteps.insert(step);
|
||||
|
||||
/* Create steps for the dependencies. */
|
||||
for (auto & i : step->drv.inputDrvs) {
|
||||
auto dep = createStep(store, i.first, 0, step, finishedDrvs, newSteps, newRunnable);
|
||||
if (dep) {
|
||||
auto step_(step->state.lock());
|
||||
step_->deps.insert(dep);
|
||||
}
|
||||
}
|
||||
|
||||
/* If the step has no (remaining) dependencies, make it
|
||||
runnable. */
|
||||
{
|
||||
auto step_(step->state.lock());
|
||||
assert(!step_->created);
|
||||
step_->created = true;
|
||||
if (step_->deps.empty())
|
||||
newRunnable.insert(step);
|
||||
}
|
||||
|
||||
return step;
|
||||
}
|
Reference in New Issue
Block a user