2015-05-28 17:39:29 +02:00
|
|
|
|
#include <iostream>
|
2015-05-29 01:31:12 +02:00
|
|
|
|
#include <thread>
|
|
|
|
|
|
2015-06-19 14:51:59 +02:00
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
#include <sys/stat.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
|
2015-07-07 10:17:21 +02:00
|
|
|
|
#include "state.hh"
|
2015-07-21 15:14:17 +02:00
|
|
|
|
#include "build-result.hh"
|
2015-05-29 17:14:20 +02:00
|
|
|
|
|
2015-05-28 17:39:29 +02:00
|
|
|
|
#include "shared.hh"
|
|
|
|
|
#include "globals.hh"
|
2015-06-22 14:06:44 +02:00
|
|
|
|
#include "value-to-json.hh"
|
2015-05-28 17:39:29 +02:00
|
|
|
|
|
|
|
|
|
using namespace nix;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
State::State()
{
    /* All of Hydra's mutable state lives under $HYDRA_DATA; refuse
       to start without it. */
    hydraData = getEnv("HYDRA_DATA");
    if (hydraData.empty()) throw Error("$HYDRA_DATA must be set");

    /* Build logs are kept below the data directory. */
    logDir = canonPath(hydraData + "/build-logs");

    /* The local host can build for its own system type; a 64-bit x86
       Linux host can additionally execute 32-bit x86 builds. */
    localPlatforms.insert(settings.thisSystem);
    if (settings.thisSystem == "x86_64-linux")
        localPlatforms.insert("i686-linux");
}
|
|
|
|
|
|
|
|
|
|
|
2015-08-25 14:11:50 +02:00
|
|
|
|
/* Parse the contents of a Nix machines file and replace the current
   machine set with the result.  Each non-comment line has the form:

     sshName systemTypes sshKey [maxJobs [speedFactor [supportedFeatures [mandatoryFeatures]]]]

   Machines that already exist keep their runtime State object so
   statistics and current jobs survive a reload. */
void State::parseMachines(const std::string & contents)
{
    Machines newMachines, oldMachines;
    {
        /* Snapshot the current machine set; don't hold the lock while
           parsing. */
        auto machines_(machines.lock());
        oldMachines = *machines_;
    }

    for (auto line : tokenizeString<Strings>(contents, "\n")) {
        /* Strip trailing '#' comments and surrounding whitespace. */
        line = trim(string(line, 0, line.find('#')));
        auto tokens = tokenizeString<std::vector<std::string>>(line);
        /* The first three fields (name, systems, key) are mandatory. */
        if (tokens.size() < 3) continue;
        /* Pad optional fields with empty strings so they can be
           indexed unconditionally below. */
        tokens.resize(7);

        auto machine = std::make_shared<Machine>();
        machine->sshName = tokens[0];
        machine->systemTypes = tokenizeString<StringSet>(tokens[1], ",");
        machine->sshKey = tokens[2];
        /* Default to one concurrent job if maxJobs is absent.
           NOTE(review): a non-numeric maxJobs field is silently
           ignored here — string2Int's result is not checked. */
        if (tokens[3] != "")
            string2Int(tokens[3], machine->maxJobs);
        else
            machine->maxJobs = 1;
        machine->speedFactor = atof(tokens[4].c_str());
        /* "-" is the conventional placeholder for "no features". */
        if (tokens[5] == "-") tokens[5] = "";
        machine->supportedFeatures = tokenizeString<StringSet>(tokens[5], ",");
        if (tokens[6] == "-") tokens[6] = "";
        machine->mandatoryFeatures = tokenizeString<StringSet>(tokens[6], ",");
        /* Mandatory features are implicitly supported. */
        for (auto & f : machine->mandatoryFeatures)
            machine->supportedFeatures.insert(f);

        /* Re-use the State object of the previous machine with the
           same name. */
        auto i = oldMachines.find(machine->sshName);
        if (i == oldMachines.end())
            printMsg(lvlChatty, format("adding new machine ‘%1%’") % machine->sshName);
        else
            printMsg(lvlChatty, format("updating machine ‘%1%’") % machine->sshName);
        machine->state = i == oldMachines.end()
            ? std::make_shared<Machine::State>()
            : i->second->state;
        newMachines[machine->sshName] = machine;
    }

    /* Log machines that disappeared from the file. */
    for (auto & m : oldMachines)
        if (newMachines.find(m.first) == newMachines.end())
            printMsg(lvlInfo, format("removing machine ‘%1%’") % m.first);

    /* Atomically publish the new machine set. */
    auto machines_(machines.lock());
    *machines_ = newMachines;

    /* New or changed machines may make queued steps runnable. */
    wakeDispatcher();
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-25 12:24:11 +02:00
|
|
|
|
/* Thread body: poll the machines file(s) named by $NIX_REMOTE_SYSTEMS
   (falling back to /etc/nix/machines if it exists) and reload the
   machine set whenever any of them changes.  If no file is
   configured, register only the local machine and return. */
void State::monitorMachinesFile()
{
    string defaultMachinesFile = "/etc/nix/machines";
    auto machinesFiles = tokenizeString<std::vector<Path>>(
        getEnv("NIX_REMOTE_SYSTEMS", pathExists(defaultMachinesFile) ? defaultMachinesFile : ""), ":");

    if (machinesFiles.empty()) {
        /* No remote build machines configured: synthesize a single
           "localhost" entry and stop monitoring. */
        parseMachines("localhost " + concatStringsSep(",", localPlatforms)
            + " - " + int2String(settings.maxBuildJobs) + " 1");
        return;
    }

    /* Remember inode + mtime per file so we can cheaply detect
       changes (including atomic rename-into-place updates). */
    std::vector<struct stat> fileStats;
    fileStats.resize(machinesFiles.size());
    for (unsigned int n = 0; n < machinesFiles.size(); ++n) {
        auto & st(fileStats[n]);
        st.st_ino = st.st_mtime = 0;
    }

    auto readMachinesFiles = [&]() {

        /* Check if any of the machines files changed. */
        bool anyChanged = false;
        for (unsigned int n = 0; n < machinesFiles.size(); ++n) {
            Path machinesFile = machinesFiles[n];
            struct stat st;
            if (stat(machinesFile.c_str(), &st) != 0) {
                if (errno != ENOENT)
                    throw SysError(format("getting stats about ‘%1%’") % machinesFile);
                /* A missing file counts as ino/mtime 0, so its
                   removal is also detected as a change. */
                st.st_ino = st.st_mtime = 0;
            }
            auto & old(fileStats[n]);
            if (old.st_ino != st.st_ino || old.st_mtime != st.st_mtime)
                anyChanged = true;
            old = st;
        }

        if (!anyChanged) return;

        debug("reloading machines files");

        /* Concatenate all files; parseMachines handles the combined
           contents.  Files that vanished since the stat are skipped. */
        string contents;
        for (auto & machinesFile : machinesFiles) {
            try {
                contents += readFile(machinesFile);
                contents += '\n';
            } catch (SysError & e) {
                if (e.errNo != ENOENT) throw;
            }
        }

        parseMachines(contents);
    };

    while (true) {
        try {
            readMachinesFiles();
            // FIXME: use inotify.
            sleep(30);
        } catch (std::exception & e) {
            /* Keep the monitor alive on transient errors. */
            printMsg(lvlError, format("reloading machines file: %1%") % e.what());
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-22 14:06:44 +02:00
|
|
|
|
/* Recover from a previous (crashed) queue runner: mark every build
   step still flagged busy as aborted, and clear the busy flag on
   unfinished builds.  A stopTime of 0 leaves stopTime NULL.  Called
   from run() and unlock(). */
void State::clearBusy(Connection & conn, time_t stopTime)
{
    pqxx::work txn(conn);
    txn.parameterized
        ("update BuildSteps set busy = 0, status = $1, stopTime = $2 where busy = 1")
        ((int) bssAborted)
        (stopTime, stopTime != 0).exec();
    txn.exec("update Builds set busy = 0 where finished = 0 and busy = 1");
    txn.commit();
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Insert a new BuildSteps row (plus one BuildStepOutputs row per
   derivation output) for the given build/step, and return the step
   number allocated for it.  Runs inside the caller's transaction.
   For a step that is still running (bssBusy), status and stopTime
   are left NULL. */
int State::createBuildStep(pqxx::work & txn, time_t startTime, Build::ptr build, Step::ptr step,
    const std::string & machine, BuildStepStatus status, const std::string & errorMsg, BuildID propagatedFrom)
{
    /* Acquire an exclusive lock on BuildSteps to ensure that we don't
       race with other threads creating a step of the same build. */
    txn.exec("lock table BuildSteps in exclusive mode");

    /* Step numbers are sequential per build, starting at 1. */
    auto res = txn.parameterized("select max(stepnr) from BuildSteps where build = $1")(build->id).exec();
    int stepNr = res[0][0].is_null() ? 1 : res[0][0].as<int>() + 1;

    /* Second argument to a parameter binding is "is non-null":
       e.g. startTime 0 inserts NULL. */
    txn.parameterized
        ("insert into BuildSteps (build, stepnr, type, drvPath, busy, startTime, system, status, propagatedFrom, errorMsg, stopTime, machine) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)")
        (build->id)(stepNr)(0)(step->drvPath)(status == bssBusy ? 1 : 0)
        (startTime, startTime != 0)
        (step->drv.platform)
        ((int) status, status != bssBusy)
        (propagatedFrom, propagatedFrom != 0)
        (errorMsg, errorMsg != "")
        (startTime, startTime != 0 && status != bssBusy)
        (machine).exec();

    for (auto & output : step->drv.outputs)
        txn.parameterized
            ("insert into BuildStepOutputs (build, stepnr, name, path) values ($1, $2, $3, $4)")
            (build->id)(stepNr)(output.first)(output.second.path).exec();

    return stepNr;
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-09 14:21:21 +02:00
|
|
|
|
/* Mark an existing build step (identified by buildId + stepNr) as
   finished: clear busy and record status, timing, originating
   machine, optional propagation source and error message.  Both
   timestamps must be set. */
void State::finishBuildStep(pqxx::work & txn, time_t startTime, time_t stopTime, BuildID buildId, int stepNr,
    const std::string & machine, BuildStepStatus status, const std::string & errorMsg, BuildID propagatedFrom)
{
    assert(startTime);
    assert(stopTime);
    /* Second argument to a binding is "is non-null": empty machine /
       zero propagatedFrom / empty errorMsg store NULL. */
    txn.parameterized
        ("update BuildSteps set busy = 0, status = $1, propagatedFrom = $4, errorMsg = $5, startTime = $6, stopTime = $7, machine = $8 where build = $2 and stepnr = $3")
        ((int) status)(buildId)(stepNr)
        (propagatedFrom, propagatedFrom != 0)
        (errorMsg, errorMsg != "")
        (startTime)(stopTime)
        (machine, machine != "").exec();
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-18 16:30:28 +02:00
|
|
|
|
/* Get the steps and unfinished builds that depend on the given step. */
|
|
|
|
|
void getDependents(Step::ptr step, std::set<Build::ptr> & builds, std::set<Step::ptr> & steps)
|
2015-05-28 17:39:29 +02:00
|
|
|
|
{
|
|
|
|
|
std::function<void(Step::ptr)> visit;
|
|
|
|
|
|
|
|
|
|
visit = [&](Step::ptr step) {
|
2015-06-18 16:30:28 +02:00
|
|
|
|
if (has(steps, step)) return;
|
|
|
|
|
steps.insert(step);
|
2015-05-28 17:39:29 +02:00
|
|
|
|
|
2015-05-29 17:14:20 +02:00
|
|
|
|
std::vector<Step::wptr> rdeps;
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
auto step_(step->state.lock());
|
|
|
|
|
|
|
|
|
|
for (auto & build : step_->builds) {
|
|
|
|
|
auto build_ = build.lock();
|
2015-06-18 16:30:28 +02:00
|
|
|
|
if (build_ && !build_->finishedInDB) builds.insert(build_);
|
2015-05-29 17:14:20 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Make a copy of rdeps so that we don't hold the lock for
|
|
|
|
|
very long. */
|
|
|
|
|
rdeps = step_->rdeps;
|
2015-05-28 17:39:29 +02:00
|
|
|
|
}
|
|
|
|
|
|
2015-05-29 17:14:20 +02:00
|
|
|
|
for (auto & rdep : rdeps) {
|
|
|
|
|
auto rdep_ = rdep.lock();
|
|
|
|
|
if (rdep_) visit(rdep_);
|
2015-05-28 17:39:29 +02:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
visit(step);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-08-10 14:50:22 +02:00
|
|
|
|
void visitDependencies(std::function<void(Step::ptr)> visitor, Step::ptr start)
|
|
|
|
|
{
|
|
|
|
|
std::set<Step::ptr> queued;
|
|
|
|
|
std::queue<Step::ptr> todo;
|
|
|
|
|
todo.push(start);
|
|
|
|
|
|
|
|
|
|
while (!todo.empty()) {
|
|
|
|
|
auto step = todo.front();
|
|
|
|
|
todo.pop();
|
|
|
|
|
|
|
|
|
|
visitor(step);
|
|
|
|
|
|
|
|
|
|
auto state(step->state.lock());
|
|
|
|
|
for (auto & dep : state->deps)
|
|
|
|
|
if (queued.find(dep) == queued.end()) {
|
|
|
|
|
queued.insert(dep);
|
|
|
|
|
todo.push(dep);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-05-28 17:39:29 +02:00
|
|
|
|
/* Record a successful (or failed-with-output) build in the database:
   update the Builds row and rewrite its BuildProducts and
   BuildMetrics rows from ‘res’.  Runs inside the caller's
   transaction.  No-op if the build is already finished, either in
   memory or in the database. */
void State::markSucceededBuild(pqxx::work & txn, Build::ptr build,
    const BuildOutput & res, bool isCachedBuild, time_t startTime, time_t stopTime)
{
    printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id);

    if (build->finishedInDB) return;

    /* Another process may have finished this build already; bail out
       if no unfinished row exists. */
    if (txn.parameterized("select 1 from Builds where id = $1 and finished = 0")(build->id).exec().empty()) return;

    txn.parameterized
        ("update Builds set finished = 1, busy = 0, buildStatus = $2, startTime = $3, stopTime = $4, size = $5, closureSize = $6, releaseName = $7, isCachedBuild = $8 where id = $1")
        (build->id)
        ((int) (res.failed ? bsFailedWithOutput : bsSuccess))
        (startTime)
        (stopTime)
        (res.size)
        (res.closureSize)
        (res.releaseName, res.releaseName != "")
        (isCachedBuild ? 1 : 0).exec();

    /* Replace, don't append: clear any products from a previous
       attempt before inserting the new ones. */
    txn.parameterized("delete from BuildProducts where build = $1")(build->id).exec();

    unsigned int productNr = 1;
    for (auto & product : res.products) {
        /* File size and hashes are NULL for non-regular files. */
        txn.parameterized
            ("insert into BuildProducts (build, productnr, type, subtype, fileSize, sha1hash, sha256hash, path, name, defaultPath) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)")
            (build->id)
            (productNr++)
            (product.type)
            (product.subtype)
            (product.fileSize, product.isRegular)
            (printHash(product.sha1hash), product.isRegular)
            (printHash(product.sha256hash), product.isRegular)
            (product.path)
            (product.name)
            (product.defaultPath).exec();
    }

    /* Same replace-semantics for metrics. */
    txn.parameterized("delete from BuildMetrics where build = $1")(build->id).exec();

    for (auto & metric : res.metrics) {
        txn.parameterized
            ("insert into BuildMetrics (build, name, unit, value, project, jobset, job, timestamp) values ($1, $2, $3, $4, $5, $6, $7, $8)")
            (build->id)
            (metric.second.name)
            (metric.second.unit, metric.second.unit != "")
            (metric.second.value)
            (build->projectName)
            (build->jobsetName)
            (build->jobName)
            (build->timestamp).exec();
    }

    nrBuildsDone++;
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-15 15:31:42 +02:00
|
|
|
|
/* Return true if this step is a known (cached) failure, i.e. any of
   its derivation's output paths appears in the FailedPaths table. */
bool State::checkCachedFailure(Step::ptr step, Connection & conn)
{
    pqxx::work txn(conn);
    for (auto & outPath : outputPaths(step->drv)) {
        auto rows = txn.parameterized("select 1 from FailedPaths where path = $1")(outPath).exec();
        if (!rows.empty()) return true;
    }
    return false;
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-19 14:51:59 +02:00
|
|
|
|
void State::logCompressor()
|
|
|
|
|
{
|
|
|
|
|
while (true) {
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
|
|
Path logPath;
|
|
|
|
|
{
|
|
|
|
|
auto logCompressorQueue_(logCompressorQueue.lock());
|
|
|
|
|
while (logCompressorQueue_->empty())
|
|
|
|
|
logCompressorQueue_.wait(logCompressorWakeup);
|
|
|
|
|
logPath = logCompressorQueue_->front();
|
|
|
|
|
logCompressorQueue_->pop();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!pathExists(logPath)) continue;
|
|
|
|
|
|
|
|
|
|
printMsg(lvlChatty, format("compressing log file ‘%1%’") % logPath);
|
|
|
|
|
|
|
|
|
|
Path tmpPath = logPath + ".bz2.tmp";
|
|
|
|
|
|
|
|
|
|
AutoCloseFD fd = open(tmpPath.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
|
|
|
|
|
|
|
|
|
|
// FIXME: use libbz2
|
|
|
|
|
|
|
|
|
|
Pid pid = startProcess([&]() {
|
|
|
|
|
if (dup2(fd, STDOUT_FILENO) == -1)
|
|
|
|
|
throw SysError("cannot dup output pipe to stdout");
|
|
|
|
|
execlp("bzip2", "bzip2", "-c", logPath.c_str(), nullptr);
|
2015-06-23 00:14:49 +02:00
|
|
|
|
throw SysError("cannot start bzip2");
|
2015-06-19 14:51:59 +02:00
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
int res = pid.wait(true);
|
|
|
|
|
|
|
|
|
|
if (res != 0)
|
|
|
|
|
throw Error(format("bzip2 returned exit code %1% while compressing ‘%2%’")
|
|
|
|
|
% res % logPath);
|
|
|
|
|
|
|
|
|
|
if (rename(tmpPath.c_str(), (logPath + ".bz2").c_str()) != 0)
|
|
|
|
|
throw SysError(format("renaming ‘%1%’") % tmpPath);
|
|
|
|
|
|
|
|
|
|
if (unlink(logPath.c_str()) != 0)
|
|
|
|
|
throw SysError(format("unlinking ‘%1%’") % logPath);
|
|
|
|
|
|
|
|
|
|
} catch (std::exception & e) {
|
|
|
|
|
printMsg(lvlError, format("log compressor: %1%") % e.what());
|
|
|
|
|
sleep(5);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-23 00:14:49 +02:00
|
|
|
|
void State::notificationSender()
|
|
|
|
|
{
|
|
|
|
|
while (true) {
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
|
|
NotificationItem item;
|
|
|
|
|
{
|
|
|
|
|
auto notificationSenderQueue_(notificationSenderQueue.lock());
|
|
|
|
|
while (notificationSenderQueue_->empty())
|
|
|
|
|
notificationSenderQueue_.wait(notificationSenderWakeup);
|
|
|
|
|
item = notificationSenderQueue_->front();
|
|
|
|
|
notificationSenderQueue_->pop();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
printMsg(lvlChatty, format("sending notification about build %1%") % item.first);
|
|
|
|
|
|
|
|
|
|
Pid pid = startProcess([&]() {
|
|
|
|
|
Strings argv({"hydra-notify", "build", int2String(item.first)});
|
|
|
|
|
for (auto id : item.second)
|
|
|
|
|
argv.push_back(int2String(id));
|
|
|
|
|
execvp("hydra-notify", (char * *) stringsToCharPtrs(argv).data()); // FIXME: remove cast
|
|
|
|
|
throw SysError("cannot start hydra-notify");
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
int res = pid.wait(true);
|
|
|
|
|
|
|
|
|
|
if (res != 0)
|
|
|
|
|
throw Error(format("hydra-build returned exit code %1% notifying about build %2%")
|
|
|
|
|
% res % item.first);
|
|
|
|
|
|
|
|
|
|
} catch (std::exception & e) {
|
|
|
|
|
printMsg(lvlError, format("notification sender: %1%") % e.what());
|
|
|
|
|
sleep(5);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-22 14:24:03 +02:00
|
|
|
|
std::shared_ptr<PathLocks> State::acquireGlobalLock()
|
|
|
|
|
{
|
2015-07-02 01:01:44 +02:00
|
|
|
|
Path lockPath = hydraData + "/queue-runner/lock";
|
|
|
|
|
|
|
|
|
|
createDirs(dirOf(lockPath));
|
2015-06-22 14:24:03 +02:00
|
|
|
|
|
|
|
|
|
auto lock = std::make_shared<PathLocks>();
|
|
|
|
|
if (!lock->lockPaths(PathSet({lockPath}), "", false)) return 0;
|
|
|
|
|
|
|
|
|
|
return lock;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-22 14:15:43 +02:00
|
|
|
|
/* Serialize the runner's current statistics as a JSON document,
   optionally log it, and store it in the SystemStatus table (then
   NOTIFY status_dumped so showStatus() can pick it up).  The JSON is
   built with the streaming JSONObject helper, so attribute order
   follows statement order here. */
void State::dumpStatus(Connection & conn, bool log)
{
    std::ostringstream out;

    {
        JSONObject root(out);
        time_t now = time(0);
        root.attr("status", "up");
        root.attr("time", time(0));
        root.attr("uptime", now - startedAt);
        root.attr("pid", getpid());
        {
            auto builds_(builds.lock());
            root.attr("nrQueuedBuilds", builds_->size());
        }
        {
            /* Prune expired weak pointers while counting. */
            auto steps_(steps.lock());
            for (auto i = steps_->begin(); i != steps_->end(); )
                if (i->second.lock()) ++i; else i = steps_->erase(i);
            root.attr("nrUnfinishedSteps", steps_->size());
        }
        {
            /* Same pruning for the runnable set. */
            auto runnable_(runnable.lock());
            for (auto i = runnable_->begin(); i != runnable_->end(); )
                if (i->lock()) ++i; else i = runnable_->erase(i);
            root.attr("nrRunnableSteps", runnable_->size());
        }
        root.attr("nrActiveSteps", nrActiveSteps);
        root.attr("nrStepsBuilding", nrStepsBuilding);
        root.attr("nrStepsCopyingTo", nrStepsCopyingTo);
        root.attr("nrStepsCopyingFrom", nrStepsCopyingFrom);
        root.attr("nrStepsWaiting", nrStepsWaiting);
        /* attr(name) emits only the key; the value is streamed
           directly into ‘out’ — presumably to avoid an overload for
           this counter type (TODO confirm). */
        root.attr("bytesSent"); out << bytesSent;
        root.attr("bytesReceived"); out << bytesReceived;
        root.attr("nrBuildsRead", nrBuildsRead);
        root.attr("nrBuildsDone", nrBuildsDone);
        root.attr("nrStepsDone", nrStepsDone);
        root.attr("nrRetries", nrRetries);
        root.attr("maxNrRetries", maxNrRetries);
        if (nrStepsDone) {
            root.attr("totalStepTime", totalStepTime);
            root.attr("totalStepBuildTime", totalStepBuildTime);
            root.attr("avgStepTime"); out << (float) totalStepTime / nrStepsDone;
            root.attr("avgStepBuildTime"); out << (float) totalStepBuildTime / nrStepsDone;
        }
        root.attr("nrQueueWakeups", nrQueueWakeups);
        root.attr("nrDispatcherWakeups", nrDispatcherWakeups);
        root.attr("nrDbConnections", dbPool.count());
        {
            /* Per-machine statistics, keyed by SSH name. */
            root.attr("machines");
            JSONObject nested(out);
            auto machines_(machines.lock());
            for (auto & i : *machines_) {
                auto & m(i.second);
                auto & s(m->state);
                nested.attr(m->sshName);
                JSONObject nested2(out);
                nested2.attr("currentJobs", s->currentJobs);
                if (s->currentJobs == 0)
                    nested2.attr("idleSince", s->idleSince);
                nested2.attr("nrStepsDone", s->nrStepsDone);
                if (m->state->nrStepsDone) {
                    nested2.attr("totalStepTime", s->totalStepTime);
                    nested2.attr("totalStepBuildTime", s->totalStepBuildTime);
                    nested2.attr("avgStepTime"); out << (float) s->totalStepTime / s->nrStepsDone;
                    nested2.attr("avgStepBuildTime"); out << (float) s->totalStepBuildTime / s->nrStepsDone;
                }
            }
        }
        {
            /* Per-jobset scheduling info, keyed "project:jobset". */
            root.attr("jobsets");
            JSONObject nested(out);
            auto jobsets_(jobsets.lock());
            for (auto & jobset : *jobsets_) {
                nested.attr(jobset.first.first + ":" + jobset.first.second);
                JSONObject nested2(out);
                nested2.attr("shareUsed"); out << jobset.second->shareUsed();
                nested2.attr("seconds", jobset.second->getSeconds());
            }
        }
        {
            /* Per-machine-type queue statistics. */
            root.attr("machineTypes");
            JSONObject nested(out);
            auto machineTypes_(machineTypes.lock());
            for (auto & i : *machineTypes_) {
                nested.attr(i.first);
                JSONObject nested2(out);
                nested2.attr("runnable", i.second.runnable);
                nested2.attr("running", i.second.running);
                /* Accumulated wait time plus time waited since the
                   dispatcher last looked. */
                if (i.second.runnable > 0)
                    nested2.attr("waitTime", i.second.waitTime.count() +
                        i.second.runnable * (time(0) - lastDispatcherCheck));
                if (i.second.running == 0)
                    nested2.attr("lastActive", std::chrono::system_clock::to_time_t(i.second.lastActive));
            }
        }
    }

    if (log) printMsg(lvlInfo, format("status: %1%") % out.str());

    {
        pqxx::work txn(conn);
        // FIXME: use PostgreSQL 9.5 upsert.
        txn.exec("delete from SystemStatus where what = 'queue-runner'");
        txn.parameterized("insert into SystemStatus values ('queue-runner', $1)")(out.str()).exec();
        txn.exec("notify status_dumped");
        txn.commit();
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Implementation of ‘--status’: print the queue runner's JSON status
   to stdout.  If a runner is up, ask it (via NOTIFY dump_status) to
   refresh the dump first and wait briefly for its reply. */
void State::showStatus()
{
    auto conn(dbPool.get());
    /* Subscribe before notifying, so we can't miss the reply. */
    receiver statusDumped(*conn, "status_dumped");

    string status;
    bool barf = false;

    /* Get the last JSON status dump from the database. */
    {
        pqxx::work txn(*conn);
        auto res = txn.exec("select status from SystemStatus where what = 'queue-runner'");
        if (res.size()) status = res[0][0].as<string>();
    }

    if (status != "") {

        /* If the status is not empty, then the queue runner is
           running. Ask it to update the status dump. */
        {
            pqxx::work txn(*conn);
            txn.exec("notify dump_status");
            txn.commit();
        }

        /* Wait until it has done so.  Give up after 5 seconds. */
        barf = conn->await_notification(5, 0) == 0;

        /* Get the new status. */
        {
            pqxx::work txn(*conn);
            auto res = txn.exec("select status from SystemStatus where what = 'queue-runner'");
            if (res.size()) status = res[0][0].as<string>();
        }

    }

    /* No record at all means no runner has ever stored a status. */
    if (status == "") status = R"({"status":"down"})";

    std::cout << status << "\n";

    /* Print the (possibly stale) status first, then complain. */
    if (barf)
        throw Error("queue runner did not respond; status information may be wrong");
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void State::unlock()
|
|
|
|
|
{
|
2015-06-22 14:24:03 +02:00
|
|
|
|
auto lock = acquireGlobalLock();
|
|
|
|
|
if (!lock)
|
|
|
|
|
throw Error("hydra-queue-runner is currently running");
|
|
|
|
|
|
2015-06-22 14:06:44 +02:00
|
|
|
|
auto conn(dbPool.get());
|
|
|
|
|
|
|
|
|
|
clearBusy(*conn, 0);
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
pqxx::work txn(*conn);
|
|
|
|
|
txn.exec("delete from SystemStatus where what = 'queue-runner'");
|
|
|
|
|
txn.commit();
|
2015-06-15 18:20:14 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-25 15:29:22 +02:00
|
|
|
|
/* Main entry point of the queue runner: acquire the global lock,
   reset leftover busy state, start the worker threads (machines-file
   monitor, queue monitor, dispatcher, log compressor, notification
   sender) and then serve status-dump requests forever.  If buildOne
   is non-zero, only that build is processed (used by --build-one). */
void State::run(BuildID buildOne)
{
    startedAt = time(0);
    this->buildOne = buildOne;

    auto lock = acquireGlobalLock();
    if (!lock)
        throw Error("hydra-queue-runner is already running");

    {
        /* Recover from a previous crash before doing anything else. */
        auto conn(dbPool.get());
        clearBusy(*conn, 0);
        dumpStatus(*conn, false);
    }

    /* Worker threads run for the lifetime of the process, hence
       detach(). */
    std::thread(&State::monitorMachinesFile, this).detach();

    std::thread(&State::queueMonitor, this).detach();

    std::thread(&State::dispatcher, this).detach();

    /* Run a log compressor thread. If needed, we could start more
       than one. */
    std::thread(&State::logCompressor, this).detach();

    /* Idem for notification sending. */
    std::thread(&State::notificationSender, this).detach();

    /* Monitor the database for status dump requests (e.g. from
       ‘hydra-queue-runner --status’). */
    while (true) {
        try {
            auto conn(dbPool.get());
            receiver dumpStatus(*conn, "dump_status");
            while (true) {
                /* Also refresh (and log) the status every 300 s even
                   without a request. */
                bool timeout = conn->await_notification(300, 0) == 0;
                State::dumpStatus(*conn, timeout);
            }
        } catch (std::exception & e) {
            printMsg(lvlError, format("main thread: %1%") % e.what());
            sleep(10); // probably a DB problem, so don't retry right away
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
2015-05-28 17:39:29 +02:00
|
|
|
|
/* Program entry point: parse --unlock / --status / --build-one,
   tweak Nix settings, and dispatch to the corresponding State
   operation (run() by default). */
int main(int argc, char * * argv)
{
    return handleExceptions(argv[0], [&]() {
        initNix();

        /* Restore default signal handling (initNix may have changed
           it — TODO confirm) so the runner dies cleanly on
           INT/TERM/HUP. */
        signal(SIGINT, SIG_DFL);
        signal(SIGTERM, SIG_DFL);
        signal(SIGHUP, SIG_DFL);

        bool unlock = false;
        bool status = false;
        BuildID buildOne = 0;

        parseCmdLine(argc, argv, [&](Strings::iterator & arg, const Strings::iterator & end) {
            if (*arg == "--unlock")
                unlock = true;
            else if (*arg == "--status")
                status = true;
            else if (*arg == "--build-one") {
                if (!string2Int<BuildID>(getArg(*arg, arg, end), buildOne))
                    throw Error("‘--build-one’ requires a build ID");
            } else
                return false;   /* unknown flag */
            return true;
        });

        /* Capture full build output; builds must be deterministic,
           so no substitutes and no CPU locking. */
        settings.buildVerbosity = lvlVomit;
        settings.useSubstitutes = false;
        settings.lockCPU = false;

        State state;
        /* --status takes precedence over --unlock. */
        if (status)
            state.showStatus();
        else if (unlock)
            state.unlock();
        else
            state.run(buildOne);
    });
}
|