2015-06-09 14:21:21 +02:00
|
|
|
|
#include <algorithm>
|
2016-09-30 17:05:07 +02:00
|
|
|
|
#include <cmath>
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
#include <sys/stat.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
|
2022-03-09 23:50:30 +01:00
|
|
|
|
#include "build-result.hh"
|
2015-06-09 14:21:21 +02:00
|
|
|
|
#include "serve-protocol.hh"
|
2015-07-07 10:25:33 +02:00
|
|
|
|
#include "state.hh"
|
|
|
|
|
#include "util.hh"
|
2015-06-09 14:21:21 +02:00
|
|
|
|
#include "worker-protocol.hh"
|
2016-02-26 16:16:36 +01:00
|
|
|
|
#include "finally.hh"
|
2022-01-07 17:06:56 +01:00
|
|
|
|
#include "url.hh"
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
|
|
|
|
using namespace nix;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct Child
|
|
|
|
|
{
|
|
|
|
|
Pid pid;
|
|
|
|
|
AutoCloseFD to, from;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2015-06-25 15:29:22 +02:00
|
|
|
|
/* Append every element of `src`, in order, to the end of `dst`. */
static void append(Strings & dst, const Strings & src)
{
    for (const auto & item : src)
        dst.push_back(item);
}
|
|
|
|
|
|
2022-01-07 17:06:56 +01:00
|
|
|
|
/* Split a machine specification into an SSH destination plus extra
   arguments for the remote `nix-store` invocation.

   If `machine` parses as a URL, its scheme must be `ssh`. `machine` is then
   overwritten with the URL's authority component (or "" when the URL has
   none), and a `remote-store` query parameter, if present, is returned as
   `--store <shell-escaped value>`.

   If `machine` does not parse as a URL (parseURL throws BadURL), it is left
   untouched and an empty list is returned.

   Throws Error when the URL uses any scheme other than `ssh`. */
static Strings extraStoreArgs(std::string & machine)
{
    Strings result;
    try {
        auto parsed = parseURL(machine);
        if (parsed.scheme != "ssh") {
            /* This is a configuration problem, not a failed system call, so
               use plain Error: SysError would tack an unrelated errno
               description onto the message. */
            throw Error("Currently, only (legacy-)ssh stores are supported!");
        }
        machine = parsed.authority.value_or("");
        auto remoteStore = parsed.query.find("remote-store");
        if (remoteStore != parsed.query.end()) {
            result = {"--store", shellEscape(remoteStore->second)};
        }
    } catch (BadURL &) {
        // Not a URL: keep using `machine` verbatim for backwards compat.
    }

    return result;
}
|
2015-06-25 15:29:22 +02:00
|
|
|
|
|
2015-08-26 13:43:02 +02:00
|
|
|
|
/* Start the process that serves the `nix-store --serve --write` protocol
   for `machine` and return its pid and our pipe ends through `child`.

   For a localhost machine `nix-store` is executed directly; otherwise `ssh`
   is executed with the machine's SSH name, optional identity file and
   optional pinned host key.

   `tmpDir` is where the temporary known-hosts file is written when the
   machine has a public host key configured. `stderrFD` becomes the child's
   stderr.

   Errors from pipe creation, writeFile() or startProcess() propagate to the
   caller. */
static void openConnection(Machine::ptr machine, Path tmpDir, int stderrFD, Child & child)
{
    std::string pgmName;
    Pipe to, from;
    to.create();
    from.create();

    Strings argv;
    if (machine->isLocalhost()) {
        pgmName = "nix-store";
        argv = {"nix-store", "--builders", "", "--serve", "--write"};
    } else {
        pgmName = "ssh";
        /* extraStoreArgs() rewrites `sshName` to the URL authority when the
           machine is given as an ssh:// URL. */
        auto sshName = machine->sshName;
        Strings extraArgs = extraStoreArgs(sshName);
        argv = {"ssh", sshName};
        if (machine->sshKey != "") append(argv, {"-i", machine->sshKey});
        if (machine->sshPublicHostKey != "") {
            Path fileName = tmpDir + "/host-key";
            /* Derive the host from the parsed SSH name rather than from
               machine->sshName: the latter may still be a full URL, and a
               known-hosts entry containing the scheme or query string would
               never match the host ssh connects to. */
            auto p = sshName.find("@");
            std::string host = p != std::string::npos ? std::string(sshName, p + 1) : sshName;
            writeFile(fileName, host + " " + machine->sshPublicHostKey + "\n");
            append(argv, {"-oUserKnownHostsFile=" + fileName});
        }
        append(argv,
            { "-x", "-a", "-oBatchMode=yes", "-oConnectTimeout=60", "-oTCPKeepAlive=yes"
            , "--", "nix-store", "--serve", "--write" });
        append(argv, extraArgs);
    }

    child.pid = startProcess([&]() {
        restoreProcessContext();

        /* Wire the pipes and the log descriptor to the child's standard
           streams before exec'ing. */
        if (dup2(to.readSide.get(), STDIN_FILENO) == -1)
            throw SysError("cannot dup input pipe to stdin");

        if (dup2(from.writeSide.get(), STDOUT_FILENO) == -1)
            throw SysError("cannot dup output pipe to stdout");

        if (dup2(stderrFD, STDERR_FILENO) == -1)
            throw SysError("cannot dup stderr");

        execvp(argv.front().c_str(), (char * *) stringsToCharPtrs(argv).data()); // FIXME: remove cast

        /* Only reached when execvp() failed. */
        throw SysError("cannot start %s", pgmName);
    });

    /* Give up this process's handles on the ends that belong to the child. */
    to.readSide = -1;
    from.writeSide = -1;

    /* Transfer ownership of our ends to the caller. */
    child.to = to.writeSide.release();
    child.from = from.readSide.release();
}
|
|
|
|
|
|
|
|
|
|
|
2022-02-20 17:18:52 +00:00
|
|
|
|
static void copyClosureTo(std::timed_mutex & sendMutex, Store & destStore,
|
2019-12-30 22:49:26 +01:00
|
|
|
|
FdSource & from, FdSink & to, const StorePathSet & paths,
|
2015-06-09 14:21:21 +02:00
|
|
|
|
bool useSubstitutes = false)
|
|
|
|
|
{
|
2019-12-30 22:49:26 +01:00
|
|
|
|
StorePathSet closure;
|
2022-02-20 17:18:52 +00:00
|
|
|
|
destStore.computeFSClosure(paths, closure);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
|
|
|
|
/* Send the "query valid paths" command with the "lock" option
|
|
|
|
|
enabled. This prevents a race where the remote host
|
|
|
|
|
garbage-collect paths that are already there. Optionally, ask
|
|
|
|
|
the remote host to substitute missing paths. */
|
2016-02-26 21:15:05 +01:00
|
|
|
|
// FIXME: substitute output pollutes our build log
|
2019-12-30 22:49:26 +01:00
|
|
|
|
to << cmdQueryValidPaths << 1 << useSubstitutes;
|
2022-02-20 17:18:52 +00:00
|
|
|
|
worker_proto::write(destStore, to, closure);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
to.flush();
|
|
|
|
|
|
|
|
|
|
/* Get back the set of paths that are already valid on the remote
|
|
|
|
|
host. */
|
2022-02-20 17:18:52 +00:00
|
|
|
|
auto present = worker_proto::read(destStore, from, Phantom<StorePathSet> {});
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2015-06-09 16:03:41 +02:00
|
|
|
|
if (present.size() == closure.size()) return;
|
|
|
|
|
|
2022-02-20 17:18:52 +00:00
|
|
|
|
auto sorted = destStore.topoSortPaths(closure);
|
2015-06-09 16:03:41 +02:00
|
|
|
|
|
2019-12-30 22:49:26 +01:00
|
|
|
|
StorePathSet missing;
|
2015-06-09 16:03:41 +02:00
|
|
|
|
for (auto i = sorted.rbegin(); i != sorted.rend(); ++i)
|
2020-06-23 13:43:54 +02:00
|
|
|
|
if (!present.count(*i)) missing.insert(*i);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2020-06-23 13:43:54 +02:00
|
|
|
|
printMsg(lvlDebug, "sending %d missing paths", missing.size());
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2017-09-01 16:28:49 +02:00
|
|
|
|
std::unique_lock<std::timed_mutex> sendLock(sendMutex,
|
|
|
|
|
std::chrono::seconds(600));
|
|
|
|
|
|
2015-07-21 01:45:00 +02:00
|
|
|
|
to << cmdImportPaths;
|
2022-02-20 17:18:52 +00:00
|
|
|
|
destStore.exportPaths(missing, to);
|
2015-06-09 16:03:41 +02:00
|
|
|
|
to.flush();
|
|
|
|
|
|
|
|
|
|
if (readInt(from) != 1)
|
|
|
|
|
throw Error("remote machine failed to import closure");
|
2015-06-09 14:21:21 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-07-27 18:11:04 +02:00
|
|
|
|
// FIXME: use Store::topoSortPaths().
|
2020-07-28 13:46:57 +02:00
|
|
|
|
/* Order the keys of `paths` by a depth-first post-order walk over their
   references: each path is emitted after those of its references that are
   themselves keys of `paths`. Self-references and references to paths
   outside `paths` are ignored. */
StorePaths reverseTopoSortPaths(const std::map<StorePath, ValidPathInfo> & paths)
{
    StorePaths ordered;
    StorePathSet seen;

    std::function<void(const StorePath & path)> visit;

    visit = [&](const StorePath & node) {
        const bool firstVisit = seen.insert(node).second;
        if (!firstVisit) return;

        auto entry = paths.find(node);
        if (entry != paths.end()) {
            for (auto & ref : entry->second.references) {
                /* Don't descend into paths we have no info for. That can
                   happen due to substitutes for non-existent paths. */
                if (ref != node && paths.find(ref) != paths.end())
                    visit(ref);
            }
        }

        ordered.push_back(node);
    };

    for (auto & entry : paths)
        visit(entry.first);

    return ordered;
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-15 21:10:29 +01:00
|
|
|
|
void State::buildRemote(ref<Store> destStore,
|
2015-07-07 10:25:33 +02:00
|
|
|
|
Machine::ptr machine, Step::ptr step,
|
2016-12-07 15:57:13 +01:00
|
|
|
|
unsigned int maxSilentTime, unsigned int buildTimeout, unsigned int repeats,
|
2017-12-07 15:35:31 +01:00
|
|
|
|
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
2020-07-27 20:38:59 +02:00
|
|
|
|
std::function<void(StepState)> updateStep,
|
|
|
|
|
NarMemberDatas & narMembers)
|
2015-06-09 14:21:21 +02:00
|
|
|
|
{
|
2016-10-26 15:09:16 +02:00
|
|
|
|
assert(BuildResult::TimedOut == 8);
|
|
|
|
|
|
2022-03-09 23:50:30 +01:00
|
|
|
|
std::string base(step->drvPath.to_string());
|
|
|
|
|
result.logFile = logDir + "/" + std::string(base, 0, 2) + "/" + std::string(base, 2);
|
2015-06-19 14:51:59 +02:00
|
|
|
|
AutoDelete autoDelete(result.logFile, false);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2015-06-19 14:51:59 +02:00
|
|
|
|
createDirs(dirOf(result.logFile));
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2016-10-06 15:24:09 +02:00
|
|
|
|
AutoCloseFD logFD = open(result.logFile.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0666);
|
2020-06-23 13:43:54 +02:00
|
|
|
|
if (!logFD) throw SysError("creating log file ‘%s’", result.logFile);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2015-08-26 13:43:02 +02:00
|
|
|
|
nix::Path tmpDir = createTempDir();
|
|
|
|
|
AutoDelete tmpDirDel(tmpDir, true);
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
try {
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssConnecting);
|
|
|
|
|
|
2020-07-27 18:11:04 +02:00
|
|
|
|
// FIXME: rewrite to use Store.
|
2016-03-22 16:54:40 +01:00
|
|
|
|
Child child;
|
2016-10-06 15:24:09 +02:00
|
|
|
|
openConnection(machine, tmpDir, logFD.get(), child);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2016-11-07 19:34:35 +01:00
|
|
|
|
{
|
|
|
|
|
auto activeStepState(activeStep->state_.lock());
|
|
|
|
|
if (activeStepState->cancelled) throw Error("step cancelled");
|
|
|
|
|
activeStepState->pid = child.pid;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Finally clearPid([&]() {
|
|
|
|
|
auto activeStepState(activeStep->state_.lock());
|
|
|
|
|
activeStepState->pid = -1;
|
|
|
|
|
|
|
|
|
|
/* FIXME: there is a slight race here with step
|
|
|
|
|
cancellation in State::processQueueChange(), which
|
|
|
|
|
could call kill() on this pid after we've done waitpid()
|
|
|
|
|
on it. With pid wrap-around, there is a tiny
|
|
|
|
|
possibility that we end up killing another
|
|
|
|
|
process. Meh. */
|
|
|
|
|
});
|
|
|
|
|
|
2016-10-06 15:24:09 +02:00
|
|
|
|
FdSource from(child.from.get());
|
|
|
|
|
FdSink to(child.to.get());
|
2016-02-26 16:16:36 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
Finally updateStats([&]() {
|
|
|
|
|
bytesReceived += from.read;
|
|
|
|
|
bytesSent += to.written;
|
|
|
|
|
});
|
2015-10-06 17:35:08 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Handshake. */
|
|
|
|
|
unsigned int remoteVersion;
|
2015-06-17 11:45:20 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
try {
|
2020-07-27 18:11:04 +02:00
|
|
|
|
to << SERVE_MAGIC_1 << 0x204;
|
2016-03-22 16:54:40 +01:00
|
|
|
|
to.flush();
|
2015-07-31 03:39:20 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
unsigned int magic = readInt(from);
|
|
|
|
|
if (magic != SERVE_MAGIC_2)
|
2020-06-23 13:43:54 +02:00
|
|
|
|
throw Error("protocol mismatch with ‘nix-store --serve’ on ‘%1%’", machine->sshName);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
remoteVersion = readInt(from);
|
|
|
|
|
if (GET_PROTOCOL_MAJOR(remoteVersion) != 0x200)
|
2020-06-23 13:43:54 +02:00
|
|
|
|
throw Error("unsupported ‘nix-store --serve’ protocol version on ‘%1%’", machine->sshName);
|
2016-12-08 16:03:02 +01:00
|
|
|
|
if (GET_PROTOCOL_MINOR(remoteVersion) < 3 && repeats > 0)
|
|
|
|
|
throw Error("machine ‘%1%’ does not support repeating a build; please upgrade it to Nix 1.12", machine->sshName);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
|
|
|
|
|
} catch (EndOfFile & e) {
|
2017-02-03 14:39:18 +01:00
|
|
|
|
child.pid.wait();
|
2022-03-09 23:50:30 +01:00
|
|
|
|
std::string s = chomp(readFile(result.logFile));
|
2020-06-23 13:43:54 +02:00
|
|
|
|
throw Error("cannot connect to ‘%1%’: %2%", machine->sshName, s);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
}
|
2015-07-21 15:53:27 +02:00
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
auto info(machine->state->connectInfo.lock());
|
2016-03-22 16:54:40 +01:00
|
|
|
|
info->consecutiveFailures = 0;
|
2015-07-21 15:53:27 +02:00
|
|
|
|
}
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Gather the inputs. If the remote side is Nix <= 1.9, we have to
|
|
|
|
|
copy the entire closure of ‘drvPath’, as well as the required
|
|
|
|
|
outputs of the input derivations. On Nix > 1.9, we only need to
|
|
|
|
|
copy the immediate sources of the derivation and the required
|
|
|
|
|
outputs of the input derivations. */
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssSendingInputs);
|
|
|
|
|
|
2019-12-30 22:49:26 +01:00
|
|
|
|
StorePathSet inputs;
|
|
|
|
|
BasicDerivation basicDrv(*step->drv);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
|
2021-02-23 09:50:15 +01:00
|
|
|
|
for (auto & p : step->drv->inputSrcs)
|
|
|
|
|
inputs.insert(p);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
|
2019-12-30 22:49:26 +01:00
|
|
|
|
for (auto & input : step->drv->inputDrvs) {
|
2020-08-04 11:33:29 +02:00
|
|
|
|
auto drv2 = localStore->readDerivation(input.first);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
for (auto & name : input.second) {
|
2020-08-04 11:33:29 +02:00
|
|
|
|
if (auto i = get(drv2.outputs, name)) {
|
2020-09-26 23:37:39 +02:00
|
|
|
|
auto outPath = i->path(*localStore, drv2.name, name);
|
|
|
|
|
inputs.insert(*outPath);
|
|
|
|
|
basicDrv.inputSrcs.insert(*outPath);
|
2020-08-04 11:33:29 +02:00
|
|
|
|
}
|
2016-03-22 16:54:40 +01:00
|
|
|
|
}
|
2015-06-17 17:28:59 +02:00
|
|
|
|
}
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Ensure that the inputs exist in the destination store. This is
|
|
|
|
|
a no-op for regular stores, but for the binary cache store,
|
|
|
|
|
this will copy the inputs to the binary cache from the local
|
|
|
|
|
store. */
|
2021-03-10 12:42:19 -05:00
|
|
|
|
if (localStore != std::shared_ptr<Store>(destStore)) {
|
2022-03-29 15:28:47 -04:00
|
|
|
|
copyClosure(*localStore, *destStore,
|
|
|
|
|
step->drv->inputSrcs,
|
|
|
|
|
NoRepair, NoCheckSigs, NoSubstitute);
|
2021-03-10 12:42:19 -05:00
|
|
|
|
}
|
2016-02-15 21:10:29 +01:00
|
|
|
|
|
2022-01-21 15:35:47 -05:00
|
|
|
|
{
|
2017-09-14 17:22:48 +02:00
|
|
|
|
auto mc1 = std::make_shared<MaintainCount<counter>>(nrStepsWaiting);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
mc1.reset();
|
2017-09-14 17:22:48 +02:00
|
|
|
|
MaintainCount<counter> mc2(nrStepsCopyingTo);
|
2022-01-21 15:35:47 -05:00
|
|
|
|
|
2019-12-30 22:49:26 +01:00
|
|
|
|
printMsg(lvlDebug, "sending closure of ‘%s’ to ‘%s’",
|
|
|
|
|
localStore->printStorePath(step->drvPath), machine->sshName);
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
auto now1 = std::chrono::steady_clock::now();
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2022-01-21 15:35:47 -05:00
|
|
|
|
/* Copy the input closure. */
|
|
|
|
|
if (machine->isLocalhost()) {
|
|
|
|
|
StorePathSet closure;
|
|
|
|
|
destStore->computeFSClosure(inputs, closure);
|
|
|
|
|
copyPaths(*destStore, *localStore, closure, NoRepair, NoCheckSigs, NoSubstitute);
|
|
|
|
|
} else {
|
2022-02-20 17:18:52 +00:00
|
|
|
|
copyClosureTo(machine->state->sendLock, *destStore, from, to, inputs, true);
|
2022-01-21 15:35:47 -05:00
|
|
|
|
}
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
auto now2 = std::chrono::steady_clock::now();
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
|
|
|
|
|
}
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
autoDelete.cancel();
|
2015-06-19 14:51:59 +02:00
|
|
|
|
|
2016-10-26 13:39:43 +02:00
|
|
|
|
/* Truncate the log to get rid of messages about substitutions
|
|
|
|
|
etc. on the remote system. */
|
|
|
|
|
if (lseek(logFD.get(), SEEK_SET, 0) != 0)
|
|
|
|
|
throw SysError("seeking to the start of log file ‘%s’", result.logFile);
|
|
|
|
|
|
|
|
|
|
if (ftruncate(logFD.get(), 0) == -1)
|
|
|
|
|
throw SysError("truncating log file ‘%s’", result.logFile);
|
|
|
|
|
|
|
|
|
|
logFD = -1;
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Do the build. */
|
2019-12-30 22:49:26 +01:00
|
|
|
|
printMsg(lvlDebug, "building ‘%s’ on ‘%s’",
|
|
|
|
|
localStore->printStorePath(step->drvPath),
|
|
|
|
|
machine->sshName);
|
2015-07-21 01:45:00 +02:00
|
|
|
|
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssBuilding);
|
|
|
|
|
|
2021-02-23 09:50:15 +01:00
|
|
|
|
to << cmdBuildDerivation << localStore->printStorePath(step->drvPath);
|
|
|
|
|
writeDerivation(to, *localStore, basicDrv);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
to << maxSilentTime << buildTimeout;
|
|
|
|
|
if (GET_PROTOCOL_MINOR(remoteVersion) >= 2)
|
2017-09-22 15:23:58 +02:00
|
|
|
|
to << maxLogSize;
|
2016-12-07 15:57:13 +01:00
|
|
|
|
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3) {
|
|
|
|
|
to << repeats // == build-repeat
|
|
|
|
|
<< step->isDeterministic; // == enforce-determinism
|
|
|
|
|
}
|
2016-03-22 16:54:40 +01:00
|
|
|
|
to.flush();
|
2015-07-21 01:45:00 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
result.startTime = time(0);
|
|
|
|
|
int res;
|
|
|
|
|
{
|
2017-09-14 17:22:48 +02:00
|
|
|
|
MaintainCount<counter> mc(nrStepsBuilding);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
res = readInt(from);
|
|
|
|
|
}
|
|
|
|
|
result.stopTime = time(0);
|
|
|
|
|
|
2021-02-23 09:50:15 +01:00
|
|
|
|
result.errorMsg = readString(from);
|
|
|
|
|
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3) {
|
|
|
|
|
result.timesBuilt = readInt(from);
|
|
|
|
|
result.isNonDeterministic = readInt(from);
|
|
|
|
|
auto start = readInt(from);
|
|
|
|
|
auto stop = readInt(from);
|
|
|
|
|
if (start && start) {
|
|
|
|
|
/* Note: this represents the duration of a single
|
|
|
|
|
round, rather than all rounds. */
|
|
|
|
|
result.startTime = start;
|
|
|
|
|
result.stopTime = stop;
|
2016-03-09 16:59:38 +01:00
|
|
|
|
}
|
2015-07-21 01:45:00 +02:00
|
|
|
|
}
|
2021-04-15 14:50:04 +02:00
|
|
|
|
if (GET_PROTOCOL_MINOR(remoteVersion) >= 6) {
|
|
|
|
|
worker_proto::read(*localStore, from, Phantom<DrvOutputs> {});
|
|
|
|
|
}
|
2021-02-23 09:50:15 +01:00
|
|
|
|
switch ((BuildResult::Status) res) {
|
|
|
|
|
case BuildResult::Built:
|
|
|
|
|
result.stepStatus = bsSuccess;
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::Substituted:
|
|
|
|
|
case BuildResult::AlreadyValid:
|
|
|
|
|
result.stepStatus = bsSuccess;
|
|
|
|
|
result.isCached = true;
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::PermanentFailure:
|
|
|
|
|
result.stepStatus = bsFailed;
|
|
|
|
|
result.canCache = true;
|
|
|
|
|
result.errorMsg = "";
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::InputRejected:
|
|
|
|
|
case BuildResult::OutputRejected:
|
|
|
|
|
result.stepStatus = bsFailed;
|
|
|
|
|
result.canCache = true;
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::TransientFailure:
|
|
|
|
|
result.stepStatus = bsFailed;
|
|
|
|
|
result.canRetry = true;
|
|
|
|
|
result.errorMsg = "";
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::TimedOut:
|
|
|
|
|
result.stepStatus = bsTimedOut;
|
|
|
|
|
result.errorMsg = "";
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::MiscFailure:
|
|
|
|
|
result.stepStatus = bsAborted;
|
|
|
|
|
result.canRetry = true;
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::LogLimitExceeded:
|
|
|
|
|
result.stepStatus = bsLogLimitExceeded;
|
|
|
|
|
break;
|
|
|
|
|
case BuildResult::NotDeterministic:
|
|
|
|
|
result.stepStatus = bsNotDeterministic;
|
|
|
|
|
result.canRetry = false;
|
|
|
|
|
result.canCache = true;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
result.stepStatus = bsAborted;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (result.stepStatus != bsSuccess) return;
|
2015-07-21 01:45:00 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
result.errorMsg = "";
|
2016-03-09 16:59:38 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* If the path was substituted or already valid, then we didn't
|
|
|
|
|
get a build log. */
|
|
|
|
|
if (result.isCached) {
|
2019-12-30 22:49:26 +01:00
|
|
|
|
printMsg(lvlInfo, "outputs of ‘%s’ substituted or already valid on ‘%s’",
|
|
|
|
|
localStore->printStorePath(step->drvPath), machine->sshName);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
unlink(result.logFile.c_str());
|
|
|
|
|
result.logFile = "";
|
|
|
|
|
}
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Copy the output paths. */
|
2019-09-25 17:26:03 +02:00
|
|
|
|
if (!machine->isLocalhost() || localStore != std::shared_ptr<Store>(destStore)) {
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssReceivingOutputs);
|
|
|
|
|
|
2017-09-14 17:22:48 +02:00
|
|
|
|
MaintainCount<counter> mc(nrStepsCopyingFrom);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
|
|
|
|
|
auto now1 = std::chrono::steady_clock::now();
|
|
|
|
|
|
2020-09-26 23:37:39 +02:00
|
|
|
|
StorePathSet outputs;
|
|
|
|
|
for (auto & i : step->drv->outputsAndOptPaths(*localStore)) {
|
|
|
|
|
if (i.second.second)
|
|
|
|
|
outputs.insert(*i.second.second);
|
|
|
|
|
}
|
2016-03-22 16:54:40 +01:00
|
|
|
|
|
2020-07-27 18:11:04 +02:00
|
|
|
|
/* Get info about each output path. */
|
|
|
|
|
std::map<StorePath, ValidPathInfo> infos;
|
2016-03-22 16:54:40 +01:00
|
|
|
|
size_t totalNarSize = 0;
|
2019-12-30 22:49:26 +01:00
|
|
|
|
to << cmdQueryPathInfos;
|
2020-10-18 21:01:06 +02:00
|
|
|
|
worker_proto::write(*localStore, to, outputs);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
to.flush();
|
|
|
|
|
while (true) {
|
2020-07-27 18:11:04 +02:00
|
|
|
|
auto storePathS = readString(from);
|
|
|
|
|
if (storePathS == "") break;
|
2020-11-02 17:08:02 +01:00
|
|
|
|
auto deriver = readString(from); // deriver
|
2020-10-18 21:01:06 +02:00
|
|
|
|
auto references = worker_proto::read(*localStore, from, Phantom<StorePathSet> {});
|
2016-03-22 16:54:40 +01:00
|
|
|
|
readLongLong(from); // download size
|
2020-08-27 17:46:36 +02:00
|
|
|
|
auto narSize = readLongLong(from);
|
|
|
|
|
auto narHash = Hash::parseAny(readString(from), htSHA256);
|
|
|
|
|
auto ca = parseContentAddressOpt(readString(from));
|
2020-07-27 18:11:04 +02:00
|
|
|
|
readStrings<StringSet>(from); // sigs
|
2020-08-27 17:46:36 +02:00
|
|
|
|
ValidPathInfo info(localStore->parseStorePath(storePathS), narHash);
|
|
|
|
|
assert(outputs.count(info.path));
|
|
|
|
|
info.references = references;
|
|
|
|
|
info.narSize = narSize;
|
|
|
|
|
totalNarSize += info.narSize;
|
|
|
|
|
info.narHash = narHash;
|
|
|
|
|
info.ca = ca;
|
2020-11-02 17:08:02 +01:00
|
|
|
|
if (deriver != "")
|
|
|
|
|
info.deriver = localStore->parseStorePath(deriver);
|
2020-07-27 18:11:04 +02:00
|
|
|
|
infos.insert_or_assign(info.path, info);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
}
|
2016-03-09 14:30:13 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
if (totalNarSize > maxOutputSize) {
|
|
|
|
|
result.stepStatus = bsNarSizeLimitExceeded;
|
|
|
|
|
return;
|
|
|
|
|
}
|
2016-03-09 14:30:13 +01:00
|
|
|
|
|
2020-07-27 18:11:04 +02:00
|
|
|
|
/* Copy each path. */
|
2019-12-30 22:49:26 +01:00
|
|
|
|
printMsg(lvlDebug, "copying outputs of ‘%s’ from ‘%s’ (%d bytes)",
|
|
|
|
|
localStore->printStorePath(step->drvPath), machine->sshName, totalNarSize);
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2020-07-28 13:46:57 +02:00
|
|
|
|
auto pathsSorted = reverseTopoSortPaths(infos);
|
2020-07-27 18:11:04 +02:00
|
|
|
|
|
|
|
|
|
for (auto & path : pathsSorted) {
|
|
|
|
|
auto & info = infos.find(path)->second;
|
2020-07-27 20:38:59 +02:00
|
|
|
|
|
|
|
|
|
/* Receive the NAR from the remote and add it to the
|
|
|
|
|
destination store. Meanwhile, extract all the info from the
|
|
|
|
|
NAR that getBuildOutput() needs. */
|
|
|
|
|
auto source2 = sinkToSource([&](Sink & sink)
|
|
|
|
|
{
|
2020-11-10 04:05:59 +01:00
|
|
|
|
/* Note: we should only send the command to dump the store
|
|
|
|
|
path to the remote if the NAR is actually going to get read
|
|
|
|
|
by the destination store, which won't happen if this path
|
|
|
|
|
is already valid on the destination store. Since this
|
|
|
|
|
lambda function only gets executed if someone tries to read
|
|
|
|
|
from source2, we will send the command from here rather
|
|
|
|
|
than outside the lambda. */
|
|
|
|
|
to << cmdDumpStorePath << localStore->printStorePath(path);
|
|
|
|
|
to.flush();
|
|
|
|
|
|
2020-07-27 20:38:59 +02:00
|
|
|
|
TeeSource tee(from, sink);
|
|
|
|
|
extractNarData(tee, localStore->printStorePath(path), narMembers);
|
|
|
|
|
});
|
|
|
|
|
|
2020-08-04 10:53:06 +02:00
|
|
|
|
destStore->addToStore(info, *source2, NoRepair, NoCheckSigs);
|
2020-07-27 18:11:04 +02:00
|
|
|
|
}
|
2016-03-09 14:30:13 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
auto now2 = std::chrono::steady_clock::now();
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
|
|
|
|
|
}
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Shut down the connection. */
|
2016-10-06 15:24:09 +02:00
|
|
|
|
child.to = -1;
|
2017-02-03 14:39:18 +01:00
|
|
|
|
child.pid.wait();
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
} catch (Error & e) {
|
|
|
|
|
/* Disable this machine until a certain period of time has
|
|
|
|
|
passed. This period increases on every consecutive
|
|
|
|
|
failure. However, don't count failures that occurred soon
|
|
|
|
|
after the last one (to take into account steps started in
|
|
|
|
|
parallel). */
|
|
|
|
|
auto info(machine->state->connectInfo.lock());
|
|
|
|
|
auto now = std::chrono::system_clock::now();
|
|
|
|
|
if (info->consecutiveFailures == 0 || info->lastFailure < now - std::chrono::seconds(30)) {
|
|
|
|
|
info->consecutiveFailures = std::min(info->consecutiveFailures + 1, (unsigned int) 4);
|
|
|
|
|
info->lastFailure = now;
|
2016-09-30 17:05:07 +02:00
|
|
|
|
int delta = retryInterval * std::pow(retryBackoff, info->consecutiveFailures - 1) + (rand() % 30);
|
2020-06-23 13:43:54 +02:00
|
|
|
|
printMsg(lvlInfo, "will disable machine ‘%1%’ for %2%s", machine->sshName, delta);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
info->disabledUntil = now + std::chrono::seconds(delta);
|
|
|
|
|
}
|
|
|
|
|
throw;
|
2015-06-24 13:19:16 +02:00
|
|
|
|
}
|
2015-06-09 14:21:21 +02:00
|
|
|
|
}
|