2015-06-09 14:21:21 +02:00
|
|
|
|
#include <algorithm>
|
2016-09-30 17:05:07 +02:00
|
|
|
|
#include <cmath>
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
#include <sys/stat.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
|
2025-04-07 11:36:59 -04:00
|
|
|
|
#include <nix/store/build-result.hh>
|
|
|
|
|
#include <nix/store/path.hh>
|
|
|
|
|
#include <nix/store/legacy-ssh-store.hh>
|
|
|
|
|
#include <nix/store/serve-protocol.hh>
|
|
|
|
|
#include <nix/store/serve-protocol-impl.hh>
|
2015-07-07 10:25:33 +02:00
|
|
|
|
#include "state.hh"
|
2025-04-07 11:36:59 -04:00
|
|
|
|
#include <nix/util/current-process.hh>
|
|
|
|
|
#include <nix/util/processes.hh>
|
|
|
|
|
#include <nix/util/util.hh>
|
|
|
|
|
#include <nix/store/serve-protocol.hh>
|
|
|
|
|
#include <nix/store/serve-protocol-impl.hh>
|
|
|
|
|
#include <nix/store/ssh.hh>
|
|
|
|
|
#include <nix/util/finally.hh>
|
|
|
|
|
#include <nix/util/url.hh>
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
|
|
|
|
using namespace nix;
|
|
|
|
|
|
2024-05-21 13:34:30 -04:00
|
|
|
|
bool ::Machine::isLocalhost() const
|
2022-01-07 17:06:56 +01:00
|
|
|
|
{
|
2024-05-21 13:34:30 -04:00
|
|
|
|
return storeUri.params.empty() && std::visit(overloaded {
|
|
|
|
|
[](const StoreReference::Auto &) {
|
|
|
|
|
return true;
|
|
|
|
|
},
|
|
|
|
|
[](const StoreReference::Specified & s) {
|
|
|
|
|
return
|
|
|
|
|
(s.scheme == "local" || s.scheme == "unix") ||
|
|
|
|
|
((s.scheme == "ssh" || s.scheme == "ssh-ng") &&
|
|
|
|
|
s.authority == "localhost");
|
|
|
|
|
},
|
|
|
|
|
}, storeUri.variant);
|
2022-01-07 17:06:56 +01:00
|
|
|
|
}
|
2015-06-25 15:29:22 +02:00
|
|
|
|
|
2024-05-21 13:34:30 -04:00
|
|
|
|
namespace nix::build_remote {
|
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
/* Start `nix-store --serve --write` for `machine` through the given
   SSH master and return the resulting connection.

   - For a localhost machine the command gets `--builders ""`.
   - Otherwise, if the machine's store URI has a `remote-store`
     parameter, its (shell-escaped) value is passed via `--store`.

   After the command has been started, the capacity of both pipes of
   the connection is raised on a best-effort basis. */
static std::unique_ptr<SSHMaster::Connection> openConnection(
    ::Machine::ptr machine, SSHMaster & master)
{
    Strings command = {"nix-store", "--serve", "--write"};
    if (machine->isLocalhost()) {
        command.push_back("--builders");
        command.push_back("");
    } else {
        auto remoteStore = machine->storeUri.params.find("remote-store");
        if (remoteStore != machine->storeUri.params.end()) {
            command.push_back("--store");
            command.push_back(shellEscape(remoteStore->second));
        }
    }

    auto ret = master.startCommand(std::move(command), {
        "-a", "-oBatchMode=yes", "-oConnectTimeout=60", "-oTCPKeepAlive=yes"
    });

    // XXX: determine the actual max value we can use from /proc.
    // FIXME: Should this be upstreamed into `startCommand` in Nix?
    const int pipesize = 1024 * 1024;

    /* F_SETPIPE_SZ takes the requested capacity as an `int` *value*.
       Passing the address of the variable (as this code used to do)
       hands the kernel a truncated pointer instead of the size.

       The return value is deliberately ignored: a larger pipe is only
       an optimisation, and the connection works without it. */
    fcntl(ret->in.get(), F_SETPIPE_SZ, pipesize);
    fcntl(ret->out.get(), F_SETPIPE_SZ, pipesize);

    return ret;
}
|
|
|
|
|
|
|
|
|
|
|
2023-11-30 11:31:58 -05:00
|
|
|
|
static void copyClosureTo(
|
2024-01-23 11:03:19 -05:00
|
|
|
|
::Machine::Connection & conn,
|
2023-11-30 11:31:58 -05:00
|
|
|
|
Store & destStore,
|
|
|
|
|
const StorePathSet & paths,
|
2022-02-20 20:06:22 +00:00
|
|
|
|
SubstituteFlag useSubstitutes = NoSubstitute)
|
2015-06-09 14:21:21 +02:00
|
|
|
|
{
|
2019-12-30 22:49:26 +01:00
|
|
|
|
StorePathSet closure;
|
2022-02-20 17:18:52 +00:00
|
|
|
|
destStore.computeFSClosure(paths, closure);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
|
|
|
|
/* Send the "query valid paths" command with the "lock" option
|
|
|
|
|
enabled. This prevents a race where the remote host
|
|
|
|
|
garbage-collect paths that are already there. Optionally, ask
|
|
|
|
|
the remote host to substitute missing paths. */
|
2016-02-26 21:15:05 +01:00
|
|
|
|
// FIXME: substitute output pollutes our build log
|
2015-06-09 14:21:21 +02:00
|
|
|
|
/* Get back the set of paths that are already valid on the remote
|
|
|
|
|
host. */
|
2025-03-03 10:10:04 -05:00
|
|
|
|
auto present = conn.queryValidPaths(
|
|
|
|
|
destStore, true, closure, useSubstitutes);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2015-06-09 16:03:41 +02:00
|
|
|
|
if (present.size() == closure.size()) return;
|
|
|
|
|
|
2022-02-20 17:18:52 +00:00
|
|
|
|
auto sorted = destStore.topoSortPaths(closure);
|
2015-06-09 16:03:41 +02:00
|
|
|
|
|
2019-12-30 22:49:26 +01:00
|
|
|
|
StorePathSet missing;
|
2015-06-09 16:03:41 +02:00
|
|
|
|
for (auto i = sorted.rbegin(); i != sorted.rend(); ++i)
|
2020-06-23 13:43:54 +02:00
|
|
|
|
if (!present.count(*i)) missing.insert(*i);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2020-06-23 13:43:54 +02:00
|
|
|
|
printMsg(lvlDebug, "sending %d missing paths", missing.size());
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2023-11-30 11:31:58 -05:00
|
|
|
|
std::unique_lock<std::timed_mutex> sendLock(conn.machine->state->sendLock,
|
2017-09-01 16:28:49 +02:00
|
|
|
|
std::chrono::seconds(600));
|
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
conn.to << ServeProto::Command::ImportPaths;
|
|
|
|
|
destStore.exportPaths(missing, conn.to);
|
|
|
|
|
conn.to.flush();
|
|
|
|
|
|
|
|
|
|
if (readInt(conn.from) != 1)
|
|
|
|
|
throw Error("remote machine failed to import closure");
|
2015-06-09 14:21:21 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2020-07-27 18:11:04 +02:00
|
|
|
|
// FIXME: use Store::topoSortPaths().
|
2023-12-07 14:10:28 -05:00
|
|
|
|
/* Order the keys of `paths` so that every path comes after the paths
   it references (as far as those are themselves keys of `paths`).

   This is a depth-first post-order walk over the `references` of each
   entry. Self-references and references to paths that are not in
   `paths` are skipped. Each key appears exactly once in the result. */
static StorePaths reverseTopoSortPaths(const std::map<StorePath, UnkeyedValidPathInfo> & paths)
{
    StorePaths sorted;
    StorePathSet visited;

    std::function<void(const StorePath & path)> dfsVisit;

    dfsVisit = [&](const StorePath & path) {
        /* Already handled (or currently being handled). */
        if (!visited.insert(path).second) return;

        /* Walk the reference set in place rather than copying it on
           every visit; `paths` is const, so the set cannot change
           while we recurse. */
        if (auto info = paths.find(path); info != paths.end()) {
            for (auto & dep : info->second.references)
                /* Don't traverse into paths that don't exist.  That can
                   happen due to substitutes for non-existent paths. */
                if (dep != path && paths.count(dep))
                    dfsVisit(dep);
        }

        /* Post-order: emit the path once its references are emitted. */
        sorted.push_back(path);
    };

    for (auto & entry : paths)
        dfsVisit(entry.first);

    return sorted;
}
|
|
|
|
|
|
2023-11-30 12:19:05 -05:00
|
|
|
|
static std::pair<Path, AutoCloseFD> openLogFile(const std::string & logDir, const StorePath & drvPath)
|
2022-03-21 10:42:44 +01:00
|
|
|
|
{
|
2022-10-25 11:07:51 -04:00
|
|
|
|
std::string base(drvPath.to_string());
|
2023-11-30 11:27:40 -05:00
|
|
|
|
auto logFile = logDir + "/" + std::string(base, 0, 2) + "/" + std::string(base, 2);
|
2022-03-21 10:42:44 +01:00
|
|
|
|
|
|
|
|
|
createDirs(dirOf(logFile));
|
|
|
|
|
|
|
|
|
|
AutoCloseFD logFD = open(logFile.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0666);
|
|
|
|
|
if (!logFD) throw SysError("creating log file ‘%s’", logFile);
|
|
|
|
|
|
|
|
|
|
return {std::move(logFile), std::move(logFD)};
|
|
|
|
|
}
|
2020-07-27 18:11:04 +02:00
|
|
|
|
|
2023-11-30 12:19:05 -05:00
|
|
|
|
/* Resolve the derivation of `step` and make its inputs available to
   the builder behind `conn`.

   Returns the resolved `BasicDerivation`. The time spent copying the
   input closure is added, in milliseconds, to `overhead`.

   Throws `Error` if the derivation cannot be resolved. */
static BasicDerivation sendInputs(
    State & state,
    Step & step,
    Store & localStore,
    Store & destStore,
    ::Machine::Connection & conn,
    unsigned int & overhead,
    counter & nrStepsWaiting,
    counter & nrStepsCopyingTo
)
{
    /* Replace the input derivations by their output paths, so that
       only a minimal closure has to be sent to the builder.

       `tryResolve` currently does *not* rewrite input addresses, so
       doing this unconditionally is safe. (It should probably grow a
       mode that does, but we would not use it here.) */
    const auto resolve = [&]() -> BasicDerivation {
        auto resolved = step.drv->tryResolve(destStore, &localStore);
        if (!resolved)
            throw Error(
                "the derivation '%s' can’t be resolved. It’s probably "
                "missing some outputs",
                localStore.printStorePath(step.drvPath));
        return *resolved;
    };
    BasicDerivation basicDrv = resolve();

    /* Make sure the inputs exist in the destination store. For regular
       stores this is a no-op, but for the binary cache store it copies
       the inputs from the local store into the binary cache. */
    if (&localStore != &destStore)
        copyClosure(
            localStore, destStore,
            step.drv->inputSrcs,
            NoRepair, NoCheckSigs, NoSubstitute);

    {
        /* NOTE(review): this counter guard is created and released
           right away, so it presumably only touches `nrStepsWaiting`
           momentarily — confirm whether it is still needed. */
        auto waiting = std::make_shared<MaintainCount<counter>>(nrStepsWaiting);
        waiting.reset();
        MaintainCount<counter> copying(nrStepsCopyingTo);

        printMsg(lvlDebug, "sending closure of ‘%s’ to ‘%s’",
            localStore.printStorePath(step.drvPath), conn.machine->storeUri.render());

        const auto started = std::chrono::steady_clock::now();

        /* Copy the input closure: store-to-store for a localhost
           builder, over the connection otherwise. */
        if (!conn.machine->isLocalhost()) {
            copyClosureTo(conn, destStore, basicDrv.inputSrcs, Substitute);
        } else {
            StorePathSet inputClosure;
            destStore.computeFSClosure(basicDrv.inputSrcs, inputClosure);
            copyPaths(destStore, localStore, inputClosure, NoRepair, NoCheckSigs, NoSubstitute);
        }

        const auto finished = std::chrono::steady_clock::now();
        const auto elapsed =
            std::chrono::duration_cast<std::chrono::milliseconds>(finished - started);

        overhead += elapsed.count();
    }

    return basicDrv;
}
|
|
|
|
|
|
2023-11-30 12:19:05 -05:00
|
|
|
|
/* Ask the builder behind `conn` to build `drv` and return its result.

   `nrStepsBuilding` is maintained while we wait for the reply. Two
   gaps in the reply are filled in locally:

   - If the builder reports `startTime == 0`, both timestamps are
     replaced by our own measurements taken around the wait.
   - If the remote protocol minor version is below 6, `builtOutputs`
     is reconstructed from the derivation itself. */
static BuildResult performBuild(
    ::Machine::Connection & conn,
    Store & localStore,
    StorePath drvPath,
    const BasicDerivation & drv,
    const ServeProto::BuildOptions & options,
    counter & nrStepsBuilding
)
{
    conn.putBuildDerivationRequest(localStore, drvPath, drv, options);

    BuildResult result;

    const time_t localStart = time(0);
    {
        MaintainCount<counter> building(nrStepsBuilding);
        result = ServeProto::Serialise<BuildResult>::read(localStore, conn);
    }
    const time_t localStop = time(0);

    /* The builder gave `startTime = 0`: fall back to our measurements.

       Note: this represents the duration of a single round, rather
       than all rounds. */
    if (!result.startTime) {
        result.startTime = localStart;
        result.stopTime = localStop;
    }

    /* Recent enough protocols already delivered `builtOutputs`. */
    if (GET_PROTOCOL_MINOR(conn.remoteVersion) >= 6)
        return result;

    /* The protocol was too old to give us `builtOutputs`, so derive
       them by introspecting the derivation.

       A remote this old cannot handle CA derivations, so we could not
       have got this far with one anyway. */
    assert(drv.type().hasKnownOutputPaths());
    DerivationOutputsAndOptPaths knownOutputs = drv.outputsAndOptPaths(localStore);

    /* For a `BasicDerivation`, `staticOutputHashes` does no real
       work. */
    auto hashes = staticOutputHashes(localStore, drv);

    for (auto & [outputName, output] : knownOutputs) {
        const auto & optPath = output.second;
        /* Guaranteed by the assertion on the derivation type above. */
        assert(optPath);

        auto hash = hashes.at(outputName);
        auto id = DrvOutput { hash, outputName };
        result.builtOutputs.insert_or_assign(
            outputName,
            Realisation { id, *optPath });
    }

    return result;
}
|
|
|
|
|
|
2023-11-30 12:19:05 -05:00
|
|
|
|
static void copyPathFromRemote(
|
2024-01-23 11:03:19 -05:00
|
|
|
|
::Machine::Connection & conn,
|
2022-03-21 15:26:31 +01:00
|
|
|
|
NarMemberDatas & narMembers,
|
|
|
|
|
Store & localStore,
|
|
|
|
|
Store & destStore,
|
|
|
|
|
const ValidPathInfo & info
|
|
|
|
|
)
|
|
|
|
|
{
|
2025-03-03 10:10:04 -05:00
|
|
|
|
/* Receive the NAR from the remote and add it to the
|
|
|
|
|
destination store. Meanwhile, extract all the info from the
|
|
|
|
|
NAR that getBuildOutput() needs. */
|
|
|
|
|
auto source2 = sinkToSource([&](Sink & sink)
|
|
|
|
|
{
|
|
|
|
|
/* Note: we should only send the command to dump the store
|
|
|
|
|
path to the remote if the NAR is actually going to get read
|
|
|
|
|
by the destination store, which won't happen if this path
|
|
|
|
|
is already valid on the destination store. Since this
|
|
|
|
|
lambda function only gets executed if someone tries to read
|
|
|
|
|
from source2, we will send the command from here rather
|
|
|
|
|
than outside the lambda. */
|
|
|
|
|
conn.to << ServeProto::Command::DumpStorePath << localStore.printStorePath(info.path);
|
|
|
|
|
conn.to.flush();
|
|
|
|
|
|
|
|
|
|
TeeSource tee(conn.from, sink);
|
|
|
|
|
extractNarData(tee, localStore.printStorePath(info.path), narMembers);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
destStore.addToStore(info, *source2, NoRepair, NoCheckSigs);
|
2022-03-21 15:26:31 +01:00
|
|
|
|
}
|
|
|
|
|
|
2023-11-30 12:19:05 -05:00
|
|
|
|
static void copyPathsFromRemote(
|
2024-01-23 11:03:19 -05:00
|
|
|
|
::Machine::Connection & conn,
|
2022-03-21 15:26:31 +01:00
|
|
|
|
NarMemberDatas & narMembers,
|
|
|
|
|
Store & localStore,
|
|
|
|
|
Store & destStore,
|
2023-12-07 14:10:28 -05:00
|
|
|
|
const std::map<StorePath, UnkeyedValidPathInfo> & infos
|
2022-03-21 15:26:31 +01:00
|
|
|
|
)
|
|
|
|
|
{
|
|
|
|
|
auto pathsSorted = reverseTopoSortPaths(infos);
|
|
|
|
|
|
|
|
|
|
for (auto & path : pathsSorted) {
|
|
|
|
|
auto & info = infos.find(path)->second;
|
2023-12-07 14:10:28 -05:00
|
|
|
|
copyPathFromRemote(
|
|
|
|
|
conn, narMembers, localStore, destStore,
|
|
|
|
|
ValidPathInfo { path, info });
|
2022-03-21 15:26:31 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-25 10:04:29 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* using namespace nix::build_remote; */
|
|
|
|
|
|
|
|
|
|
/* Fold a `BuildResult` received from a builder into this result.

   The timing fields, build count, error message and determinism flag
   are copied over first; then the build status is translated into a
   step status plus the caching/retry flags. For some statuses the
   copied error message is cleared again. */
void RemoteResult::updateWithBuildResult(const nix::BuildResult & buildResult)
{
    startTime = buildResult.startTime;
    stopTime = buildResult.stopTime;
    timesBuilt = buildResult.timesBuilt;
    errorMsg = buildResult.errorMsg;
    isNonDeterministic = buildResult.isNonDeterministic;

    const auto status = static_cast<BuildResult::Status>(buildResult.status);

    switch (status) {

        /* Success; anything other than an actual build counts as
           cached. */
        case BuildResult::Built:
        case BuildResult::Substituted:
        case BuildResult::AlreadyValid:
            stepStatus = bsSuccess;
            if (status != BuildResult::Built)
                isCached = true;
            break;

        /* Failures that may be cached; only a permanent failure drops
           the error message. */
        case BuildResult::PermanentFailure:
        case BuildResult::InputRejected:
        case BuildResult::OutputRejected:
            stepStatus = bsFailed;
            canCache = true;
            if (status == BuildResult::PermanentFailure)
                errorMsg = "";
            break;

        case BuildResult::TransientFailure:
            stepStatus = bsFailed;
            canRetry = true;
            errorMsg = "";
            break;

        case BuildResult::TimedOut:
            stepStatus = bsTimedOut;
            errorMsg = "";
            break;

        case BuildResult::MiscFailure:
            stepStatus = bsAborted;
            canRetry = true;
            break;

        case BuildResult::LogLimitExceeded:
            stepStatus = bsLogLimitExceeded;
            break;

        case BuildResult::NotDeterministic:
            stepStatus = bsNotDeterministic;
            canRetry = false;
            canCache = true;
            break;

        /* Any status not listed above is treated as an abort. */
        default:
            stepStatus = bsAborted;
            break;
    }
}
|
|
|
|
|
|
2022-03-21 15:26:31 +01:00
|
|
|
|
|
2016-02-15 21:10:29 +01:00
|
|
|
|
void State::buildRemote(ref<Store> destStore,
|
2024-01-23 11:03:19 -05:00
|
|
|
|
::Machine::ptr machine, Step::ptr step,
|
2023-12-07 14:18:00 -05:00
|
|
|
|
const ServeProto::BuildOptions & buildOptions,
|
2017-12-07 15:35:31 +01:00
|
|
|
|
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
|
2020-07-27 20:38:59 +02:00
|
|
|
|
std::function<void(StepState)> updateStep,
|
|
|
|
|
NarMemberDatas & narMembers)
|
2015-06-09 14:21:21 +02:00
|
|
|
|
{
|
2016-10-26 15:09:16 +02:00
|
|
|
|
assert(BuildResult::TimedOut == 8);
|
|
|
|
|
|
2022-10-25 10:04:29 +02:00
|
|
|
|
auto [logFile, logFD] = build_remote::openLogFile(logDir, step->drvPath);
|
2022-03-21 10:42:44 +01:00
|
|
|
|
AutoDelete logFileDel(logFile, false);
|
|
|
|
|
result.logFile = logFile;
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
try {
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssConnecting);
|
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
auto storeRef = machine->completeStoreReference();
|
|
|
|
|
|
|
|
|
|
auto * pSpecified = std::get_if<StoreReference::Specified>(&storeRef.variant);
|
|
|
|
|
if (!pSpecified || pSpecified->scheme != "ssh") {
|
|
|
|
|
throw Error("Currently, only (legacy-)ssh stores are supported!");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LegacySSHStoreConfig storeConfig {
|
|
|
|
|
pSpecified->scheme,
|
|
|
|
|
pSpecified->authority,
|
|
|
|
|
storeRef.params
|
2024-05-20 16:22:19 -04:00
|
|
|
|
};
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
auto master = storeConfig.createSSHMaster(
|
|
|
|
|
false, // no SSH master yet
|
|
|
|
|
logFD.get());
|
|
|
|
|
|
|
|
|
|
// FIXME: rewrite to use Store.
|
|
|
|
|
auto child = build_remote::openConnection(machine, master);
|
|
|
|
|
|
2016-11-07 19:34:35 +01:00
|
|
|
|
{
|
|
|
|
|
auto activeStepState(activeStep->state_.lock());
|
|
|
|
|
if (activeStepState->cancelled) throw Error("step cancelled");
|
2025-03-03 10:10:04 -05:00
|
|
|
|
activeStepState->pid = child->sshPid;
|
2016-11-07 19:34:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Finally clearPid([&]() {
|
|
|
|
|
auto activeStepState(activeStep->state_.lock());
|
|
|
|
|
activeStepState->pid = -1;
|
|
|
|
|
|
|
|
|
|
/* FIXME: there is a slight race here with step
|
|
|
|
|
cancellation in State::processQueueChange(), which
|
|
|
|
|
could call kill() on this pid after we've done waitpid()
|
|
|
|
|
on it. With pid wrap-around, there is a tiny
|
|
|
|
|
possibility that we end up killing another
|
|
|
|
|
process. Meh. */
|
|
|
|
|
});
|
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
::Machine::Connection conn {
|
|
|
|
|
{
|
|
|
|
|
.to = child->in.get(),
|
|
|
|
|
.from = child->out.get(),
|
|
|
|
|
/* Handshake. */
|
|
|
|
|
.remoteVersion = 0xdadbeef, // FIXME avoid dummy initialize
|
|
|
|
|
},
|
|
|
|
|
/*.machine =*/ machine,
|
|
|
|
|
};
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
Finally updateStats([&]() {
|
2025-03-03 10:10:04 -05:00
|
|
|
|
bytesReceived += conn.from.read;
|
|
|
|
|
bytesSent += conn.to.written;
|
2016-03-22 16:54:40 +01:00
|
|
|
|
});
|
2015-10-06 17:35:08 +02:00
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
constexpr ServeProto::Version our_version = 0x206;
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
conn.remoteVersion = decltype(conn)::handshake(
|
|
|
|
|
conn.to,
|
|
|
|
|
conn.from,
|
|
|
|
|
our_version,
|
|
|
|
|
machine->storeUri.render());
|
|
|
|
|
} catch (EndOfFile & e) {
|
|
|
|
|
child->sshPid.wait();
|
|
|
|
|
std::string s = chomp(readFile(result.logFile));
|
|
|
|
|
throw Error("cannot connect to ‘%1%’: %2%", machine->storeUri.render(), s);
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-21 15:53:27 +02:00
|
|
|
|
{
|
|
|
|
|
auto info(machine->state->connectInfo.lock());
|
2016-03-22 16:54:40 +01:00
|
|
|
|
info->consecutiveFailures = 0;
|
2015-07-21 15:53:27 +02:00
|
|
|
|
}
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Gather the inputs. If the remote side is Nix <= 1.9, we have to
|
|
|
|
|
copy the entire closure of ‘drvPath’, as well as the required
|
|
|
|
|
outputs of the input derivations. On Nix > 1.9, we only need to
|
|
|
|
|
copy the immediate sources of the derivation and the required
|
|
|
|
|
outputs of the input derivations. */
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssSendingInputs);
|
2022-10-25 10:04:29 +02:00
|
|
|
|
BasicDerivation resolvedDrv = build_remote::sendInputs(*this, *step, *localStore, *destStore, conn, result.overhead, nrStepsWaiting, nrStepsCopyingTo);
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2022-03-21 10:42:44 +01:00
|
|
|
|
logFileDel.cancel();
|
2015-06-19 14:51:59 +02:00
|
|
|
|
|
2016-10-26 13:39:43 +02:00
|
|
|
|
/* Truncate the log to get rid of messages about substitutions
|
2022-03-21 11:35:38 +01:00
|
|
|
|
etc. on the remote system. */
|
2016-10-26 13:39:43 +02:00
|
|
|
|
if (lseek(logFD.get(), SEEK_SET, 0) != 0)
|
|
|
|
|
throw SysError("seeking to the start of log file ‘%s’", result.logFile);
|
|
|
|
|
|
|
|
|
|
if (ftruncate(logFD.get(), 0) == -1)
|
|
|
|
|
throw SysError("truncating log file ‘%s’", result.logFile);
|
|
|
|
|
|
|
|
|
|
logFD = -1;
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Do the build. */
|
2019-12-30 22:49:26 +01:00
|
|
|
|
printMsg(lvlDebug, "building ‘%s’ on ‘%s’",
|
|
|
|
|
localStore->printStorePath(step->drvPath),
|
2024-05-21 13:34:30 -04:00
|
|
|
|
machine->storeUri.render());
|
2015-07-21 01:45:00 +02:00
|
|
|
|
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssBuilding);
|
|
|
|
|
|
2022-10-25 10:04:29 +02:00
|
|
|
|
BuildResult buildResult = build_remote::performBuild(
|
2022-03-21 12:14:37 +01:00
|
|
|
|
conn,
|
|
|
|
|
*localStore,
|
|
|
|
|
step->drvPath,
|
2022-03-24 14:27:45 +01:00
|
|
|
|
resolvedDrv,
|
2022-03-21 16:33:25 +01:00
|
|
|
|
buildOptions,
|
2022-03-21 12:14:37 +01:00
|
|
|
|
nrStepsBuilding
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
result.updateWithBuildResult(buildResult);
|
2015-07-21 01:45:00 +02:00
|
|
|
|
|
2021-02-23 09:50:15 +01:00
|
|
|
|
if (result.stepStatus != bsSuccess) return;
|
2015-07-21 01:45:00 +02:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
result.errorMsg = "";
|
2016-03-09 16:59:38 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* If the path was substituted or already valid, then we didn't
|
|
|
|
|
get a build log. */
|
|
|
|
|
if (result.isCached) {
|
2019-12-30 22:49:26 +01:00
|
|
|
|
printMsg(lvlInfo, "outputs of ‘%s’ substituted or already valid on ‘%s’",
|
2024-05-21 13:34:30 -04:00
|
|
|
|
localStore->printStorePath(step->drvPath), machine->storeUri.render());
|
2016-03-22 16:54:40 +01:00
|
|
|
|
unlink(result.logFile.c_str());
|
|
|
|
|
result.logFile = "";
|
|
|
|
|
}
|
2015-06-09 14:21:21 +02:00
|
|
|
|
|
2023-12-04 16:05:50 -05:00
|
|
|
|
StorePathSet outputs;
|
|
|
|
|
for (auto & [_, realisation] : buildResult.builtOutputs)
|
|
|
|
|
outputs.insert(realisation.outPath);
|
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
/* Copy the output paths. */
|
2019-09-25 17:26:03 +02:00
|
|
|
|
if (!machine->isLocalhost() || localStore != std::shared_ptr<Store>(destStore)) {
|
2017-12-07 15:35:31 +01:00
|
|
|
|
updateStep(ssReceivingOutputs);
|
|
|
|
|
|
2017-09-14 17:22:48 +02:00
|
|
|
|
MaintainCount<counter> mc(nrStepsCopyingFrom);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
|
|
|
|
|
auto now1 = std::chrono::steady_clock::now();
|
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
auto infos = conn.queryPathInfos(*localStore, outputs);
|
2024-05-20 18:00:16 -04:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
size_t totalNarSize = 0;
|
2024-05-20 18:00:16 -04:00
|
|
|
|
for (auto & [_, info] : infos) totalNarSize += info.narSize;
|
2016-03-09 14:30:13 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
if (totalNarSize > maxOutputSize) {
|
|
|
|
|
result.stepStatus = bsNarSizeLimitExceeded;
|
|
|
|
|
return;
|
|
|
|
|
}
|
2016-03-09 14:30:13 +01:00
|
|
|
|
|
2020-07-27 18:11:04 +02:00
|
|
|
|
/* Copy each path. */
|
2019-12-30 22:49:26 +01:00
|
|
|
|
printMsg(lvlDebug, "copying outputs of ‘%s’ from ‘%s’ (%d bytes)",
|
2024-05-21 13:34:30 -04:00
|
|
|
|
localStore->printStorePath(step->drvPath), machine->storeUri.render(), totalNarSize);
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2022-10-25 10:04:29 +02:00
|
|
|
|
build_remote::copyPathsFromRemote(conn, narMembers, *localStore, *destStore, infos);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
auto now2 = std::chrono::steady_clock::now();
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
|
|
|
|
|
}
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2023-12-04 16:05:50 -05:00
|
|
|
|
/* Register the outputs of the newly built drv */
|
|
|
|
|
if (experimentalFeatureSettings.isEnabled(Xp::CaDerivations)) {
|
|
|
|
|
auto outputHashes = staticOutputHashes(*localStore, *step->drv);
|
|
|
|
|
for (auto & [outputName, realisation] : buildResult.builtOutputs) {
|
|
|
|
|
// Register the resolved drv output
|
|
|
|
|
destStore->registerDrvOutput(realisation);
|
|
|
|
|
|
|
|
|
|
// Also register the unresolved one
|
|
|
|
|
auto unresolvedRealisation = realisation;
|
|
|
|
|
unresolvedRealisation.signatures.clear();
|
|
|
|
|
unresolvedRealisation.id.drvHash = outputHashes.at(outputName);
|
|
|
|
|
destStore->registerDrvOutput(unresolvedRealisation);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-03 10:10:04 -05:00
|
|
|
|
/* Shut down the connection. */
|
|
|
|
|
child->in = -1;
|
|
|
|
|
child->sshPid.wait();
|
2016-02-17 10:28:42 +01:00
|
|
|
|
|
2016-03-22 16:54:40 +01:00
|
|
|
|
} catch (Error & e) {
|
|
|
|
|
/* Disable this machine until a certain period of time has
|
|
|
|
|
passed. This period increases on every consecutive
|
|
|
|
|
failure. However, don't count failures that occurred soon
|
|
|
|
|
after the last one (to take into account steps started in
|
|
|
|
|
parallel). */
|
|
|
|
|
auto info(machine->state->connectInfo.lock());
|
|
|
|
|
auto now = std::chrono::system_clock::now();
|
|
|
|
|
if (info->consecutiveFailures == 0 || info->lastFailure < now - std::chrono::seconds(30)) {
|
|
|
|
|
info->consecutiveFailures = std::min(info->consecutiveFailures + 1, (unsigned int) 4);
|
|
|
|
|
info->lastFailure = now;
|
2016-09-30 17:05:07 +02:00
|
|
|
|
int delta = retryInterval * std::pow(retryBackoff, info->consecutiveFailures - 1) + (rand() % 30);
|
2024-05-21 13:34:30 -04:00
|
|
|
|
printMsg(lvlInfo, "will disable machine ‘%1%’ for %2%s", machine->storeUri.render(), delta);
|
2016-03-22 16:54:40 +01:00
|
|
|
|
info->disabledUntil = now + std::chrono::seconds(delta);
|
|
|
|
|
}
|
|
|
|
|
throw;
|
2015-06-24 13:19:16 +02:00
|
|
|
|
}
|
2015-06-09 14:21:21 +02:00
|
|
|
|
}
|