#! /var/run/current-system/sw/bin/perl -w

use strict;
use Cwd;
use File::Basename;
use POSIX qw(dup2 :sys_wait_h);
use Hydra::Schema;
use Hydra::Helper::Nix;
use Hydra::Model::DB;
use IO::Handle;
use Nix::Store;

# Run from the Hydra data directory: build log files are created under
# "logs/" relative to the current working directory.
my $hydraPath = Hydra::Model::DB::getHydraPath();
chdir $hydraPath or die "cannot chdir to Hydra path '$hydraPath': $!";
my $db = Hydra::Model::DB->new();

STDOUT->autoflush();

# Time at which the most recent scheduling round selected its builds;
# undefined until the first round has completed.
my $lastTime;

#$SIG{CHLD} = 'IGNORE';


# Unlock builds whose building process has died.
#
# Examines every unfinished build that is marked busy and decides from
# its `locker` column whether the lock is stale:
#   - locker is not a positive integer: stale.  Such a value cannot be
#     probed with kill() (a non-numeric pid is a fatal error, and 0 or a
#     negative number addresses a process group instead of a process).
#   - locker is this process: stale if no scheduling round has completed
#     yet in this process ($lastTime is undefined), or if the build's
#     start time is more than 300 seconds before the last round.
#   - otherwise: stale if signal 0 cannot be delivered to that pid.
# A stale build gets busy = 0 and an empty locker; its build steps that
# are still marked busy are closed with status 4 and the current time as
# stop time.  Everything happens inside one transaction.
sub unlockDeadBuilds {
    txn_do($db, sub {
        my @builds = $db->resultset('Builds')->search({finished => 0, busy => 1});
        foreach my $build (@builds) {
            my $pid = $build->locker;
            $pid = "" unless defined $pid;
            my $unlock = 0;
            if ($pid !~ /\A[1-9][0-9]*\z/) {
                # Not a usable process id; never hand it to kill().
                $unlock = 1;
            } elsif ($pid == $$) {
                if (!defined $lastTime || $build->starttime < $lastTime - 300) {
                    $unlock = 1;
                }
            } elsif (kill(0, $pid) != 1) { # see if we can signal the process
                $unlock = 1;
            }
            if ($unlock) {
                print "build ", $build->id, " pid $pid died, unlocking\n";
                $build->update({ busy => 0, locker => "" });
                $build->buildsteps->search({ busy => 1 })->update({ busy => 0, status => 4, stoptime => time });
            }
        }
    });
}


# Given a build, return an arbitrary queued build on which this build
# depends; or nothing (undef in scalar context) if no such build exists.
#
# Arguments:
#   $buildsByDrv - hash ref mapping the derivation path of each queued
#                  build to its build id
#   $build       - the build row to inspect
#
# Candidates are the ".drv" paths, other than the build's own, in the
# closure that computeFSClosure reports for the build's derivation path.
# The first candidate present in $buildsByDrv is looked up in the
# database and returned.  A build whose derivation path is not valid has
# no dependencies as far as this function is concerned.
sub findBuildDependencyInQueue {
    my ($buildsByDrv, $build) = @_;

    my $drvPath = $build->drvpath;
    return unless isValidPath($drvPath);

    foreach my $dep (computeFSClosure(0, 0, $drvPath)) {
        next unless $dep =~ /\.drv$/ && $dep ne $drvPath;
        my $buildId = $buildsByDrv->{$dep};
        next unless defined $buildId;
        return $db->resultset('Builds')->find($buildId);
    }

    return;
}


# Select runnable builds and start a "hydra-build" process for each.
#
# Works in two phases:
#   1. Inside a single transaction, for every system type that has
#      queued, non-busy builds, pick builds until the concurrency limit
#      of that system type is reached.  The limit comes from the
#      SystemTypes table if the system has a row there, otherwise from
#      the summed maxJobs of the configured machines, otherwise 2.
#      Jobsets are tried in ascending order of the fraction of their
#      scheduling share used over the last 24 hours; within a jobset
#      builds are tried by descending priority, then ascending id.
#      Each selected build is marked busy, locked with this process's
#      pid, and given a log file path under "logs/".
#   2. Outside the transaction, fork and exec "hydra-build <id>" for
#      every selected build with stdout and stderr redirected to the
#      build's log file.  A build that could not be forked is marked
#      non-busy again.
#
# Takes no arguments and has no meaningful return value.  Sets $lastTime
# as a side effect.
sub checkBuilds {
    # print "looking for runnable builds...\n";

    my @buildsStarted;

    # Default concurrency limit per system type: the sum of maxJobs over
    # every machine that lists that system type.
    my $machines = getMachines();
    my %maxConcurrent;
    foreach my $machineName (keys %{$machines}) {
        my $machine = $machines->{$machineName};
        foreach my $system (@{$machine->{'systemTypes'}}) {
            $maxConcurrent{$system} = ($machine->{'maxJobs'} || 0) + ($maxConcurrent{$system} || 0);
        }
    }

    txn_do($db, sub {

        # Forget selections made by an earlier attempt in case this
        # closure is executed again after a rolled-back transaction.
        # NOTE(review): txn_do is defined outside this file; confirm
        # whether it (or the underlying DBIx::Class txn_do) retries.
        @buildsStarted = ();

        # Cache scheduled builds by derivation path to speed up
        # findBuildDependencyInQueue.
        my $buildsByDrv = {};
        $buildsByDrv->{$_->drvpath} = $_->id
            foreach $db->resultset('Builds')->search({ finished => 0 }, { join => ['project'] });

        # Get the system types for the runnable builds.
        my @systemTypes = $db->resultset('Builds')->search(
            { finished => 0, busy => 0 },
            { join => ['project'], select => ['system'], as => ['system'], distinct => 1 });

        # Get the total number of scheduling shares.  Fall back to 1 so
        # the share computation below cannot divide by zero; the total is
        # a common factor for all jobsets, so their ordering is unaffected.
        my $totalShares = getTotalShares($db) || 1;

        my $windowSize = 24 * 3600; # accounting window, in seconds
        my $costPerBuild = 30;      # penalty in seconds per started build

        # For each system type, select up to the maximum number of
        # concurrent build for that system type.
        foreach my $system (@systemTypes) {
            my $systemName = $system->system;

            # How many builds are already currently executing for this
            # system type?
            my $nrActive = $db->resultset('Builds')->search(
                {finished => 0, busy => 1, system => $systemName})->count;

            (my $systemTypeInfo) = $db->resultset('SystemTypes')->search({system => $systemName});
            my $max = defined $systemTypeInfo
                ? $systemTypeInfo->maxconcurrent
                : ($maxConcurrent{$systemName} // 2);

            my $extraAllowed = $max - $nrActive;
            next if $extraAllowed <= 0;

            print STDERR "starting at most $extraAllowed builds for system $systemName\n";

            my $totalWindowSize = $windowSize * $max;

            # Per-jobset time spent (project name => jobset name =>
            # seconds), computed once per system type and then adjusted
            # as builds are selected.
            my $timeSpentPerJobset;

            SLOT: while ($extraAllowed-- > 0) {

                my @runnableJobsets = $db->resultset('Builds')->search(
                    { finished => 0, busy => 0, system => $systemName },
                    { select => ['project', 'jobset'], distinct => 1 });

                # Nothing is runnable for this system type any more;
                # repeating the same query for the remaining slots would
                # not change that.
                last SLOT if @runnableJobsets == 0;

                my @res;

                foreach my $row (@runnableJobsets) {
                    my $projectName = $row->get_column('project');
                    my $jobsetName = $row->get_column('jobset');

                    my $jobset = $db->resultset('Jobsets')->find($projectName, $jobsetName)
                        or die "jobset $projectName:$jobsetName not found\n";

                    my $timeSpent = $timeSpentPerJobset->{$projectName}->{$jobsetName};

                    if (!defined $timeSpent) {
                        # Sum the duration of the build steps of all
                        # builds that are unfinished or stopped within
                        # the window; steps still running count up to now.
                        $timeSpent = $jobset->builds->search(
                            { },
                            { where => \ ("(finished = 0 or (me.stoptime >= " . (time() - $windowSize) . "))")
                            , join => 'buildsteps'
                            , select => \ "sum(coalesce(buildsteps.stoptime, ${\time}) - buildsteps.starttime)"
                            , as => "sum" })->single->get_column("sum") // 0;

                        # Add a 30s penalty for each started build.  This
                        # is to account for jobsets that have running
                        # builds but no build steps yet.
                        $timeSpent += $jobset->builds->search({ finished => 0, busy => 1 })->count * $costPerBuild;

                        $timeSpentPerJobset->{$projectName}->{$jobsetName} = $timeSpent;
                    }

                    my $share = $jobset->schedulingshares || 1; # prevent division by zero
                    my $used = $timeSpent / ($totalWindowSize * ($share / $totalShares));

                    #printf STDERR "%s:%s: %d s, total used = %.2f%%, share used = %.2f%%\n", $jobset->get_column('project'), $jobset->name, $timeSpent, $timeSpent / $totalWindowSize * 100, $used * 100;

                    push @res, { jobset => $jobset, used => $used };
                }

                # Try the jobsets that have used the least of their share first.
                foreach my $r (sort { $a->{used} <=> $b->{used} } @res) {
                    my $jobset = $r->{jobset};
                    #print STDERR "selected ", $jobset->get_column('project'), ':', $jobset->name, "\n";

                    # Select the highest-priority build for this jobset.
                    my @builds = $jobset->builds->search(
                        { finished => 0, busy => 0, system => $systemName },
                        { order_by => ["priority DESC", "id"] });

                    foreach my $build (@builds) {
                        # Find a dependency of $build that has no queued
                        # dependencies itself.  This isn't strictly necessary,
                        # but it ensures that Nix builds are done as part of
                        # their corresponding Hydra builds, rather than as a
                        # dependency of some other Hydra build.
                        while (my $dep = findBuildDependencyInQueue($buildsByDrv, $build)) {
                            $build = $dep;
                        }
                        # The dependency we ended up at may already be running.
                        next if $build->busy;

                        printf STDERR "starting build %d (%s:%s:%s) on %s; jobset at %.2f%% of its share\n",
                            $build->id, $build->project->name, $build->jobset->name, $build->job->name, $build->system, $r->{used} * 100;

                        # Start from a fresh log file; failures of mkdir
                        # (e.g. directory already exists) and unlink
                        # (e.g. no old log) are deliberately ignored.
                        my $logfile = getcwd() . "/logs/" . $build->id;
                        mkdir(dirname($logfile));
                        unlink($logfile);
                        $build->update(
                            { busy => 1
                            , locker => $$
                            , logfile => $logfile
                            });
                        push @buildsStarted, $build;

                        $timeSpentPerJobset->{$jobset->get_column('project')}->{$jobset->name} += $costPerBuild;

                        next SLOT;
                    }
                }

                last; # nothing found, give up on this system type
            }
        }

        $lastTime = time();

        $_->update({ starttime => time() }) foreach @buildsStarted;
    });

    # Actually start the builds we just selected.  We need to do this
    # outside the transaction in case it aborts or something.
    foreach my $build (@buildsStarted) {
        my $id = $build->id;
        eval {
            my $logfile = $build->logfile;
            my $child = fork();
            die "cannot fork: $!\n" unless defined $child;
            if ($child == 0) {
                # Child: send stdout/stderr to the log file and become
                # hydra-build.  exec only returns on failure, and it
                # reports the reason in $! rather than by dying.
                eval {
                    open(my $log, '>', $logfile) or die "cannot create logfile $logfile: $!\n";
                    POSIX::dup2(fileno($log), 1) or die "cannot redirect stdout to $logfile: $!\n";
                    POSIX::dup2(fileno($log), 2) or die "cannot redirect stderr to $logfile: $!\n";
                    exec("hydra-build", $id) or die "cannot exec hydra-build: $!\n";
                };
                warn "cannot start build $id: $@";
                # Leave without running the parent's cleanup code in the child.
                POSIX::_exit(1);
            }
        };
        if ($@) {
            warn $@;
            # The build never started; make it selectable again.
            # NOTE(review): unlockDeadBuilds clears locker to "" whereas
            # this keeps our pid; confirm which one is intended.
            txn_do($db, sub {
                $build->update({ busy => 0, locker => $$ });
            });
        }
    }
}


# One-shot mode: with the single argument "--unlock", release stale
# build locks and exit without scheduling anything.
if (@ARGV == 1 && $ARGV[0] eq "--unlock") {
    unlockDeadBuilds();
    exit 0;
}


# Daemon mode: run a scheduling round every five seconds, forever.  An
# exception in a round is reported as a warning and the loop carries on.
for (;;) {
    eval {
        # Clean up zombies.
        1 while waitpid(-1, WNOHANG()) > 0;

        unlockDeadBuilds();

        checkBuilds();
    };
    if ($@) {
        warn $@;
    }

    # print "sleeping...\n";
    sleep(5);
}