Add a plugin for backing up builds to S3
In your Hydra config, you can add an arbitrary number of <s3backup> sections, with the following options:

* name (required): Bucket name
* jobs (required): A regex matched against job names (in project:jobset:job format) to select the jobs that should be backed up to this bucket
* compression_type: bzip2 (default), xz, or none
* prefix: String to prepend to all Hydra-created S3 keys (if this is meant to represent a directory, include the trailing slash, e.g. "cache/"). Defaults to "".

After each build with an output (i.e. successful or failed-with-output builds), the output path and its closure are uploaded to the bucket as .nar files, with corresponding .narinfo files so the bucket can serve as a binary cache.

This plugin requires that S3 credentials be available. It uses Net::Amazon::S3; as of this commit, the nixpkgs version of that module can retrieve credentials from the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables, or from EC2 instance metadata when using an IAM role.

This commit also adds a hydra-s3-backup-collect-garbage program, which uses Hydra's GC roots directory to determine which paths are live, and then deletes all files except nix-cache-info and any .nar or .narinfo files corresponding to live paths. hydra-s3-backup-collect-garbage respects the prefix configuration option, so it won't delete anything outside the hierarchy you give it, and it has the same credential requirements as the plugin. A timer unit that runs the garbage collection periodically should probably be added to hydra-module.nix.

Note that two of the added tests fail, due to a bug in the interaction between Net::Amazon::S3 and fake-s3. Those behaviors work against real S3, though, so I'm committing this even with the broken tests.

Signed-off-by: Shea Levy <shea@shealevy.com>
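For illustration, a section like the following (the bucket name, job regex, and prefix are hypothetical, not part of this commit) would back up every job in myproject's release jobset to an xz-compressed cache under the cache/ prefix, using the same format as tests/s3-backup-test.config below:

<s3backup>
	name = my-hydra-cache
	jobs = myproject:release:.*
	compression_type = xz
	prefix = cache/
</s3backup>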
release.nix (41 lines changed)

@@ -100,6 +100,7 @@ in rec {
             TextDiff
             TextTable
             XMLSimple
+            NetAmazonS3
             nix git
           ];
       };
@@ -143,6 +144,7 @@ in rec {
       ''; # */
 
       meta.description = "Build of Hydra on ${system}";
+      passthru.perlDeps = perlDeps;
     });
 
 
@@ -211,4 +213,43 @@ in rec {
           $machine->mustSucceed("perl ${./tests/api-test.pl} >&2");
         '';
   });
+
+  tests.s3backup = genAttrs' (system:
+    with import <nixos/lib/testing.nix> { inherit system; };
+    let hydra = builtins.getAttr system build; in # build."${system}"
+    simpleTest {
+      machine =
+        { config, pkgs, ... }:
+        { services.postgresql.enable = true;
+          services.postgresql.package = pkgs.postgresql92;
+          environment.systemPackages = [ hydra pkgs.rubyLibs.fakes3 ];
+          virtualisation.memorySize = 2047;
+          boot.kernelPackages = pkgs.linuxPackages_3_10;
+          virtualisation.writableStore = true;
+          networking.extraHosts = ''
+            127.0.0.1 hydra.s3.amazonaws.com
+          '';
+        };
+
+      testScript =
+        ''
+          $machine->waitForJob("postgresql");
+
+          # Initialise the database and the state.
+          $machine->mustSucceed
+              ( "createdb -O root hydra"
+              , "psql hydra -f ${hydra}/libexec/hydra/sql/hydra-postgresql.sql"
+              , "mkdir /var/lib/hydra"
+              , "mkdir /tmp/jobs"
+              , "cp ${./tests/s3-backup-test.pl} /tmp/s3-backup-test.pl"
+              , "cp ${./tests/api-test.nix} /tmp/jobs/default.nix"
+              );
+
+          # start fakes3
+          $machine->mustSucceed("fakes3 --root /tmp/s3 --port 80 &>/dev/null &");
+          $machine->waitForOpenPort("80");
+
+          $machine->mustSucceed("cd /tmp && LOGNAME=root AWS_ACCESS_KEY_ID=foo AWS_SECRET_ACCESS_KEY=bar HYDRA_DBI='dbi:Pg:dbname=hydra;user=root;' HYDRA_CONFIG=${./tests/s3-backup-test.config} perl -I ${hydra}/libexec/hydra/lib -I ${hydra.perlDeps}/lib/perl5/site_perl ./s3-backup-test.pl >&2");
+        '';
+  });
 }
src/lib/Hydra/Plugin/S3Backup.pm (new file, 146 lines)

package Hydra::Plugin::S3Backup;

use strict;
use parent 'Hydra::Plugin';
use File::Temp;
use File::Basename;
use Fcntl;
use IO::File;
use Net::Amazon::S3;
use Net::Amazon::S3::Client;
use Digest::SHA;
use Nix::Config;
use Nix::Store;
use Hydra::Model::DB;
use Hydra::Helper::CatalystUtils;

my $client = Net::Amazon::S3::Client->new( s3 => Net::Amazon::S3->new( retry => 1 ) );
my %compressors = (
    xz => "| $Nix::Config::xz",
    bzip2 => "| $Nix::Config::bzip2",
    none => ""
);
my $lockfile = Hydra::Model::DB::getHydraPath . "/.hydra-s3backup.lock";

sub buildFinished {
    my ($self, $build, $dependents) = @_;

    # Only back up successful (0) and failed-with-output (6) builds.
    return unless $build->buildstatus == 0 or $build->buildstatus == 6;

    my $jobName = showJobName $build;
    my $job = $build->job;

    # <s3backup> may occur once (hash) or several times (array); normalize to a list.
    my $cfg = $self->{config}->{s3backup};
    my @config = defined $cfg ? ref $cfg eq "ARRAY" ? @$cfg : ($cfg) : ();

    my @matching_configs = ();
    foreach my $bucket_config (@config) {
        push @matching_configs, $bucket_config if $jobName =~ /^$bucket_config->{jobs}$/;
    }

    return unless @matching_configs;

    # !!! Maybe should do per-bucket locking?
    my $lockhandle = IO::File->new;
    open($lockhandle, "+>", $lockfile) or die "Opening $lockfile: $!";
    flock($lockhandle, Fcntl::LOCK_SH) or die "Read-locking $lockfile: $!";

    my @needed_paths = ();
    foreach my $output ($build->buildoutputs) {
        push @needed_paths, $output->path;
    }

    my %narinfos = ();
    my %compression_types = ();
    foreach my $bucket_config (@matching_configs) {
        my $compression_type =
          exists $bucket_config->{compression_type} ? $bucket_config->{compression_type} : "bzip2";
        die "Unsupported compression type $compression_type" unless exists $compressors{$compression_type};
        if (exists $compression_types{$compression_type}) {
            push @{$compression_types{$compression_type}}, $bucket_config;
        } else {
            $compression_types{$compression_type} = [ $bucket_config ];
            $narinfos{$compression_type} = [];
        }
    }

    my $build_id = $build->id;
    my $tempdir = File::Temp->newdir("s3-backup-nars-$build_id" . "XXXXX");

    my %seen = ();
    # Upload nars and build narinfos
    while (@needed_paths) {
        my $path = shift @needed_paths;
        next if exists $seen{$path};
        $seen{$path} = undef;
        my $hash = substr basename($path), 0, 32;
        my ($deriver, $narHash, $time, $narSize, $refs) = queryPathInfo($path, 0);
        my $system;
        if (defined $deriver and isValidPath($deriver)) {
            $system = derivationFromPath($deriver)->{platform};
        }
        # Walk the closure: every reference must be uploaded as well.
        foreach my $reference (@{$refs}) {
            push @needed_paths, $reference;
        }
        while (my ($compression_type, $configs) = each %compression_types) {
            my @incomplete_buckets = ();
            # Don't do any work if all the buckets have this path
            foreach my $bucket_config (@{$configs}) {
                my $bucket = $client->bucket( name => $bucket_config->{name} );
                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
                push @incomplete_buckets, $bucket_config
                  unless $bucket->object( key => $prefix . "$hash.narinfo" )->exists;
            }
            next unless @incomplete_buckets;
            my $compressor = $compressors{$compression_type};
            system("$Nix::Config::binDir/nix-store --export $path $compressor > $tempdir/nar") == 0 or die;
            my $digest = Digest::SHA->new(256);
            $digest->addfile("$tempdir/nar");
            my $file_hash = $digest->hexdigest;
            my @stats = stat "$tempdir/nar" or die "Couldn't stat $tempdir/nar";
            my $file_size = $stats[7];
            my $narinfo = "";
            $narinfo .= "StorePath: $path\n";
            $narinfo .= "URL: $hash.nar\n";
            $narinfo .= "Compression: $compression_type\n";
            $narinfo .= "FileHash: sha256:$file_hash\n";
            $narinfo .= "FileSize: $file_size\n";
            $narinfo .= "NarHash: $narHash\n";
            $narinfo .= "NarSize: $narSize\n";
            $narinfo .= "References: " . join(" ", map { basename $_ } @{$refs}) . "\n";
            if (defined $deriver) {
                $narinfo .= "Deriver: " . basename $deriver . "\n";
                if (defined $system) {
                    $narinfo .= "System: $system\n";
                }
            }
            push @{$narinfos{$compression_type}}, { hash => $hash, info => $narinfo };
            foreach my $bucket_config (@incomplete_buckets) {
                my $bucket = $client->bucket( name => $bucket_config->{name} );
                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
                my $nar_object = $bucket->object(
                    key => $prefix . "$hash.nar",
                    content_type => "application/x-nix-archive"
                );
                $nar_object->put_filename("$tempdir/nar");
            }
        }
    }

    # Upload narinfos
    while (my ($compression_type, $infos) = each %narinfos) {
        foreach my $bucket_config (@{$compression_types{$compression_type}}) {
            foreach my $info (@{$infos}) {
                my $bucket = $client->bucket( name => $bucket_config->{name} );
                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
                my $narinfo_object = $bucket->object(
                    key => $prefix . $info->{hash} . ".narinfo",
                    content_type => "text/x-nix-narinfo"
                );
                $narinfo_object->put($info->{info}) unless $narinfo_object->exists;
            }
        }
    }
}

1;
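For reference, each .narinfo that buildFinished uploads has the shape below; the angle-bracketed values are placeholders showing where each field comes from in the code above, not literal output:

StorePath: /nix/store/<hash>-<name>
URL: <hash>.nar
Compression: <bzip2, xz, or none>
FileHash: sha256:<hex SHA-256 of the compressed nar>
FileSize: <size of the compressed nar in bytes>
NarHash: <narHash from queryPathInfo>
NarSize: <narSize from queryPathInfo>
References: <space-separated basenames of the references>
Deriver: <basename of the deriver, if known>
System: <the deriver's platform, if the deriver is a valid path>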
src/script/Makefile.am

@@ -10,6 +10,7 @@ distributable_scripts =				\
   hydra-queue-runner				\
   hydra-server					\
   hydra-update-gc-roots				\
+  hydra-s3-backup-collect-garbage		\
   nix-prefetch-git				\
   nix-prefetch-bzr				\
   nix-prefetch-hg

src/script/hydra-s3-backup-collect-garbage (new executable file, 58 lines)

#! /var/run/current-system/sw/bin/perl -w

use strict;
use File::Basename;
use Fcntl;
use IO::File;
use Net::Amazon::S3;
use Net::Amazon::S3::Client;
use Nix::Config;
use Nix::Store;
use Hydra::Model::DB;
use Hydra::Helper::Nix;

my $cfg = getHydraConfig()->{s3backup};
my @config = defined $cfg ? ref $cfg eq "ARRAY" ? @$cfg : ($cfg) : ();

exit 0 unless @config;

# Take the lock exclusively, so no backup upload runs while we collect garbage.
my $lockfile = Hydra::Model::DB::getHydraPath . "/.hydra-s3backup.lock";
my $lockhandle = IO::File->new;
open($lockhandle, ">", $lockfile) or die "Opening $lockfile: $!";
flock($lockhandle, Fcntl::LOCK_EX) or die "Write-locking $lockfile: $!";

my $client = Net::Amazon::S3::Client->new( s3 => Net::Amazon::S3->new( retry => 1 ) );
my $db = Hydra::Model::DB->new();

my $gcRootsDir = getGCRootsDir;
opendir DIR, $gcRootsDir or die;
my @roots = readdir DIR;
closedir DIR;

my @actual_roots = ();
foreach my $link (@roots) {
    next if $link eq "." || $link eq "..";
    push @actual_roots, $Nix::Config::storeDir . "/$link";
}

# Don't delete a nix-cache-info file, if present
my %closure = ( "nix-cache-info" => undef );
foreach my $path (computeFSClosure(0, 0, @actual_roots)) {
    my $hash = substr basename($path), 0, 32;
    $closure{"$hash.narinfo"} = undef;
    $closure{"$hash.nar"} = undef;
}

foreach my $bucket_config (@config) {
    my $bucket = $client->bucket( name => $bucket_config->{name} );
    my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";

    my $cache_stream = $bucket->list({ prefix => $prefix });
    until ($cache_stream->is_done) {
        foreach my $object ($cache_stream->items) {
            $object->delete unless exists $closure{basename($object->key)};
        }
    }
}

1;
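The commit message suggests running this collector from a timer unit in hydra-module.nix; nothing in this commit wires that up, but a minimal sketch might look like the following (the schedule, user, and hydra package reference are assumptions, not part of this commit):

systemd.services.hydra-s3-backup-gc = {
  description = "Hydra S3 backup garbage collection";
  # Needs the same credentials as the plugin: an IAM role, or
  # AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY in the service environment.
  serviceConfig = {
    Type = "oneshot";
    User = "hydra";
    ExecStart = "${hydra}/bin/hydra-s3-backup-collect-garbage";
  };
};
systemd.timers.hydra-s3-backup-gc = {
  wantedBy = [ "timers.target" ];
  timerConfig.OnCalendar = "daily";
};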
tests/Setup.pm

@@ -61,7 +61,7 @@ sub createJobsetWithOneInput {
 
 sub evalSucceeds {
     my ($jobset) = @_;
-    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("../src/script/hydra-evaluator", $jobset->project->name, $jobset->name));
+    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("hydra-evaluator", $jobset->project->name, $jobset->name));
     chomp $stdout; chomp $stderr;
     print STDERR "Evaluation errors for jobset ".$jobset->project->name.":".$jobset->name.": \n".$jobset->errormsg."\n" if $jobset->errormsg;
     print STDERR "STDOUT: $stdout\n" if $stdout ne "";
@@ -71,7 +71,7 @@ sub evalSucceeds {
 
 sub runBuild {
     my ($build) = @_;
-    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("../src/script/hydra-build", $build->id));
+    my ($res, $stdout, $stderr) = captureStdoutStderr(60, ("hydra-build", $build->id));
     print "STDERR: $stderr" if $stderr ne "";
     return !$res;
 }

tests/api-test.nix

@@ -1,6 +1,7 @@
 let
+  thisFile = builtins.toFile "default.nix" (builtins.readFile ./default.nix);
   builder = builtins.toFile "builder.sh" ''
-    echo -n ${builtins.readFile ./default.nix} > $out
+    echo ${thisFile} > $out
   '';
 in {
   job = derivation {

tests/s3-backup-test.config (new file, 4 lines)

<s3backup>
	jobs = tests:basic:job
	name = hydra
</s3backup>

tests/s3-backup-test.pl (new executable file, 49 lines)

use strict;
use File::Basename;
use Hydra::Model::DB;
use Hydra::Helper::Nix;
use Nix::Store;
use Cwd;

my $db = Hydra::Model::DB->new;

use Test::Simple tests => 6;

$db->resultset('Users')->create({ username => "root", emailaddress => 'root@invalid.org', password => '' });

$db->resultset('Projects')->create({name => "tests", displayname => "", owner => "root"});
my $project = $db->resultset('Projects')->update_or_create({name => "tests", displayname => "", owner => "root"});
my $jobset = $project->jobsets->create({name => "basic", nixexprinput => "jobs", nixexprpath => "default.nix", emailoverride => ""});

my $jobsetinput;

$jobsetinput = $jobset->jobsetinputs->create({name => "jobs", type => "path"});
$jobsetinput->jobsetinputalts->create({altnr => 0, value => getcwd . "/jobs"});
system("hydra-evaluator " . $jobset->project->name . " " . $jobset->name);

my $successful_hash;
foreach my $build ($jobset->builds->search({finished => 0})) {
    system("hydra-build " . $build->id);
    my @outputs = $build->buildoutputs->all;
    my $hash = substr basename($outputs[0]->path), 0, 32;
    if ($build->job->name eq "job") {
        ok(-e "/tmp/s3/hydra/$hash.nar", "The nar of a successful matched build is uploaded");
        ok(-e "/tmp/s3/hydra/$hash.narinfo", "The narinfo of a successful matched build is uploaded");
        $successful_hash = $hash;
    }
}

system("hydra-s3-backup-collect-garbage");
ok(-e "/tmp/s3/hydra/$successful_hash.nar", "The nar of a build that's a root is not removed by gc");
ok(-e "/tmp/s3/hydra/$successful_hash.narinfo", "The narinfo of a build that's a root is not removed by gc");

my $gcRootsDir = getGCRootsDir;
opendir DIR, $gcRootsDir or die;
while(readdir DIR) {
    next if $_ eq "." or $_ eq "..";
    unlink "$gcRootsDir/$_";
}
closedir DIR;
system("hydra-s3-backup-collect-garbage");
# Use "!" here, not the low-precedence "not": ok(not -e $f, $msg) parses as
# ok(not((-e $f), $msg)), which discards the file test and always fails.
ok(!-e "/tmp/s3/hydra/$successful_hash.nar", "The nar of a build that's not a root is removed by gc");
ok(!-e "/tmp/s3/hydra/$successful_hash.narinfo", "The narinfo of a build that's not a root is removed by gc");