root_of[g.uuid] == current_user.uuid
end
end
+
+ # helper method to get links for given object or uuid
+ helper_method :links_for_object
+ def links_for_object object_or_uuid
+ raise ArgumentError, 'No input argument' unless object_or_uuid
+ preload_links_for_objects([object_or_uuid])
+ uuid = object_or_uuid.is_a?(String) ? object_or_uuid : object_or_uuid.uuid
+ @all_links_for[uuid] ||= []
+ end
+
+ # helper method to preload links for given objects and uuids
+ helper_method :preload_links_for_objects
+ def preload_links_for_objects objects_and_uuids
+ @all_links_for ||= {}
+
+ raise ArgumentError, 'Argument is not an array' unless objects_and_uuids.is_a? Array
+ return @all_links_for if objects_and_uuids.empty?
+
+ uuids = objects_and_uuids.collect { |x| x.is_a?(String) ? x : x.uuid }
+
+ # if already preloaded for all of these uuids, return
+ if not uuids.select { |x| @all_links_for[x].nil? }.any?
+ return @all_links_for
+ end
+
+ uuids.each do |x|
+ @all_links_for[x] = []
+ end
+
+ # TODO: make sure we get every page of results from API server
+ Link.filter([['head_uuid', 'in', uuids]]).each do |link|
+ @all_links_for[link.head_uuid] << link
+ end
+ @all_links_for
+ end
+
+ # helper method to get a certain number of objects of a specific type
+ # this can be used to replace any uses of: "dataclass.limit(n)"
+ helper_method :get_n_objects_of_class
+ def get_n_objects_of_class dataclass, size
+ @objects_map_for ||= {}
+
+ raise ArgumentError, 'Argument is not a data class' unless dataclass.is_a? Class
+ raise ArgumentError, 'Argument is not a valid limit size' unless (size && size>0)
+
+ # if the objects_map_for has a value for this dataclass, and the
+ # size used to retrieve those objects is equal, return it
+ size_key = "#{dataclass.name}_size"
+ if @objects_map_for[dataclass.name] && @objects_map_for[size_key] &&
+ (@objects_map_for[size_key] == size)
+ return @objects_map_for[dataclass.name]
+ end
+
+ @objects_map_for[size_key] = size
+ @objects_map_for[dataclass.name] = dataclass.limit(size)
+ end
+
+ # helper method to get collections for the given uuid
+ helper_method :collections_for_object
+ def collections_for_object uuid
+ raise ArgumentError, 'No input argument' unless uuid
+ preload_collections_for_objects([uuid])
+ @all_collections_for[uuid] ||= []
+ end
+
+ # helper method to preload collections for the given uuids
+ helper_method :preload_collections_for_objects
+ def preload_collections_for_objects uuids
+ @all_collections_for ||= {}
+
+ raise ArgumentError, 'Argument is not an array' unless uuids.is_a? Array
+ return @all_collections_for if uuids.empty?
+
+ # if already preloaded for all of these uuids, return
+ if not uuids.select { |x| @all_collections_for[x].nil? }.any?
+ return @all_collections_for
+ end
+
+ uuids.each do |x|
+ @all_collections_for[x] = []
+ end
+
+ # TODO: make sure we get every page of results from API server
+ Collection.where(uuid: uuids).each do |collection|
+ @all_collections_for[collection.uuid] << collection
+ end
+ @all_collections_for
+ end
+
+ # helper method to get log collections for the given log
+ helper_method :log_collections_for_object
+ def log_collections_for_object log
+ raise ArgumentError, 'No input argument' unless log
+
+ preload_log_collections_for_objects([log])
+
+ uuid = log
+ fixup = /([a-f0-9]{32}\+\d+)(\+?.*)/.match(log)
+ if fixup && fixup.size>1
+ uuid = fixup[1]
+ end
+
+ @all_log_collections_for[uuid] ||= []
+ end
+
+ # helper method to preload log collections for the given logs or uuids
+ helper_method :preload_log_collections_for_objects
+ def preload_log_collections_for_objects logs
+ @all_log_collections_for ||= {}
+
+ raise ArgumentError, 'Argument is not an array' unless logs.is_a? Array
+ return @all_log_collections_for if logs.empty?
+
+ uuids = []
+ logs.each do |log|
+ fixup = /([a-f0-9]{32}\+\d+)(\+?.*)/.match(log)
+ if fixup && fixup.size>1
+ uuids << fixup[1]
+ else
+ uuids << log
+ end
+ end
+
+ # if already preloaded for all of these uuids, return
+ if not uuids.select { |x| @all_log_collections_for[x].nil? }.any?
+ return @all_log_collections_for
+ end
+
+ uuids.each do |x|
+ @all_log_collections_for[x] = []
+ end
+
+ # TODO: make sure we get every page of results from API server
+ Collection.where(uuid: uuids).each do |collection|
+ @all_log_collections_for[collection.uuid] << collection
+ end
+ @all_log_collections_for
+ end
+
+ # helper method to get object of a given dataclass and uuid
+ helper_method :object_for_dataclass
+ def object_for_dataclass dataclass, uuid
+ raise ArgumentError, 'Missing dataclass or uuid argument' unless (dataclass && uuid)
+ preload_objects_for_dataclass(dataclass, [uuid])
+ @objects_for[uuid]
+ end
+
+ # helper method to preload objects for given dataclass and uuids
+ helper_method :preload_objects_for_dataclass
+ def preload_objects_for_dataclass dataclass, uuids
+ @objects_for ||= {}
+
+ raise ArgumentError, 'Argument is not a data class' unless dataclass.is_a? Class
+ raise ArgumentError, 'Argument is not an array' unless uuids.is_a? Array
+
+ return @objects_for if uuids.empty?
+
+ # if already preloaded for all of these uuids, return
+ if not uuids.select { |x| @objects_for[x].nil? }.any?
+ return @objects_for
+ end
+
+ dataclass.where(uuid: uuids).each do |obj|
+ @objects_for[obj.uuid] = obj
+ end
+ @objects_for
+ end
+
end
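
Taken together, these helpers let a page batch its API round trips up front and then read from the memoized maps, instead of issuing one query per row. A minimal usage sketch (the uuids are hypothetical placeholders):

  uuids = ['zzzzz-4zz18-aaaaaaaaaaaaaaa', 'zzzzz-4zz18-bbbbbbbbbbbbbbb']
  preload_links_for_objects uuids
  preload_collections_for_objects uuids
  uuids.each do |uuid|
    tags = links_for_object(uuid).select { |link| link.link_class == 'tag' }
    collection = collections_for_object(uuid).first
  end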
link_name = attrvalue.friendly_link_name
else
begin
- link_name = resource_class.find(link_uuid).friendly_link_name
+ if resource_class.name == 'Collection'
+ link_name = collections_for_object(link_uuid).andand.first.andand.friendly_link_name
+ else
+ link_name = object_for_dataclass(resource_class, link_uuid).andand.friendly_link_name
+ end
rescue RuntimeError
# If that lookup failed, the link will too. So don't make one.
return attrvalue
link_name = "#{resource_class.to_s}: #{link_name}"
end
if !opts[:no_tags] and resource_class == Collection
- Link.where(head_uuid: link_uuid, link_class: ["tag", "identifier"]).each do |tag|
- link_name += ' <span class="label label-info">' + html_escape(tag.name) + '</span>'
+ links_for_object(link_uuid).each do |tag|
+ if tag.link_class.in? ["tag", "identifier"]
+ link_name += ' <span class="label label-info">' + html_escape(tag.name) + '</span>'
+ end
end
end
if opts[:thumbnail] and resource_class == Collection
# add an image thumbnail if the collection consists of a single image file.
- Collection.where(uuid: link_uuid).each do |c|
+ collections_for_object(link_uuid).each do |c|
if c.files.length == 1 and CollectionsHelper::is_image c.files.first[1]
link_name += " "
link_name += image_tag "#{url_for c}/#{CollectionsHelper::file_path c.files.first}", style: "height: 4em; width: auto"
dn += '[value]'
end
+ # preload data
+ preload_uuids = []
+ items = []
selectables = []
+
attrtext = attrvalue
if dataclass and dataclass.is_a? Class
+ objects = get_n_objects_of_class dataclass, 10
+ objects.each do |item|
+ items << item
+ preload_uuids << item.uuid
+ end
if attrvalue and !attrvalue.empty?
- Link.where(head_uuid: attrvalue, link_class: ["tag", "identifier"]).each do |tag|
- attrtext += " [#{tag.name}]"
+ preload_uuids << attrvalue
+ end
+ preload_links_for_objects preload_uuids
+
+ if attrvalue and !attrvalue.empty?
+ links_for_object(attrvalue).each do |link|
+ if link.link_class.in? ["tag", "identifier"]
+ attrtext += " [#{link.name}]"
+ end
end
selectables.append({name: attrtext, uuid: attrvalue, type: dataclass.to_s})
end
- #dataclass.where(uuid: attrvalue).each do |item|
- # selectables.append({name: item.uuid, uuid: item.uuid, type: dataclass.to_s})
- #end
itemuuids = []
- dataclass.limit(10).each do |item|
+ items.each do |item|
itemuuids << item.uuid
selectables.append({name: item.uuid, uuid: item.uuid, type: dataclass.to_s})
end
- Link.where(head_uuid: itemuuids, link_class: ["tag", "identifier"]).each do |tag|
- selectables.each do |selectable|
- if selectable['uuid'] == tag.head_uuid
- selectable['name'] += ' [' + tag.name + ']'
+
+ itemuuids.each do |itemuuid|
+ links_for_object(itemuuid).each do |link|
+ if link.link_class.in? ["tag", "identifier"]
+ selectables.each do |selectable|
+ if selectable[:uuid] == link.head_uuid
+ selectable[:name] += ' [' + link.name + ']'
+ end
+ end
end
end
end
<% outgoing = Link.where(tail_uuid: @object.uuid) %>
<% incoming = Link.where(head_uuid: @object.uuid) %>
+<%
+ preload_uuids = []
+ preload_head_uuids = []
+ outgoing.results.each do |link|
+ preload_uuids << link.uuid
+ preload_uuids << link.head_uuid
+ preload_head_uuids << link.head_uuid
+ end
+ preload_collections_for_objects preload_uuids
+ preload_links_for_objects preload_head_uuids
+%>
+
<h3>Metadata about this object</h3>
<% if outgoing.items_available > 0 %>
<table class="table topalign">
<th>Progress</th>
</tr>
+ <%# Preload collections, logs, and pipeline instance objects %>
+ <%
+ collection_uuids = []
+ log_uuids = []
+ @my_jobs[0..6].each do |j|
+ collection_uuids << j.output
+ log_uuids << j.log
+ end
+
+ @my_collections[0..6].each do |c|
+ collection_uuids << c.uuid
+ end
+
+ preload_collections_for_objects collection_uuids
+ preload_log_collections_for_objects log_uuids
+
+ pi_uuids = []
+ @my_pipelines[0..6].each do |p|
+ pi_uuids << p.uuid
+ end
+ if pi_uuids.any?
+ resource_class = resource_class_for_uuid(pi_uuids.first, friendly_name: true)
+ preload_objects_for_dataclass resource_class, pi_uuids
+ end
+ %>
+
<% @my_jobs[0..6].each do |j| %>
<tr data-object-uuid="<%= j.uuid %>">
<td>
<td>
<small>
<% if j.success and j.output %>
-
<a href="<%= collection_path(j.output) %>">
- <% Collection.limit(1).where(uuid: j.output).each do |c| %>
- <% c.files.each do |file| %>
- <%= file[0] == '.' ? file[1] : "#{file[0]}/#{file[1]}" %>
- <% end %>
- <% end %>
- </a>
-
- <% end %>
- </small>
-</td>
+ <% collections = collections_for_object(j.output) %>
+ <% if collections && !collections.empty? %>
+ <% c = collections.first %>
+ <% c.files.each do |file| %>
+ <%= file[0] == '.' ? file[1] : "#{file[0]}/#{file[1]}" %>
+ <% end %>
+ <% end %>
+ </a>
+ <% end %>
+ </small>
+ </td>
<td>
<small>
<% if j.log %>
- <% fixup = /([a-f0-9]{32}\+\d+)(\+?.*)/.match(j.log)%>
- <% Collection.limit(1).where(uuid: fixup[1]).each do |c| %>
+ <% log_collections = log_collections_for_object(j.log) %>
+ <% if log_collections && !log_collections.empty? %>
+ <% c = log_collections.first %>
<% c.files.each do |file| %>
<a href="<%= collection_path(j.log) %>/<%= file[1] %>?disposition=inline&size=<%= file[2] %>">Log</a>
<% end %>
--- /dev/null
+require 'test_helper'
+
+class ApplicationControllerTest < ActionController::TestCase
+
+ setup do
+ @user_dataclass = ArvadosBase.resource_class_for_uuid(api_fixture('users')['active']['uuid'])
+ end
+
+ test "links for object" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ link_head_uuid = api_fixture('links')['foo_file_readable_by_active']['head_uuid']
+
+ links = ac.send :links_for_object, link_head_uuid
+
+ assert links, 'Expected links'
+ assert links.is_a?(Array), 'Expected an array'
+ assert links.size > 0, 'Expected at least one link'
+ assert links[0][:uuid], 'Expected uuid for the head_link'
+ end
+
+ test "preload links for objects and uuids" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ link1_head_uuid = api_fixture('links')['foo_file_readable_by_active']['head_uuid']
+ link2_uuid = api_fixture('links')['bar_file_readable_by_active']['uuid']
+ link3_head_uuid = api_fixture('links')['bar_file_readable_by_active']['head_uuid']
+
+ link2_object = User.find(api_fixture('users')['active']['uuid'])
+ link2_object_uuid = link2_object['uuid']
+
+ uuids = [link1_head_uuid, link2_object, link3_head_uuid]
+ links = ac.send :preload_links_for_objects, uuids
+
+ assert links, 'Expected links'
+ assert links.is_a?(Hash), 'Expected a hash'
+ assert links.size == 3, 'Expected three objects in the preloaded links hash'
+ assert links[link1_head_uuid], 'Expected links for the passed in link head_uuid'
+ assert links[link2_object_uuid], 'Expected links for the passed in object uuid'
+ assert links[link3_head_uuid], 'Expected links for the passed in link head_uuid'
+
+ # invoke again for this same input. this time, the preloaded data will be returned
+ links = ac.send :preload_links_for_objects, uuids
+ assert links, 'Expected links'
+ assert links.is_a?(Hash), 'Expected a hash'
+ assert links.size == 3, 'Expected three objects in the preloaded links hash'
+ assert links[link1_head_uuid], 'Expected links for the passed in link head_uuid'
+ end
+
+ [ [:preload_links_for_objects, [] ],
+ [:preload_collections_for_objects, [] ],
+ [:preload_log_collections_for_objects, [] ],
+ [:preload_objects_for_dataclass, [] ],
+ ].each do |input|
+ test "preload data for empty array input #{input}" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ if input[0] == :preload_objects_for_dataclass
+ objects = ac.send input[0], @user_dataclass, input[1]
+ else
+ objects = ac.send input[0], input[1]
+ end
+
+ assert objects, 'Expected objects'
+ assert objects.is_a?(Hash), 'Expected a hash'
+ assert objects.size == 0, 'Expected no objects in the preloaded hash'
+ end
+ end
+
+ [ [:preload_links_for_objects, 'input not an array'],
+ [:preload_links_for_objects, nil],
+ [:links_for_object, nil],
+ [:preload_collections_for_objects, 'input not an array'],
+ [:preload_collections_for_objects, nil],
+ [:collections_for_object, nil],
+ [:preload_log_collections_for_objects, 'input not an array'],
+ [:preload_log_collections_for_objects, nil],
+ [:log_collections_for_object, nil],
+ [:preload_objects_for_dataclass, 'input not an array'],
+ [:preload_objects_for_dataclass, nil],
+ [:object_for_dataclass, 'some_dataclass', nil],
+ [:object_for_dataclass, nil, 'some_uuid'],
+ ].each do |input|
+ test "preload data for wrong type input #{input}" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ if input[0] == :object_for_dataclass
+ assert_raise ArgumentError do
+ ac.send input[0], input[1], input[2]
+ end
+ else
+ assert_raise ArgumentError do
+ ac.send input[0], input[1]
+ end
+ end
+ end
+ end
+
+ [ [:links_for_object, 'no-such-uuid' ],
+ [:collections_for_object, 'no-such-uuid' ],
+ [:log_collections_for_object, 'no-such-uuid' ],
+ [:object_for_dataclass, 'no-such-uuid' ],
+ ].each do |input|
+ test "get data for no such uuid #{input}" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ if input[0] == :object_for_dataclass
+ object = ac.send input[0], @user_dataclass, input[1]
+ assert_not object, 'Expected no object'
+ else
+ objects = ac.send input[0], input[1]
+ assert objects, 'Expected objects'
+ assert objects.is_a?(Array), 'Expected an array'
+ end
+ end
+ end
+
+ test "get 10 objects of data class user" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ objects = ac.send :get_n_objects_of_class, @user_dataclass, 10
+
+ assert objects, 'Expected objects'
+ assert objects.is_a?(ArvadosResourceList), 'Expected an ArvadosResourceList'
+
+ first_object = objects.first
+ assert first_object, 'Expected at least one object'
+ assert_equal 'User', first_object.class.name, 'Expected user object'
+
+ # invoke it again. this time, the preloaded info will be returned
+ objects = ac.send :get_n_objects_of_class, @user_dataclass, 10
+ assert objects, 'Expected objects'
+ assert_equal 'User', objects.first.class.name, 'Expected user object'
+ end
+
+ [ ['User', 10],
+ [nil, 10],
+ [@user_dataclass, 0],
+ [@user_dataclass, -1],
+ [@user_dataclass, nil] ].each do |input|
+ test "get_n_objects for incorrect input #{input}" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ assert_raise ArgumentError do
+ ac.send :get_n_objects_of_class, input[0], input[1]
+ end
+ end
+ end
+
+ test "collections for object" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ uuid = api_fixture('collections')['foo_file']['uuid']
+
+ collections = ac.send :collections_for_object, uuid
+
+ assert collections, 'Expected collections'
+ assert collections.is_a?(Array), 'Expected an array'
+ assert collections.size == 1, 'Expected one collection object'
+ assert_equal collections[0][:uuid], uuid, 'Expected uuid not found in collections'
+ end
+
+ test "preload collections for given uuids" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ uuid1 = api_fixture('collections')['foo_file']['uuid']
+ uuid2 = api_fixture('collections')['bar_file']['uuid']
+
+ uuids = [uuid1, uuid2]
+ collections = ac.send :preload_collections_for_objects, uuids
+
+ assert collections, 'Expected collection'
+ assert collections.is_a?(Hash), 'Expected a hash'
+ assert collections.size == 2, 'Expected two objects in the preloaded collection hash'
+ assert collections[uuid1], 'Expected collections for the passed in uuid'
+ assert_equal collections[uuid1].size, 1, 'Expected one collection for the passed in uuid'
+ assert collections[uuid2], 'Expected collections for the passed in uuid'
+ assert_equal collections[uuid2].size, 1, 'Expected one collection for the passed in uuid'
+
+ # invoke again for this same input. this time, the preloaded data will be returned
+ collections = ac.send :preload_collections_for_objects, uuids
+ assert collections, 'Expected collection'
+ assert collections.is_a?(Hash), 'Expected a hash'
+ assert collections.size == 2, 'Expected two objects in the preloaded collection hash'
+ assert collections[uuid1], 'Expected collections for the passed in uuid'
+ end
+
+ test "log collections for object" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ uuid = api_fixture('logs')['log4']['object_uuid']
+
+ collections = ac.send :log_collections_for_object, uuid
+
+ assert collections, 'Expected collections'
+ assert collections.is_a?(Array), 'Expected an array'
+ assert collections.size == 1, 'Expected one collection object'
+ assert_equal collections[0][:uuid], uuid, 'Expected uuid not found in collections'
+ end
+
+ test "preload log collections for given uuids" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ uuid1 = api_fixture('logs')['log4']['object_uuid']
+ uuid2 = api_fixture('collections')['bar_file']['uuid']
+
+ uuids = [uuid1, uuid2]
+ collections = ac.send :preload_log_collections_for_objects, uuids
+
+ assert collections, 'Expected collection'
+ assert collections.is_a?(Hash), 'Expected a hash'
+ assert collections.size == 2, 'Expected two objects in the preloaded collection hash'
+ assert collections[uuid1], 'Expected collections for the passed in uuid'
+ assert_equal collections[uuid1].size, 1, 'Expected one collection for the passed in uuid'
+ assert collections[uuid2], 'Expected collections for the passed in uuid'
+ assert_equal collections[uuid2].size, 1, 'Expected one collection for the passed in uuid'
+
+ # invoke again for this same input. this time, the preloaded data will be returned
+ collections = ac.send :preload_log_collections_for_objects, uuids
+ assert collections, 'Expected collection'
+ assert collections.is_a?(Hash), 'Expected a hash'
+ assert collections.size == 2, 'Expected two objects in the preloaded collection hash'
+ assert collections[uuid1], 'Expected collections for the passed in uuid'
+ end
+
+ test "object for dataclass" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ dataclass = ArvadosBase.resource_class_for_uuid(api_fixture('jobs')['running']['uuid'])
+ uuid = api_fixture('jobs')['running']['uuid']
+
+ obj = ac.send :object_for_dataclass, dataclass, uuid
+
+ assert obj, 'Expected object'
+ assert_equal 'Job', obj.class.name, 'Expected a Job object'
+ assert_equal uuid, obj['uuid'], 'Expected uuid not found'
+ assert_equal api_fixture('jobs')['running']['script_version'], obj['script_version'],
+ 'Expected script_version not found'
+ end
+
+ test "preload objects for dataclass" do
+ use_token :active
+
+ ac = ApplicationController.new
+
+ dataclass = ArvadosBase.resource_class_for_uuid(api_fixture('jobs')['running']['uuid'])
+
+ uuid1 = api_fixture('jobs')['running']['uuid']
+ uuid2 = api_fixture('jobs')['running_cancelled']['uuid']
+
+ uuids = [uuid1, uuid2]
+ objects = ac.send :preload_objects_for_dataclass, dataclass, uuids
+
+ assert objects, 'Expected objects'
+ assert objects.is_a?(Hash), 'Expected a hash'
+
+ assert objects.size == 2, 'Expected two objects in the preloaded hash'
+ assert objects[uuid1], 'Expected an object for the passed in uuid'
+ assert objects[uuid2], 'Expected an object for the passed in uuid'
+
+ # invoke again for this same input. this time, the preloaded data will be returned
+ objects = ac.send :preload_objects_for_dataclass, dataclass, uuids
+ assert objects, 'Expected objects'
+ assert objects.is_a?(Hash), 'Expected a hash'
+ assert objects.size == 2, 'Expected two objects in the preloaded hash'
+
+ # invoke again with one more uuid
+ uuids << api_fixture('jobs')['foobar']['uuid']
+ objects = ac.send :preload_objects_for_dataclass, dataclass, uuids
+ assert objects, 'Expected objects'
+ assert objects.is_a?(Hash), 'Expected a hash'
+ assert objects.size == 3, 'Expected three objects in the preloaded hash'
+ end
+
+end
elsif ['less', 'check'].index @sub then
# wh* shims
exec `which wh#{@sub}`.strip, *ARGV
+ elsif @sub == 'docker'
+ exec `which arv-keepdocker`.strip, *ARGV
else
puts "Usage: \n" +
"#{$0} keep ls\n" +
"#{$0} keep get\n" +
"#{$0} keep put\n" +
"#{$0} keep less\n" +
- "#{$0} keep check\n"
+ "#{$0} keep check\n" +
+ "#{$0} keep docker\n"
end
abort
when 'pipeline'
$command .= "&& exec arv-mount --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
if ($docker_image)
{
- $command .= "crunchstat -cgroup-parent=/sys/fs/cgroup/lxc -cgroup-cid=$ENV{TASK_WORK}/docker.cid -poll=1000 ";
- $command .= "$docker_bin run -i -a stdin -a stdout -a stderr -cidfile=$ENV{TASK_WORK}/docker.cid ";
+ $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$ENV{TASK_WORK}/docker.cid -poll=10000 ";
+ $command .= "$docker_bin run -i -a stdin -a stdout -a stderr --cidfile=$ENV{TASK_WORK}/docker.cid ";
# Dynamically configure the container to use the host system as its
# DNS server. Get the host's global addresses from the ip command,
# and turn them into docker --dns options using gawk.
}
$command .= "\Q$docker_image\E ";
} else {
- $command .= "crunchstat -cgroup-path=/sys/fs/cgroup "
+ $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 "
}
+ $command .= "stdbuf -o0 -e0 ";
$command .= "$ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
my @execargs = ('bash', '-c', $command);
srun (\@srunargs, \@execargs, undef, $build_script_to_send);
or die "failed to get collated manifest: $!";
# Read the original manifest, and make a stripped copy with permission
# hints removed, which we use to compute the collection uuid.
- my @manifest_lines = ();
+ my @stripped_manifest_lines = ();
+ my $orig_manifest_text = '';
while (my $manifest_line = <$orig_manifest>) {
+ $orig_manifest_text .= $manifest_line;
my @words = split(/ /, $manifest_line, -1);
foreach my $ii (0..$#words) {
if ($words[$ii] =~ /^[0-9a-f]{32}\+/) {
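+ # Remove the permission hint "+A<40-hex signature>@<8-hex expiry>",
+ # if present, leaving the rest of the locator intact.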
$words[$ii] =~ s/\+A[0-9a-f]{40}@[0-9a-f]{8}\b//;
}
}
- push(@manifest_lines, join(" ", @words));
+ push(@stripped_manifest_lines, join(" ", @words));
}
- my $manifest_text = join("", @manifest_lines);
+ my $stripped_manifest_text = join("", @stripped_manifest_lines);
my $output = $arv->{'collections'}->{'create'}->execute('collection' => {
- 'uuid' => md5_hex($manifest_text),
- 'manifest_text' => $manifest_text,
+ 'uuid' => md5_hex($stripped_manifest_text),
+ 'manifest_text' => $orig_manifest_text,
});
$Job->update_attributes('output' => $output->{uuid});
if ($Job->{'output_is_persistent'}) {
--- /dev/null
+#!/usr/bin/env python
+
+import errno
+import os
+
+def _ignore_error(error):
+ return None
+
+def _raise_error(error):
+ raise error
+
+def make_home_conf_dir(path, mode=None, errors='ignore'):
+ # Make the directory path under the user's home directory, making parent
+ # directories as needed.
+ # If the directory is newly created, and a mode is specified, chmod it
+ # with those permissions.
+ # If there's an error, return None if errors is 'ignore', else raise an
+ # exception.
+ error_handler = _ignore_error if (errors == 'ignore') else _raise_error
+ tilde_path = os.path.join('~', path)
+ abs_path = os.path.expanduser(tilde_path)
+ if abs_path == tilde_path:
+ return error_handler(ValueError("no home directory available"))
+ try:
+ os.makedirs(abs_path)
+ except OSError as error:
+ if error.errno != errno.EEXIST:
+ return error_handler(error)
+ else:
+ if mode is not None:
+ os.chmod(abs_path, mode)
+ return abs_path
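
A usage sketch (the path is illustrative): with the default errors='ignore', a failure such as a missing home directory returns None so the caller can continue without a cache, which is how prep_image_file in keepdocker (below) uses it.

  cache_dir = make_home_conf_dir(os.path.join('.cache', 'example'), 0o700)
  if cache_dir is None:
      pass  # no usable home directory; continue without caching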
--- /dev/null
+#!/usr/bin/env python
+
+import argparse
+import errno
+import json
+import os
+import subprocess
+import sys
+import tarfile
+import tempfile
+
+from collections import namedtuple
+from stat import *
+
+import arvados
+import arvados.commands._util as arv_cmd
+import arvados.commands.put as arv_put
+
+STAT_CACHE_ERRORS = (IOError, OSError, ValueError)
+
+DockerImage = namedtuple('DockerImage',
+ ['repo', 'tag', 'hash', 'created', 'vsize'])
+
+opt_parser = argparse.ArgumentParser(add_help=False)
+opt_parser.add_argument(
+ '-f', '--force', action='store_true', default=False,
+ help="Re-upload the image even if it already exists on the server")
+
+_group = opt_parser.add_mutually_exclusive_group()
+_group.add_argument(
+ '--pull', action='store_true', default=True,
+ help="Pull the latest image from Docker repositories first (default)")
+_group.add_argument(
+ '--no-pull', action='store_false', dest='pull',
+ help="Don't pull images from Docker repositories")
+
+opt_parser.add_argument(
+ 'image',
+ help="Docker image to upload, as a repository name or hash")
+opt_parser.add_argument(
+ 'tag', nargs='?', default='latest',
+ help="Tag of the Docker image to upload (default 'latest')")
+
+arg_parser = argparse.ArgumentParser(
+ description="Upload a Docker image to Arvados",
+ parents=[opt_parser, arv_put.run_opts])
+
+class DockerError(Exception):
+ pass
+
+
+def popen_docker(cmd, *args, **kwargs):
+ manage_stdin = ('stdin' not in kwargs)
+ kwargs.setdefault('stdin', subprocess.PIPE)
+ kwargs.setdefault('stdout', sys.stderr)
+ try:
+ docker_proc = subprocess.Popen(['docker.io'] + cmd, *args, **kwargs)
+ except OSError: # No docker.io in $PATH
+ docker_proc = subprocess.Popen(['docker'] + cmd, *args, **kwargs)
+ if manage_stdin:
+ docker_proc.stdin.close()
+ return docker_proc
+
+def check_docker(proc, description):
+ proc.wait()
+ if proc.returncode != 0:
+ raise DockerError("docker {} returned status code {}".
+ format(description, proc.returncode))
+
+def docker_images():
+ # Yield a DockerImage tuple for each installed image.
+ list_proc = popen_docker(['images', '--no-trunc'], stdout=subprocess.PIPE)
+ list_output = iter(list_proc.stdout)
+ next(list_output) # Ignore the header line
+ for line in list_output:
+ words = line.split()
+ size_index = len(words) - 2
+ repo, tag, imageid = words[:3]
+ ctime = ' '.join(words[3:size_index])
+ vsize = ' '.join(words[size_index:])
+ yield DockerImage(repo, tag, imageid, ctime, vsize)
+ list_proc.stdout.close()
+ check_docker(list_proc, "images")
+
+def find_image_hashes(image_search, image_tag=None):
+ # Given one argument, search for Docker images with matching hashes,
+ # and return their full hashes in a set.
+ # Given two arguments, also search for a Docker image with the
+ # same repository and tag. If one is found, return its hash in a
+ # set; otherwise, fall back to the one-argument hash search.
+ # Returns an empty set if no match is found. An ambiguous hash
+ # search may return more than one match.
+ hash_search = image_search.lower()
+ hash_matches = set()
+ for image in docker_images():
+ if (image.repo == image_search) and (image.tag == image_tag):
+ return set([image.hash])
+ elif image.hash.startswith(hash_search):
+ hash_matches.add(image.hash)
+ return hash_matches
+
+def find_one_image_hash(image_search, image_tag=None):
+ hashes = find_image_hashes(image_search, image_tag)
+ hash_count = len(hashes)
+ if hash_count == 1:
+ return hashes.pop()
+ elif hash_count == 0:
+ raise DockerError("no matching image found")
+ else:
+ raise DockerError("{} images match {}".format(hash_count, image_search))
+
+def stat_cache_name(image_file):
+ return getattr(image_file, 'name', image_file) + '.stat'
+
+def pull_image(image_name, image_tag):
+ check_docker(popen_docker(['pull', '-t', image_tag, image_name]), "pull")
+
+def save_image(image_hash, image_file):
+ # Save the specified Docker image to image_file, then try to save its
+ # stats so we can try to resume after interruption.
+ check_docker(popen_docker(['save', image_hash], stdout=image_file),
+ "save")
+ image_file.flush()
+ try:
+ with open(stat_cache_name(image_file), 'w') as statfile:
+ json.dump(tuple(os.fstat(image_file.fileno())), statfile)
+ except STAT_CACHE_ERRORS:
+ pass # We won't resume from this cache. No big deal.
+
+def prep_image_file(filename):
+ # Return a file object ready to save a Docker image,
+ # and a boolean indicating whether or not we need to actually save the
+ # image (False if a cached save is available).
+ cache_dir = arv_cmd.make_home_conf_dir(
+ os.path.join('.cache', 'arvados', 'docker'), 0o700)
+ if cache_dir is None:
+ image_file = tempfile.NamedTemporaryFile(suffix='.tar')
+ need_save = True
+ else:
+ file_path = os.path.join(cache_dir, filename)
+ try:
+ with open(stat_cache_name(file_path)) as statfile:
+ prev_stat = json.load(statfile)
+ now_stat = os.stat(file_path)
+ need_save = any(prev_stat[field] != now_stat[field]
+ for field in [ST_MTIME, ST_SIZE])
+ except STAT_CACHE_ERRORS + (AttributeError, IndexError):
+ need_save = True # We couldn't compare against old stats
+ image_file = open(file_path, 'w+b' if need_save else 'rb')
+ return image_file, need_save
+
+def make_link(link_class, link_name, **link_attrs):
+ link_attrs.update({'link_class': link_class, 'name': link_name})
+ return arvados.api('v1').links().create(body=link_attrs).execute()
+
+def main(arguments=None):
+ args = arg_parser.parse_args(arguments)
+
+ # Pull the image if requested, unless the image is specified as a hash
+ # that we already have.
+ if args.pull and not find_image_hashes(args.image):
+ pull_image(args.image, args.tag)
+
+ try:
+ image_hash = find_one_image_hash(args.image, args.tag)
+ except DockerError as error:
+ print >>sys.stderr, "arv-keepdocker:", error.message
+ sys.exit(1)
+ if not args.force:
+ # Abort if this image is already in Arvados.
+ existing_links = arvados.api('v1').links().list(
+ filters=[['link_class', '=', 'docker_image_hash'],
+ ['name', '=', image_hash]]).execute()['items']
+ if existing_links:
+ message = [
+ "arv-keepdocker: Image {} already stored in collection(s):".
+ format(image_hash)]
+ message.extend(link['head_uuid'] for link in existing_links)
+ print >>sys.stderr, "\n".join(message)
+ sys.exit(0)
+
+ # Open a file for the saved image, and write it if needed.
+ outfile_name = '{}.tar'.format(image_hash)
+ image_file, need_save = prep_image_file(outfile_name)
+ if need_save:
+ save_image(image_hash, image_file)
+
+ # Call arv-put with switches we inherited from it
+ # (a.k.a., switches that aren't our own).
+ put_args = opt_parser.parse_known_args(arguments)[1]
+ coll_uuid = arv_put.main(
+ put_args + ['--filename', outfile_name, image_file.name]).strip()
+
+ # Read the image metadata and make Arvados links from it.
+ image_file.seek(0)
+ image_tar = tarfile.open(fileobj=image_file)
+ json_file = image_tar.extractfile(image_tar.getmember(image_hash + '/json'))
+ image_metadata = json.load(json_file)
+ json_file.close()
+ image_tar.close()
+ link_base = {'head_uuid': coll_uuid, 'properties': {}}
+ if 'created' in image_metadata:
+ link_base['properties']['image_timestamp'] = image_metadata['created']
+
+ make_link('docker_image_hash', image_hash, **link_base)
+ if not image_hash.startswith(args.image.lower()):
+ make_link('docker_image_repository', args.image, **link_base)
+ make_link('docker_image_tag', args.tag, **link_base)
+
+ # Clean up.
+ image_file.close()
+ for filename in [stat_cache_name(image_file), image_file.name]:
+ try:
+ os.unlink(filename)
+ except OSError as error:
+ if error.errno != errno.ENOENT:
+ raise
+
+if __name__ == '__main__':
+ main()
import sys
import tempfile
-CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
-
-def parse_arguments(arguments):
- parser = argparse.ArgumentParser(
- description='Copy data from the local filesystem to Keep.')
-
- parser.add_argument('paths', metavar='path', type=str, nargs='*',
- help="""
- Local file or directory. Default: read from standard input.
- """)
-
- parser.add_argument('--max-manifest-depth', type=int, metavar='N',
- default=-1, help="""
- Maximum depth of directory tree to represent in the manifest
- structure. A directory structure deeper than this will be represented
- as a single stream in the manifest. If N=0, the manifest will contain
- a single stream. Default: -1 (unlimited), i.e., exactly one manifest
- stream per filesystem directory that contains files.
- """)
-
- group = parser.add_mutually_exclusive_group()
-
- group.add_argument('--as-stream', action='store_true', dest='stream',
- help="""
- Synonym for --stream.
- """)
-
- group.add_argument('--stream', action='store_true',
- help="""
- Store the file content and display the resulting manifest on
- stdout. Do not write the manifest to Keep or save a Collection object
- in Arvados.
- """)
-
- group.add_argument('--as-manifest', action='store_true', dest='manifest',
- help="""
- Synonym for --manifest.
- """)
-
- group.add_argument('--in-manifest', action='store_true', dest='manifest',
- help="""
- Synonym for --manifest.
- """)
-
- group.add_argument('--manifest', action='store_true',
- help="""
- Store the file data and resulting manifest in Keep, save a Collection
- object in Arvados, and display the manifest locator (Collection uuid)
- on stdout. This is the default behavior.
- """)
-
- group.add_argument('--as-raw', action='store_true', dest='raw',
- help="""
- Synonym for --raw.
- """)
-
- group.add_argument('--raw', action='store_true',
- help="""
- Store the file content and display the data block locators on stdout,
- separated by commas, with a trailing newline. Do not store a
- manifest.
- """)
+import arvados.commands._util as arv_cmd
- parser.add_argument('--use-filename', type=str, default=None,
- dest='filename', help="""
- Synonym for --filename.
- """)
-
- parser.add_argument('--filename', type=str, default=None,
- help="""
- Use the given filename in the manifest, instead of the name of the
- local file. This is useful when "-" or "/dev/stdin" is given as an
- input file. It can be used only if there is exactly one path given and
- it is not a directory. Implies --manifest.
- """)
-
- group = parser.add_mutually_exclusive_group()
- group.add_argument('--progress', action='store_true',
- help="""
- Display human-readable progress on stderr (bytes and, if possible,
- percentage of total data size). This is the default behavior when
- stderr is a tty.
- """)
-
- group.add_argument('--no-progress', action='store_true',
- help="""
- Do not display human-readable progress on stderr, even if stderr is a
- tty.
- """)
-
- group.add_argument('--batch-progress', action='store_true',
- help="""
- Display machine-readable progress on stderr (bytes and, if known,
- total data size).
- """)
+CAUGHT_SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]
- group = parser.add_mutually_exclusive_group()
- group.add_argument('--resume', action='store_true', default=True,
- help="""
- Continue interrupted uploads from cached state (default).
- """)
- group.add_argument('--no-resume', action='store_false', dest='resume',
- help="""
- Do not continue interrupted uploads from cached state.
- """)
+upload_opts = argparse.ArgumentParser(add_help=False)
+
+upload_opts.add_argument('paths', metavar='path', type=str, nargs='*',
+ help="""
+Local file or directory. Default: read from standard input.
+""")
+
+upload_opts.add_argument('--max-manifest-depth', type=int, metavar='N',
+ default=-1, help="""
+Maximum depth of directory tree to represent in the manifest
+structure. A directory structure deeper than this will be represented
+as a single stream in the manifest. If N=0, the manifest will contain
+a single stream. Default: -1 (unlimited), i.e., exactly one manifest
+stream per filesystem directory that contains files.
+""")
+
+_group = upload_opts.add_mutually_exclusive_group()
+
+_group.add_argument('--as-stream', action='store_true', dest='stream',
+ help="""
+Synonym for --stream.
+""")
+
+_group.add_argument('--stream', action='store_true',
+ help="""
+Store the file content and display the resulting manifest on
+stdout. Do not write the manifest to Keep or save a Collection object
+in Arvados.
+""")
+
+_group.add_argument('--as-manifest', action='store_true', dest='manifest',
+ help="""
+Synonym for --manifest.
+""")
+
+_group.add_argument('--in-manifest', action='store_true', dest='manifest',
+ help="""
+Synonym for --manifest.
+""")
+
+_group.add_argument('--manifest', action='store_true',
+ help="""
+Store the file data and resulting manifest in Keep, save a Collection
+object in Arvados, and display the manifest locator (Collection uuid)
+on stdout. This is the default behavior.
+""")
+
+_group.add_argument('--as-raw', action='store_true', dest='raw',
+ help="""
+Synonym for --raw.
+""")
+
+_group.add_argument('--raw', action='store_true',
+ help="""
+Store the file content and display the data block locators on stdout,
+separated by commas, with a trailing newline. Do not store a
+manifest.
+""")
+
+upload_opts.add_argument('--use-filename', type=str, default=None,
+ dest='filename', help="""
+Synonym for --filename.
+""")
+
+upload_opts.add_argument('--filename', type=str, default=None,
+ help="""
+Use the given filename in the manifest, instead of the name of the
+local file. This is useful when "-" or "/dev/stdin" is given as an
+input file. It can be used only if there is exactly one path given and
+it is not a directory. Implies --manifest.
+""")
+
+run_opts = argparse.ArgumentParser(add_help=False)
+_group = run_opts.add_mutually_exclusive_group()
+_group.add_argument('--progress', action='store_true',
+ help="""
+Display human-readable progress on stderr (bytes and, if possible,
+percentage of total data size). This is the default behavior when
+stderr is a tty.
+""")
+
+_group.add_argument('--no-progress', action='store_true',
+ help="""
+Do not display human-readable progress on stderr, even if stderr is a
+tty.
+""")
+
+_group.add_argument('--batch-progress', action='store_true',
+ help="""
+Display machine-readable progress on stderr (bytes and, if known,
+total data size).
+""")
+
+_group = run_opts.add_mutually_exclusive_group()
+_group.add_argument('--resume', action='store_true', default=True,
+ help="""
+Continue interrupted uploads from cached state (default).
+""")
+_group.add_argument('--no-resume', action='store_false', dest='resume',
+ help="""
+Do not continue interrupted uploads from cached state.
+""")
+
+arg_parser = argparse.ArgumentParser(
+ description='Copy data from the local filesystem to Keep.',
+ parents=[upload_opts, run_opts])
- args = parser.parse_args(arguments)
+def parse_arguments(arguments):
+ args = arg_parser.parse_args(arguments)
if len(args.paths) == 0:
args.paths += ['/dev/stdin']
if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
if args.filename:
- parser.error("""
+ arg_parser.error("""
--filename argument cannot be used when storing a directory or
multiple files.
""")
class ResumeCache(object):
- CACHE_DIR = os.path.expanduser('~/.cache/arvados/arv-put')
+ CACHE_DIR = '.cache/arvados/arv-put'
@classmethod
def setup_user_cache(cls):
- try:
- os.makedirs(cls.CACHE_DIR)
- except OSError as error:
- if error.errno != errno.EEXIST:
- raise
- else:
- os.chmod(cls.CACHE_DIR, 0o700)
+ return arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700)
def __init__(self, file_spec):
self.cache_file = open(file_spec, 'a+')
def exit_signal_handler(sigcode, frame):
sys.exit(-sigcode)
-def main(arguments=None):
+def main(arguments=None, output_to=sys.stdout):
args = parse_arguments(arguments)
if args.progress:
reporter = None
bytes_expected = expected_bytes_for(args.paths)
+ resume_cache = None
try:
- ResumeCache.setup_user_cache()
- resume_cache = ResumeCache(ResumeCache.make_path(args))
+ if ResumeCache.setup_user_cache() is not None:
+ resume_cache = ResumeCache(ResumeCache.make_path(args))
except (IOError, OSError):
- # Couldn't open cache directory/file. Continue without it.
- resume_cache = None
- writer = ArvPutCollectionWriter(resume_cache, reporter, bytes_expected)
+ pass # Couldn't open cache directory/file. Continue without it.
except ResumeCacheConflict:
- print "arv-put: Another process is already uploading this data."
+ output_to.write(
+ "arv-put: Another process is already uploading this data.\n")
sys.exit(1)
+
+ if resume_cache is None:
+ writer = ArvPutCollectionWriter(resume_cache, reporter, bytes_expected)
else:
if not args.resume:
resume_cache.restart()
print >>sys.stderr
if args.stream:
- print writer.manifest_text(),
+ output = writer.manifest_text()
elif args.raw:
- print ','.join(writer.data_locators())
+ output = ','.join(writer.data_locators())
else:
# Register the resulting collection in Arvados.
collection = arvados.api().collections().create(
).execute()
# Print the locator (uuid) of the new collection.
- print collection['uuid']
+ output = collection['uuid']
+
+ output_to.write(output)
+ if not output.endswith('\n'):
+ output_to.write('\n')
for sigcode, orig_handler in orig_signal_handlers.items():
signal.signal(sigcode, orig_handler)
if resume_cache is not None:
resume_cache.destroy()
+ return output
+
if __name__ == '__main__':
main()
from api import *
import config
import arvados.errors
+import arvados.util
class KeepLocator(object):
EPOCH_DATETIME = datetime.datetime.utcfromtimestamp(0)
- HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
def __init__(self, locator_str):
self.size = None
self.permission_hint()]
if s is not None)
- def _is_hex_length(self, s, *size_spec):
- if len(size_spec) == 1:
- good_len = (len(s) == size_spec[0])
- else:
- good_len = (size_spec[0] <= len(s) <= size_spec[1])
- return good_len and self.HEX_RE.match(s)
-
def _make_hex_prop(name, length):
# Build and return a new property with the given name that
# must be a hex string of the given length.
def getter(self):
return getattr(self, data_name)
def setter(self, hex_str):
- if not self._is_hex_length(hex_str, length):
+ if not arvados.util.is_hex(hex_str, length):
raise ValueError("{} must be a {}-digit hex string: {}".
format(name, length, hex_str))
setattr(self, data_name, hex_str)
@perm_expiry.setter
def perm_expiry(self, value):
- if not self._is_hex_length(value, 1, 8):
+ if not arvados.util.is_hex(value, 1, 8):
raise ValueError(
"permission timestamp must be a hex Unix timestamp: {}".
format(value))
import sys
from arvados.collection import *
+import arvados.errors
+
+HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
+
def clear_tmpdir(path=None):
"""
Ensure the given directory (or TASK_TMPDIR if none given)
else:
allfiles += [ent_base]
return allfiles
+
+def is_hex(s, *length_args):
+ """is_hex(s[, length[, max_length]]) -> boolean
+
+ Return True if s is a string of hexadecimal digits.
+ If one length argument is given, the string must contain exactly
+ that number of digits.
+ If two length arguments are given, the string must contain a number of
+ digits between those two lengths, inclusive.
+ Return False otherwise.
+ """
+ num_length_args = len(length_args)
+ if num_length_args > 2:
+ raise arvados.errors.ArgumentError("is_hex accepts up to 3 arguments ({} given)".
+ format(1 + num_length_args))
+ elif num_length_args == 2:
+ good_len = (length_args[0] <= len(s) <= length_args[1])
+ elif num_length_args == 1:
+ good_len = (len(s) == length_args[0])
+ else:
+ good_len = True
+ return bool(good_len and HEX_RE.match(s))
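
A few illustrative calls, matching the docstring:

  is_hex('d41d8cd98f00b204e9800998ecf8427e')      # True: hex digits only
  is_hex('d41d8cd98f00b204e9800998ecf8427e', 32)  # True: exactly 32 digits
  is_hex('deadbeef', 1, 8)                        # True: length within 1..8
  is_hex('0123g')                                 # False: 'g' is not a hex digit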
--- /dev/null
+#!/usr/bin/env python
+
+from arvados.commands.keepdocker import main
+main()
packages=find_packages(),
scripts=[
'bin/arv-get',
- 'bin/arv-put',
+ 'bin/arv-keepdocker',
'bin/arv-ls',
'bin/arv-normalize',
+ 'bin/arv-put',
],
install_requires=[
'python-gflags',
import unittest
import yaml
+from cStringIO import StringIO
+
import arvados
import arvados.commands.put as arv_put
class ArvadosPutTest(ArvadosKeepLocalStoreTestCase):
def call_main_on_test_file(self):
+ self.main_output = StringIO()
with self.make_test_file() as testfile:
path = testfile.name
- arv_put.main(['--stream', '--no-progress', path])
+ arv_put.main(['--stream', '--no-progress', path], self.main_output)
self.assertTrue(
os.path.exists(os.path.join(os.environ['KEEP_LOCAL_STORE'],
'098f6bcd4621d373cade4e832627b4f6')),
end
def create
+ # Note: the user could specify an owner_uuid for a different user, which on
+ # the surface appears to be a security hole. However, the record will be
+ # rejected before being saved to the database by the ApiClientAuthorization
+ # model which enforces that user_id == current user or the user is an
+ # admin.
+
if resource_attrs[:owner_uuid]
# The model has an owner_id attribute instead of owner_uuid, but
# we can't expect the client to know the local numeric ID. We
# Check permissions on the collection manifest.
# If any signature cannot be verified, return 403 Permission denied.
- perms_ok = true
api_token = current_api_client_authorization.andand.api_token
signing_opts = {
key: Rails.configuration.blob_signing_key,
}
resource_attrs[:manifest_text].lines.each do |entry|
entry.split[1..-1].each do |tok|
- # TODO(twp): in Phase 4, fail the request if the locator
- # lacks a permission signature. (see #2755)
- loc = Locator.parse(tok)
- if loc and loc.signature
- if !api_token
- logger.warn "No API token present; cannot verify signature on #{loc}"
- perms_ok = false
- elsif !Blob.verify_signature tok, signing_opts
- logger.warn "Invalid signature on locator #{loc}"
- perms_ok = false
- end
+ if /^[[:digit:]]+:[[:digit:]]+:/.match tok
+ # This is a filename token, not a blob locator. Note that we
+ # keep checking tokens after this, even though manifest
+ # format dictates that all subsequent tokens will also be
+ # filenames. Safety first!
+ elsif Blob.verify_signature tok, signing_opts
+ # OK.
+ elsif Locator.parse(tok).andand.signature
+ # Signature provided, but verify_signature did not like it.
+ logger.warn "Invalid signature on locator #{tok}"
+ raise ArvadosModel::PermissionDeniedError
+ elsif Rails.configuration.permit_create_collection_with_unsigned_manifest
+ # No signature provided, but we are running in insecure mode.
+ logger.debug "Missing signature on locator #{tok} ignored"
+ elsif Blob.new(tok).empty?
+ # No signature provided -- but no data to protect, either.
+ else
+ logger.warn "Missing signature on locator #{tok}"
+ raise ArvadosModel::PermissionDeniedError
end
end
end
- unless perms_ok
- raise ArvadosModel::PermissionDeniedError
- end
# Remove any permission signatures from the manifest.
resource_attrs[:manifest_text]
class Blob
+ def initialize locator
+ @locator = locator
+ end
+
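+ # d41d8cd98f00b204e9800998ecf8427e is the MD5 digest of the empty string,
+ # so a locator with this hash refers to zero bytes of data.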
+ def empty?
+ !!@locator.match(/^d41d8cd98f00b204e9800998ecf8427e(\+.*)?$/)
+ end
+
# In order to get a Blob from Keep, you have to prove either
# [a] you have recently written it to Keep yourself, or
# [b] apiserver has recently decided that you should be able to read it
# Amount of time (in seconds) for which a blob permission signature
# remains valid. Default: 2 weeks (1209600 seconds)
blob_signing_ttl: 1209600
+
+ # Allow clients to create collections by providing a manifest with
+ # unsigned data blob locators. IMPORTANT: This effectively disables
+ # access controls for data stored in Keep: a client who knows a hash
+ # can write a manifest that references the hash, pass it to
+ # collections.create (which will create a permission link), use
+ # collections.get to obtain a signature for that data locator, and
+ # use that signed locator to retrieve the data from Keep. Therefore,
+ # do not turn this on if your users expect to keep data private from
+ # one another!
+ permit_create_collection_with_unsigned_manifest: false
class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
+ setup do
+ # Unless otherwise specified in the test, we want normal/secure behavior.
+ permit_unsigned_manifests false
+ end
+
+ teardown do
+ # Reset to secure behavior after each test.
+ permit_unsigned_manifests false
+ end
+
+ def permit_unsigned_manifests isok=true
+ # Set security model for the life of a test.
+ Rails.configuration.permit_create_collection_with_unsigned_manifest = isok
+ end
+
test "should get index" do
authorize_with :active
get :index
assert_equal 99999, resp['offset']
end
- test "should create" do
+ test "create with unsigned manifest" do
+ permit_unsigned_manifests
authorize_with :active
test_collection = {
manifest_text: <<-EOS
end
test "create with owner_uuid set to owned group" do
+ permit_unsigned_manifests
authorize_with :active
manifest_text = ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n"
post :create, {
end
test "create with owner_uuid set to group i can_manage" do
+ permit_unsigned_manifests
authorize_with :active
manifest_text = ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n"
post :create, {
end
test "create with owner_uuid set to group with no can_manage permission" do
+ permit_unsigned_manifests
authorize_with :active
manifest_text = ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n"
post :create, {
end
test "admin create with owner_uuid set to group with no permission" do
+ permit_unsigned_manifests
authorize_with :admin
manifest_text = ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n"
post :create, {
end
test "should create with collection passed as json" do
+ permit_unsigned_manifests
authorize_with :active
post :create, {
collection: <<-EOS
end
test "should fail to create with checksum mismatch" do
+ permit_unsigned_manifests
authorize_with :active
post :create, {
collection: <<-EOS
end
test "collection UUID is normalized when created" do
+ permit_unsigned_manifests
authorize_with :active
post :create, {
collection: {
assert_equal true, !!found.index('1f4b0bc7583c2a7f9102c395f4ffc5e3+45')
end
- test "create collection with signed manifest" do
- authorize_with :active
- locators = %w(
+ [false, true].each do |permit_unsigned|
+ test "create collection with signed manifest, permit_unsigned=#{permit_unsigned}" do
+ permit_unsigned_manifests permit_unsigned
+ authorize_with :active
+ locators = %w(
d41d8cd98f00b204e9800998ecf8427e+0
acbd18db4cc2f85cedef654fccc4a4d8+3
ea10d51bcf88862dbcc36eb292017dfd+45)
- unsigned_manifest = locators.map { |loc|
- ". " + loc + " 0:0:foo.txt\n"
- }.join()
- manifest_uuid = Digest::MD5.hexdigest(unsigned_manifest) +
- '+' +
- unsigned_manifest.length.to_s
-
- # build a manifest with both signed and unsigned locators.
- # TODO(twp): in phase 4, all locators will need to be signed, so
- # this test should break and will need to be rewritten. Issue #2755.
- signing_opts = {
- key: Rails.configuration.blob_signing_key,
- api_token: api_token(:active),
- }
- signed_manifest =
- ". " + locators[0] + " 0:0:foo.txt\n" +
- ". " + Blob.sign_locator(locators[1], signing_opts) + " 0:0:foo.txt\n" +
- ". " + Blob.sign_locator(locators[2], signing_opts) + " 0:0:foo.txt\n"
-
- post :create, {
- collection: {
- manifest_text: signed_manifest,
- uuid: manifest_uuid,
+ unsigned_manifest = locators.map { |loc|
+ ". " + loc + " 0:0:foo.txt\n"
+ }.join()
+ manifest_uuid = Digest::MD5.hexdigest(unsigned_manifest) +
+ '+' +
+ unsigned_manifest.length.to_s
+
+ # Build a manifest with both signed and unsigned locators.
+ signing_opts = {
+ key: Rails.configuration.blob_signing_key,
+ api_token: api_token(:active),
}
- }
- assert_response :success
- assert_not_nil assigns(:object)
- resp = JSON.parse(@response.body)
- assert_equal manifest_uuid, resp['uuid']
- assert_equal 48, resp['data_size']
- # All of the locators in the output must be signed.
- resp['manifest_text'].lines.each do |entry|
- m = /([[:xdigit:]]{32}\+\S+)/.match(entry)
- if m
- assert Blob.verify_signature m[0], signing_opts
+ signed_locators = locators.collect do |x|
+ Blob.sign_locator x, signing_opts
+ end
+ if permit_unsigned
+ # Leave a non-empty blob unsigned.
+ signed_locators[1] = locators[1]
+ else
+ # Leave the empty blob unsigned. This should still be allowed.
+ signed_locators[0] = locators[0]
+ end
+ signed_manifest =
+ ". " + signed_locators[0] + " 0:0:foo.txt\n" +
+ ". " + signed_locators[1] + " 0:0:foo.txt\n" +
+ ". " + signed_locators[2] + " 0:0:foo.txt\n"
+
+ post :create, {
+ collection: {
+ manifest_text: signed_manifest,
+ uuid: manifest_uuid,
+ }
+ }
+ assert_response :success
+ assert_not_nil assigns(:object)
+ resp = JSON.parse(@response.body)
+ assert_equal manifest_uuid, resp['uuid']
+ assert_equal 48, resp['data_size']
+ # All of the locators in the output must be signed.
+ resp['manifest_text'].lines.each do |entry|
+ m = /([[:xdigit:]]{32}\+\S+)/.match(entry)
+ if m
+ assert Blob.verify_signature m[0], signing_opts
+ end
end
end
end
end
test "multiple locators per line" do
+ permit_unsigned_manifests
authorize_with :active
locators = %w(
d41d8cd98f00b204e9800998ecf8427e+0
end
test "multiple signed locators per line" do
+ permit_unsigned_manifests
authorize_with :active
locators = %w(
d41d8cd98f00b204e9800998ecf8427e+0
end
assert_equal locators.count, returned_locator_count
end
+
+ test 'Reject manifest with unsigned blob' do
+ authorize_with :active
+ unsigned_manifest = ". 0cc175b9c0f1b6a831c399e269772661+1 0:1:a.txt\n"
+ manifest_uuid = Digest::MD5.hexdigest(unsigned_manifest)
+ post :create, {
+ collection: {
+ manifest_text: unsigned_manifest,
+ uuid: manifest_uuid,
+ }
+ }
+ assert_response 403,
+ "Creating a collection with unsigned blobs should respond 403"
+ assert_empty Collection.where('uuid like ?', manifest_uuid+'%'),
+ "Collection should not exist in database after failed create"
+ end
end
end
test "store collection as json" do
+ signing_opts = {
+ key: Rails.configuration.blob_signing_key,
+ api_token: api_token(:active),
+ }
+ signed_locator = Blob.sign_locator('bad42fa702ae3ea7d888fef11b46f450+44',
+ signing_opts)
post "/arvados/v1/collections", {
format: :json,
- collection: "{\"manifest_text\":\". bad42fa702ae3ea7d888fef11b46f450+44 0:44:md5sum.txt\\n\",\"uuid\":\"ad02e37b6a7f45bbe2ead3c29a109b8a+54\"}"
+ collection: "{\"manifest_text\":\". #{signed_locator} 0:44:md5sum.txt\\n\",\"uuid\":\"ad02e37b6a7f45bbe2ead3c29a109b8a+54\"}"
}, auth(:active)
assert_response 200
assert_equal 'ad02e37b6a7f45bbe2ead3c29a109b8a+54', json_response['uuid']
}
}
-func PollCgroupStats(cgroup_path string, stderr chan string, poll int64) {
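+// FindStat returns the path of the given cgroup stat file, probing several
+// common layouts in turn: <root>/<statgroup>/<parent>/<cid>/,
+// <root>/<parent>/<cid>/, <root>/<statgroup>/, and finally <root>/.
+// It returns "" if no candidate file exists.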
+func FindStat(cgroup_root string, cgroup_parent string, container_id string, statgroup string, stat string) string {
+ var path string
+ path = fmt.Sprintf("%s/%s/%s/%s/%s.%s", cgroup_root, statgroup, cgroup_parent, container_id, statgroup, stat)
+ if _, err := os.Stat(path); err == nil {
+ return path
+ }
+ path = fmt.Sprintf("%s/%s/%s/%s.%s", cgroup_root, cgroup_parent, container_id, statgroup, stat)
+ if _, err := os.Stat(path); err == nil {
+ return path
+ }
+ path = fmt.Sprintf("%s/%s/%s.%s", cgroup_root, statgroup, statgroup, stat)
+ if _, err := os.Stat(path); err == nil {
+ return path
+ }
+ path = fmt.Sprintf("%s/%s.%s", cgroup_root, statgroup, stat)
+ if _, err := os.Stat(path); err == nil {
+ return path
+ }
+ return ""
+}
+
+func PollCgroupStats(cgroup_root string, cgroup_parent string, container_id string, stderr chan string, poll int64) {
//var last_usage int64 = 0
var last_user int64 = 0
var last_sys int64 = 0
disk := make(map[string]*Disk)
- //cpuacct_usage := fmt.Sprintf("%s/cpuacct.usage", cgroup_path)
- cpuacct_stat := fmt.Sprintf("%s/cpuacct.stat", cgroup_path)
- blkio_io_service_bytes := fmt.Sprintf("%s/blkio.io_service_bytes", cgroup_path)
- cpuset_cpus := fmt.Sprintf("%s/cpuset.cpus", cgroup_path)
- memory_stat := fmt.Sprintf("%s/memory.stat", cgroup_path)
+ //cpuacct_usage := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "usage")
+ cpuacct_stat := FindStat(cgroup_root, cgroup_parent, container_id, "cpuacct", "stat")
+ blkio_io_service_bytes := FindStat(cgroup_root, cgroup_parent, container_id, "blkio", "io_service_bytes")
+ cpuset_cpus := FindStat(cgroup_root, cgroup_parent, container_id, "cpuset", "cpus")
+ memory_stat := FindStat(cgroup_root, cgroup_parent, container_id, "memory", "stat")
+
+ if cpuacct_stat != "" {
+ stderr <- fmt.Sprintf("crunchstat: reading stats from %s", cpuacct_stat)
+ }
+ if blkio_io_service_bytes != "" {
+ stderr <- fmt.Sprintf("crunchstat: reading stats from %s", blkio_io_service_bytes)
+ }
+ if cpuset_cpus != "" {
+ stderr <- fmt.Sprintf("crunchstat: reading stats from %s", cpuset_cpus)
+ }
+ if memory_stat != "" {
+ stderr <- fmt.Sprintf("crunchstat: reading stats from %s", memory_stat)
+ }
var elapsed int64 = poll
c.Close()
}*/
var cpus int64 = 0
- {
+ if cpuset_cpus != "" {
c, _ := os.Open(cpuset_cpus)
b, _ := ioutil.ReadAll(c)
sp := strings.Split(string(b), ",")
if cpus == 0 {
cpus = 1
}
- {
+ if cpuacct_stat != "" {
c, _ := os.Open(cpuacct_stat)
b, _ := ioutil.ReadAll(c)
var next_user int64
last_user = next_user
last_sys = next_sys
}
- {
+ if blkio_io_service_bytes != "" {
c, _ := os.Open(blkio_io_service_bytes)
b := bufio.NewScanner(c)
var device, op string
c.Close()
}
- {
+ if memory_stat != "" {
c, _ := os.Open(memory_stat)
b := bufio.NewScanner(c)
var stat string
func main() {
var (
- cgroup_path string
+ cgroup_root string
cgroup_parent string
cgroup_cidfile string
wait int64
poll int64
)
- flag.StringVar(&cgroup_path, "cgroup-path", "", "Direct path to cgroup")
- flag.StringVar(&cgroup_parent, "cgroup-parent", "", "Path to parent cgroup")
+ flag.StringVar(&cgroup_root, "cgroup-root", "", "Root of cgroup tree")
+ flag.StringVar(&cgroup_parent, "cgroup-parent", "", "Name of container parent under cgroup")
flag.StringVar(&cgroup_cidfile, "cgroup-cid", "", "Path to container id file")
flag.Int64Var(&wait, "wait", 5, "Maximum time (in seconds) to wait for cid file to show up")
flag.Int64Var(&poll, "poll", 1000, "Polling frequency, in milliseconds")
logger := log.New(os.Stderr, "crunchstat: ", 0)
- if cgroup_path == "" && cgroup_cidfile == "" {
- logger.Fatal("Must provide either -cgroup-path or -cgroup-cid")
+ if cgroup_root == "" {
+ logger.Fatal("Must provide either -cgroup-root")
}
// Make output channel
}
// Read the cid file
+ var container_id string
if cgroup_cidfile != "" {
// wait up to 'wait' seconds for the cid file to appear
var i time.Duration
if err == nil {
cid, err2 := ioutil.ReadAll(f)
if err2 == nil && len(cid) > 0 {
- cgroup_path = string(cid)
+ container_id = string(cid)
f.Close()
break
}
}
time.Sleep(100 * time.Millisecond)
}
- if cgroup_path == "" {
+ if cgroup_root == "" {
logger.Printf("Could not read cid file %s", cgroup_cidfile)
}
}
- // add the parent prefix
- if cgroup_parent != "" {
- cgroup_path = fmt.Sprintf("%s/%s", cgroup_parent, cgroup_path)
- }
-
- logger.Print("Using cgroup ", cgroup_path)
-
- go PollCgroupStats(cgroup_path, stderr_chan, poll)
+ go PollCgroupStats(cgroup_root, cgroup_parent, container_id, stderr_chan, poll)
// Wait for each of stdout and stderr to drain
<-finish_chan