#!/usr/bin/env python
"""Copy every input line that starts with '0' into a new output collection.

Reads the collection named by the current job's ``input`` script parameter,
writes the matching lines from all of its files into a single file called
``0-filter.txt`` in a new collection, and records that collection as the
output of the current task.
"""

# Import the Arvados sdk module
import arvados

# Get information about the task from the environment
this_task = arvados.current_task()
this_task_input = arvados.current_job()['script_parameters']['input']

# Create a reader for the collection referred to in the input
collection = arvados.CollectionReader(this_task_input)

# Create an object to write a new collection as output
out = arvados.CollectionWriter()

# Create a new file in the output collection
with out.open('0-filter.txt') as out_file:
    # Iterate over every input file in the input collection
    for input_file in collection.all_files():
        # Output every line in the file that starts with '0'
        out_file.writelines(
            line for line in input_file if line.startswith('0'))

# Commit the output to Keep. This returns a Keep id.
# Done after the `with` block so the output file is closed first.
output_id = out.finish()

# Set the output for this task to the Keep id
this_task.set_output(output_id)

# Done!