X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/f339946832e0bb7ad175acaf59733445e6915f7a..HEAD:/sdk/python/arvados/__init__.py diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py index c8c7029807..8d9a5690b4 100644 --- a/sdk/python/arvados/__init__.py +++ b/sdk/python/arvados/__init__.py @@ -1,173 +1,56 @@ # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 +"""Arvados Python SDK -from __future__ import print_function -from __future__ import absolute_import -from future import standard_library -standard_library.install_aliases() -from builtins import object -import bz2 -import fcntl -import hashlib -import http.client -import httplib2 -import json -import logging +This module provides the entire Python SDK for Arvados. The most useful modules +include: + +* arvados.api - After you `import arvados`, you can call `arvados.api` as a + shortcut to the client constructor function `arvados.api.api`. + +* arvados.collection - The `arvados.collection.Collection` class provides a + high-level interface to read and write collections. It coordinates sending + data to and from Keep, and synchronizing updates with the collection object. + +* arvados.util - Utility functions to use mostly in conjunction with the API + client object and the results it returns. + +Other submodules provide lower-level functionality. +""" + +import logging as stdliblog import os -import pprint -import re -import string import sys -import time import types -import zlib -if sys.version_info >= (3, 0): - from collections import UserDict -else: - from UserDict import UserDict +from collections import UserDict -from .api import api, api_from_config, http_cache -from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter +from . import api, errors, util +from .api import api_from_config, http_cache +from .collection import CollectionReader from arvados.keep import * -from arvados.stream import * from .arvfile import StreamFileReader +from .logging import log_format, log_date_format, log_handler from .retry import RetryLoop -import arvados.errors as errors -import arvados.util as util + +# Previous versions of the PySDK used to say `from .api import api`. This +# made it convenient to call the API client constructor, but difficult to +# access the rest of the `arvados.api` module. The magic below fixes that +# bug while retaining backwards compatibility: `arvados.api` is now the +# module and you can import it normally, but we make that module callable so +# all the existing code that says `arvados.api('v1', ...)` still works. +class _CallableAPIModule(api.__class__): + __call__ = staticmethod(api.api) +api.__class__ = _CallableAPIModule + +# Override logging module pulled in via `from ... import *` +# so users can `import arvados.logging`. +logging = sys.modules['arvados.logging'] # Set up Arvados logging based on the user's configuration. # All Arvados code should log under the arvados hierarchy. -log_format = '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s' -log_date_format = '%Y-%m-%d %H:%M:%S' -log_handler = logging.StreamHandler() -log_handler.setFormatter(logging.Formatter(log_format, log_date_format)) -logger = logging.getLogger('arvados') +logger = stdliblog.getLogger('arvados') logger.addHandler(log_handler) -logger.setLevel(logging.DEBUG if config.get('ARVADOS_DEBUG') - else logging.WARNING) - -def task_set_output(self, s, num_retries=5): - for tries_left in RetryLoop(num_retries=num_retries, backoff_start=0): - try: - return api('v1').job_tasks().update( - uuid=self['uuid'], - body={ - 'output':s, - 'success':True, - 'progress':1.0 - }).execute() - except errors.ApiError as error: - if retry.check_http_response_success(error.resp.status) is None and tries_left > 0: - logger.debug("task_set_output: job_tasks().update() raised {}, retrying with {} tries left".format(repr(error),tries_left)) - else: - raise - -_current_task = None -def current_task(num_retries=5): - global _current_task - if _current_task: - return _current_task - - for tries_left in RetryLoop(num_retries=num_retries, backoff_start=2): - try: - task = api('v1').job_tasks().get(uuid=os.environ['TASK_UUID']).execute() - task = UserDict(task) - task.set_output = types.MethodType(task_set_output, task) - task.tmpdir = os.environ['TASK_WORK'] - _current_task = task - return task - except errors.ApiError as error: - if retry.check_http_response_success(error.resp.status) is None and tries_left > 0: - logger.debug("current_task: job_tasks().get() raised {}, retrying with {} tries left".format(repr(error),tries_left)) - else: - raise - -_current_job = None -def current_job(num_retries=5): - global _current_job - if _current_job: - return _current_job - - for tries_left in RetryLoop(num_retries=num_retries, backoff_start=2): - try: - job = api('v1').jobs().get(uuid=os.environ['JOB_UUID']).execute() - job = UserDict(job) - job.tmpdir = os.environ['JOB_WORK'] - _current_job = job - return job - except errors.ApiError as error: - if retry.check_http_response_success(error.resp.status) is None and tries_left > 0: - logger.debug("current_job: jobs().get() raised {}, retrying with {} tries left".format(repr(error),tries_left)) - else: - raise - -def getjobparam(*args): - return current_job()['script_parameters'].get(*args) - -def get_job_param_mount(*args): - return os.path.join(os.environ['TASK_KEEPMOUNT'], current_job()['script_parameters'].get(*args)) - -def get_task_param_mount(*args): - return os.path.join(os.environ['TASK_KEEPMOUNT'], current_task()['parameters'].get(*args)) - -class JobTask(object): - def __init__(self, parameters=dict(), runtime_constraints=dict()): - print("init jobtask %s %s" % (parameters, runtime_constraints)) - -class job_setup(object): - @staticmethod - def one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=False, api_client=None): - if if_sequence != current_task()['sequence']: - return - - if not api_client: - api_client = api('v1') - - job_input = current_job()['script_parameters']['input'] - cr = CollectionReader(job_input, api_client=api_client) - cr.normalize() - for s in cr.all_streams(): - for f in s.all_files(): - if input_as_path: - task_input = os.path.join(job_input, s.name(), f.name()) - else: - task_input = f.as_manifest() - new_task_attrs = { - 'job_uuid': current_job()['uuid'], - 'created_by_job_task_uuid': current_task()['uuid'], - 'sequence': if_sequence + 1, - 'parameters': { - 'input':task_input - } - } - api_client.job_tasks().create(body=new_task_attrs).execute() - if and_end_task: - api_client.job_tasks().update(uuid=current_task()['uuid'], - body={'success':True} - ).execute() - exit(0) - - @staticmethod - def one_task_per_input_stream(if_sequence=0, and_end_task=True): - if if_sequence != current_task()['sequence']: - return - job_input = current_job()['script_parameters']['input'] - cr = CollectionReader(job_input) - for s in cr.all_streams(): - task_input = s.tokens() - new_task_attrs = { - 'job_uuid': current_job()['uuid'], - 'created_by_job_task_uuid': current_task()['uuid'], - 'sequence': if_sequence + 1, - 'parameters': { - 'input':task_input - } - } - api('v1').job_tasks().create(body=new_task_attrs).execute() - if and_end_task: - api('v1').job_tasks().update(uuid=current_task()['uuid'], - body={'success':True} - ).execute() - exit(0) +logger.setLevel(stdliblog.DEBUG if config.get('ARVADOS_DEBUG') + else stdliblog.WARNING)