sdk/python/discovery2pydoc.py

   1 #!/usr/bin/env python3
   2 # Copyright (C) The Arvados Authors. All rights reserved.
   3 #
   4 # SPDX-License-Identifier: Apache-2.0
   5 """discovery2pydoc - Build skeleton Python from the Arvados discovery document
   6
   7 This tool reads the Arvados discovery document and writes a Python source file
   8 with classes and methods that correspond to the resources that
   9 google-api-python-client builds dynamically. This source does not include any
  10 implementation, but it does include real method signatures and documentation
  11 strings, so it's useful as documentation for tools that read Python source,
  12 including pydoc and pdoc.
  13
  14 If you run this tool with the path to a discovery document, it uses no
  15 dependencies outside the Python standard library. If it needs to read
  16 configuration to find the discovery document dynamically, it'll load the
  17 `arvados` module to do that.
  18 """
  19
  20 import argparse
  21 import inspect
  22 import json
  23 import keyword
  24 import operator
  25 import os
  26 import pathlib
  27 import re
  28 import sys
  29 import urllib.parse
  30 import urllib.request
  31
  32 from typing import (
  33     Any,
  34     Callable,
  35     Mapping,
  36     Optional,
  37     Sequence,
  38 )
  39
# Callable that lowercases a string; humanize_name passes it to
# transform_name as the per-part fixer.
LOWERCASE = operator.methodcaller('lower')
# Sort key that reads an object's `name` attribute. Used to order parameters
# and methods alphabetically.
NAME_KEY = operator.attrgetter('name')
# Default value of --output-file. parse_arguments compares against it to
# decide whether output goes to sys.stdout rather than a real file.
STDSTREAM_PATH = pathlib.Path('-')
# Callable that title-cases a string; classify_name passes it to
# transform_name as the per-part fixer.
TITLECASE = operator.methodcaller('title')

# Method names that document_resource leaves out of the generated classes.
# NOTE(review): presumably these are aliases of other API methods - confirm.
_ALIASED_METHODS = frozenset([
    'destroy',
    'index',
    'show',
])
# Text appended to the docstring of anything listed as deprecated below.
# NOTE(review): the `!!!` line looks like Markdown admonition syntax for the
# documentation renderer - confirm.
_DEPRECATED_NOTICE = '''

!!! deprecated
    This resource is deprecated in the Arvados API.
'''
  55 _DEPRECATED_RESOURCES = frozenset([
  56     'Humans',
  57     'JobTasks',
  58     'Jobs',
  59     'KeepDisks',
  60     'Nodes',
  61     'PipelineInstances',
  62     'PipelineTemplates',
  63     'Specimens'
  64     'Traits',
  65 ])
  66 _DEPRECATED_SCHEMAS = frozenset([
  67     *(name[:-1] for name in _DEPRECATED_RESOURCES),
  68     *(f'{name[:-1]}List' for name in _DEPRECATED_RESOURCES),
  69 ])
  70
# Template appended to the docstring of schemas whose name ends in `List`.
# document_schema formats it with `cls_name` set to the schema name minus
# that `List` suffix.
_LIST_PYDOC = '''

This is the dictionary object returned when you call `{cls_name}s.list`.
If you just want to iterate all objects that match your search criteria,
consider using `arvados.util.keyset_list_all`.
If you work with this raw object, the keys of the dictionary are documented
below, along with their types. The `items` key maps to a list of matching
`{cls_name}` objects.
'''
# Docstring that main writes at the top of the generated module.
_MODULE_PYDOC = '''Arvados API client documentation skeleton

This module documents the methods and return types provided by the Arvados API
client. Start with `ArvadosAPIClient`, which documents the methods available
from the API client objects constructed by `arvados.api`. The implementation is
generated dynamically at runtime when the client object is built.
'''
# Template appended to the docstring of every schema that is not a `List`
# schema. document_schema formats it with `cls_name` set to the schema name.
_SCHEMA_PYDOC = '''

This is the dictionary object that represents a single {cls_name} in Arvados.
The keys of the dictionary are documented below, along with their types.
Not every key may appear in every dictionary returned by an API call.
Refer to the API documentation for details about how to retrieve specific keys
if you need them.
'''
  95
# Lookup table behind get_type_annotation. Type names that are not listed
# here are passed through unchanged by that function.
_TYPE_MAP = {
    # Map the API's JavaScript-based type names to Python annotations.
    # Some of these may disappear after Arvados issue #19795 is fixed.
    'Array': 'list',
    'array': 'list',
    'boolean': 'bool',
    # datetime fields are strings in ISO 8601 format.
    'datetime': 'str',
    'Hash': 'dict[str, Any]',
    'integer': 'int',
    'object': 'dict[str, Any]',
    'string': 'str',
    'text': 'str',
}
 110
 111 def get_type_annotation(name: str) -> str:
 112     return _TYPE_MAP.get(name, name)
 113
 114 def to_docstring(s: str, indent: int) -> str:
 115     prefix = ' ' * indent
 116     s = s.replace('"""', '""\"')
 117     s = re.sub(r'(\n+)', r'\1' + prefix, s)
 118     s = s.strip()
 119     if '\n' in s:
 120         return f'{prefix}"""{s}\n{prefix}"""'
 121     else:
 122         return f'{prefix}"""{s}"""'
 123
 124 def transform_name(s: str, sep: str, fix_part: Callable[[str], str]) -> str:
 125     return sep.join(fix_part(part) for part in s.split('_'))
 126
 127 def classify_name(s: str) -> str:
 128     return transform_name(s, '', TITLECASE)
 129
 130 def humanize_name(s: str) -> str:
 131     return transform_name(s, ' ', LOWERCASE)
 132
 133 class Parameter(inspect.Parameter):
 134     def __init__(self, name: str, spec: Mapping[str, Any]) -> None:
 135         self.api_name = name
 136         self._spec = spec
 137         if keyword.iskeyword(name):
 138             name += '_'
 139         super().__init__(
 140             name,
 141             inspect.Parameter.KEYWORD_ONLY,
 142             annotation=get_type_annotation(self._spec['type']),
 143             # In normal Python the presence of a default tells you whether or
 144             # not an argument is required. In the API the `required` flag tells
 145             # us that, and defaults are specified inconsistently. Don't show
 146             # defaults in the signature: it adds noise and makes things more
 147             # confusing for the reader about what's required and what's
 148             # optional. The docstring can explain in better detail, including
 149             # the default value.
 150             default=inspect.Parameter.empty,
 151         )
 152
 153     def default_value(self) -> object:
 154         try:
 155             src_value: str = self._spec['default']
 156         except KeyError:
 157             return None
 158         if src_value == 'true':
 159             return True
 160         elif src_value == 'false':
 161             return False
 162         elif src_value.isdigit():
 163             return int(src_value)
 164         else:
 165             return src_value
 166
 167     def is_required(self) -> bool:
 168         return self._spec['required']
 169
 170     def doc(self) -> str:
 171         default_value = self.default_value()
 172         if default_value is None:
 173             default_doc = ''
 174         else:
 175             default_doc = f" Default {default_value!r}."
 176         # If there is no description, use a zero-width space to help Markdown
 177         # parsers retain the definition list structure.
 178         description = self._spec['description'] or '\u200b'
 179         return f'''
 180 {self.api_name}: {self.annotation}
 181 : {description}{default_doc}
 182 '''
 183
 184
 185 class Method:
 186     def __init__(self, name: str, spec: Mapping[str, Any]) -> None:
 187         self.name = name
 188         self._spec = spec
 189         self._required_params = []
 190         self._optional_params = []
 191         for param_name, param_spec in spec['parameters'].items():
 192             param = Parameter(param_name, param_spec)
 193             if param.is_required():
 194                 param_list = self._required_params
 195             else:
 196                 param_list = self._optional_params
 197             param_list.append(param)
 198         self._required_params.sort(key=NAME_KEY)
 199         self._optional_params.sort(key=NAME_KEY)
 200
 201     def signature(self) -> inspect.Signature:
 202         parameters = [
 203             inspect.Parameter('self', inspect.Parameter.POSITIONAL_ONLY),
 204             *self._required_params,
 205             *self._optional_params,
 206         ]
 207         try:
 208             returns = get_type_annotation(self._spec['response']['$ref'])
 209         except KeyError:
 210             returns = 'dict[str, Any]'
 211         return inspect.Signature(parameters, return_annotation=returns)
 212
 213     def doc(self, doc_slice: slice=slice(None)) -> str:
 214         doc_lines = self._spec['description'].splitlines(keepends=True)[doc_slice]
 215         if not doc_lines[-1].endswith('\n'):
 216             doc_lines.append('\n')
 217         if self._required_params:
 218             doc_lines.append("\nRequired parameters:\n")
 219             doc_lines.extend(param.doc() for param in self._required_params)
 220         if self._optional_params:
 221             doc_lines.append("\nOptional parameters:\n")
 222             doc_lines.extend(param.doc() for param in self._optional_params)
 223         return f'''
 224     def {self.name}{self.signature()}:
 225 {to_docstring(''.join(doc_lines), 8)}
 226 '''
 227
 228
 229 def document_schema(name: str, spec: Mapping[str, Any]) -> str:
 230     description = spec['description']
 231     if name in _DEPRECATED_SCHEMAS:
 232         description += _DEPRECATED_NOTICE
 233     if name.endswith('List'):
 234         desc_fmt = _LIST_PYDOC
 235         cls_name = name[:-4]
 236     else:
 237         desc_fmt = _SCHEMA_PYDOC
 238         cls_name = name
 239     description += desc_fmt.format(cls_name=cls_name)
 240     lines = [
 241         f"class {name}(TypedDict, total=False):",
 242         to_docstring(description, 4),
 243     ]
 244     for field_name, field_spec in spec['properties'].items():
 245         field_type = get_type_annotation(field_spec['type'])
 246         try:
 247             subtype = field_spec['items']['$ref']
 248         except KeyError:
 249             pass
 250         else:
 251             field_type += f"[{get_type_annotation(subtype)}]"
 252
 253         field_line = f"    {field_name}: {field_type!r}"
 254         try:
 255             field_line += f" = {field_spec['default']!r}"
 256         except KeyError:
 257             pass
 258         lines.append(field_line)
 259
 260         field_doc: str = field_spec.get('description', '')
 261         if field_spec['type'] == 'datetime':
 262             field_doc += "\n\nString in ISO 8601 datetime format. Pass it to `ciso8601.parse_datetime` to build a `datetime.datetime`."
 263         if field_doc:
 264             lines.append(to_docstring(field_doc, 4))
 265     lines.append('\n')
 266     return '\n'.join(lines)
 267
 268 def document_resource(name: str, spec: Mapping[str, Any]) -> str:
 269     class_name = classify_name(name)
 270     docstring = f"Methods to query and manipulate Arvados {humanize_name(name)}"
 271     if class_name in _DEPRECATED_RESOURCES:
 272         docstring += _DEPRECATED_NOTICE
 273     methods = [
 274         Method(key, meth_spec)
 275         for key, meth_spec in spec['methods'].items()
 276         if key not in _ALIASED_METHODS
 277     ]
 278     return f'''class {class_name}:
 279 {to_docstring(docstring, 4)}
 280 {''.join(method.doc(slice(1)) for method in sorted(methods, key=NAME_KEY))}
 281 '''
 282
 283 def parse_arguments(arglist: Optional[Sequence[str]]) -> argparse.Namespace:
 284     parser = argparse.ArgumentParser()
 285     parser.add_argument(
 286         '--output-file', '-O',
 287         type=pathlib.Path,
 288         metavar='PATH',
 289         default=STDSTREAM_PATH,
 290         help="""Path to write output. Specify `-` to use stdout (the default)
 291 """)
 292     parser.add_argument(
 293         'discovery_url',
 294         nargs=argparse.OPTIONAL,
 295         metavar='URL',
 296         help="""URL or file path of a discovery document to load.
 297 Specify `-` to use stdin.
 298 If not provided, retrieved dynamically from Arvados client configuration.
 299 """)
 300     args = parser.parse_args(arglist)
 301     if args.discovery_url is None:
 302         from arvados.api import api_kwargs_from_config
 303         discovery_fmt = api_kwargs_from_config('v1')['discoveryServiceUrl']
 304         args.discovery_url = discovery_fmt.format(api='arvados', apiVersion='v1')
 305     elif args.discovery_url == '-':
 306         args.discovery_url = 'file:///dev/stdin'
 307     else:
 308         parts = urllib.parse.urlsplit(args.discovery_url)
 309         if not (parts.scheme or parts.netloc):
 310             args.discovery_url = pathlib.Path(args.discovery_url).resolve().as_uri()
 311     if args.output_file == STDSTREAM_PATH:
 312         args.out_file = sys.stdout
 313     else:
 314         args.out_file = args.output_file.open('w')
 315     return args
 316
 317 def main(arglist: Optional[Sequence[str]]=None) -> int:
 318     args = parse_arguments(arglist)
 319     with urllib.request.urlopen(args.discovery_url) as discovery_file:
 320         status = discovery_file.getcode()
 321         if not (status is None or 200 <= status < 300):
 322             print(
 323                 f"error getting {args.discovery_url}: server returned {discovery_file.status}",
 324                 file=sys.stderr,
 325             )
 326             return os.EX_IOERR
 327         discovery_document = json.load(discovery_file)
 328     print(
 329         to_docstring(_MODULE_PYDOC, indent=0),
 330         '''from typing import Any, TypedDict''',
 331         sep='\n\n', end='\n\n', file=args.out_file,
 332     )
 333
 334     schemas = sorted(discovery_document['schemas'].items())
 335     for name, schema_spec in schemas:
 336         print(document_schema(name, schema_spec), file=args.out_file)
 337
 338     resources = sorted(discovery_document['resources'].items())
 339     for name, resource_spec in resources:
 340         print(document_resource(name, resource_spec), file=args.out_file)
 341
 342     print('''class ArvadosAPIClient:''', file=args.out_file)
 343     for name, _ in resources:
 344         class_name = classify_name(name)
 345         docstring = f"Return an instance of `{class_name}` to call methods via this client"
 346         if class_name in _DEPRECATED_RESOURCES:
 347             docstring += _DEPRECATED_NOTICE
 348         method_spec = {
 349             'description': docstring,
 350             'parameters': {},
 351             'response': {
 352                 '$ref': class_name,
 353             },
 354         }
 355         print(Method(name, method_spec).doc(), file=args.out_file)
 356
 357     return os.EX_OK
 358
 359 if __name__ == '__main__':
 360     sys.exit(main())