2 # Copyright (C) The Arvados Authors. All rights reserved.
4 # SPDX-License-Identifier: Apache-2.0
5 """discovery2pydoc - Build skeleton Python from the Arvados discovery document
7 This tool reads the Arvados discovery document and writes a Python source file
8 with classes and methods that correspond to the resources that
9 google-api-python-client builds dynamically. This source does not include any
10 implementation, but it does include real method signatures and documentation
11 strings, so it's useful as documentation for tools that read Python source,
12 including pydoc and pdoc.
14 If you run this tool with the path to a discovery document, it uses no
15 dependencies outside the Python standard library. If it needs to read
16 configuration to find the discovery document dynamically, it'll load the
17 `arvados` module to do that.
41 RESOURCE_SCHEMA_MAP = {
42 # Special cases for iter_resource_schemas that can't be generated
43 # automatically. Note these schemas may not actually be defined.
45 'vocabularies': 'Vocabulary',
48 def iter_resource_schemas(name: str) -> Iterator[str]:
50 schema_name = RESOURCE_SCHEMA_MAP[name]
53 schema_name = name[:-1]
56 lambda match: match.group(2).capitalize(),
60 yield f'{schema_name}List'
# Reusable callables for fixing up the individual parts of a name.
LOWERCASE = operator.methodcaller('lower')
TITLECASE = operator.methodcaller('title')

# Sort key for any object that exposes a `name` attribute.
NAME_KEY = operator.attrgetter('name')

# The path `-` stands for a standard stream instead of a real file.
STDSTREAM_PATH = pathlib.Path('-')
67 _ALIASED_METHODS = frozenset([
72 _DEPRECATED_NOTICE = '''
74 .. WARNING:: Deprecated
75 This resource is deprecated in the Arvados API.
77 # _DEPRECATED_RESOURCES contains string keys of resources in the discovery
78 # document that are currently deprecated.
79 _DEPRECATED_RESOURCES = frozenset()
80 _DEPRECATED_SCHEMAS = frozenset(
82 for resource_name in _DEPRECATED_RESOURCES
83 for schema_name in iter_resource_schemas(resource_name)
86 _LIST_UTIL_METHODS = {
87 'ComputedPermissionList': 'arvados.util.iter_computed_permissions',
88 'ComputedPermissions': 'arvados.util.iter_computed_permissions',
90 _LIST_METHOD_PYDOC = '''
91 This method returns a single page of `{cls_name}` objects that match your search
92 criteria. If you just want to iterate all objects that match your search
93 criteria, consider using `{list_util_func}`.
95 _LIST_SCHEMA_PYDOC = '''
97 This is the dictionary object returned when you call `{cls_name}s.list`.
98 If you just want to iterate all objects that match your search criteria,
99 consider using `{list_util_func}`.
100 If you work with this raw object, the keys of the dictionary are documented
101 below, along with their types. The `items` key maps to a list of matching
102 `{cls_name}` objects.
104 _MODULE_PYDOC = '''Arvados API client reference documentation
106 This module provides reference documentation for the interface of the
107 Arvados API client, including method signatures and type information for
108 returned objects. However, the functions in `arvados.api` will return
109 different classes at runtime that are generated dynamically from the Arvados
110 API discovery document. The classes in this module do not have any
111 implementation, and you should not instantiate them in your code.
113 If you're just starting out, `ArvadosAPIClient` documents the methods
114 available from the client object. From there, you can follow the trail into
115 resource methods, request objects, and finally the data dictionaries returned
120 This is the dictionary object that represents a single {cls_name} in Arvados
121 and is returned by most `{cls_name}s` methods.
122 The keys of the dictionary are documented below, along with their types.
123 Not every key may appear in every dictionary returned by an API call.
124 When a method doesn't return all the data, you can use its `select` parameter
125 to list the specific keys you need. Refer to the API documentation for details.
128 _MODULE_PRELUDE = '''
129 import googleapiclient.discovery
130 import googleapiclient.http
133 from typing import Any, Dict, Generic, List, Literal, Optional, TypedDict, TypeVar
135 # ST represents an API response type
136 ST = TypeVar('ST', bound=TypedDict)
139 class ArvadosAPIRequest(googleapiclient.http.HttpRequest, Generic[ST]):
140 """Generic API request object
142 When you call an API method in the Arvados Python SDK, it returns a
143 request object. You usually call `execute()` on this object to submit the
144 request to your Arvados API server and retrieve the response. `execute()`
145 will return the type of object annotated in the subscript of
149 def execute(self, http: Optional[httplib2.Http]=None, num_retries: int=0) -> ST:
150 """Execute this request and return the response
154 * http: httplib2.Http | None --- The HTTP client object to use to
155 execute the request. If not specified, uses the HTTP client object
156 created with the API client object.
158 * num_retries: int --- The maximum number of times to retry this
159 request if the server returns a retryable failure. The API client
160 object also has a maximum number of retries specified when it is
161 instantiated (see `arvados.api.api_client`). This request is run
162 with the larger of that number and this argument. Default 0.
167 # Annotation represents a valid Python type annotation. Future development
168 # could expand this to include other valid types like `type`.
170 _TYPE_MAP: Mapping[str, Annotation] = {
171 # Map the API's JavaScript-based type names to Python annotations.
172 # Some of these may disappear after Arvados issue #19795 is fixed.
176 # datetime fields are strings in ISO 8601 format.
178 'Hash': 'Dict[str, Any]',
180 'object': 'Dict[str, Any]',
def get_type_annotation(name: str) -> str:
    """Translate an API type name into a Python annotation.

    Names that have no entry in `_TYPE_MAP` are returned unchanged.
    """
    try:
        return _TYPE_MAP[name]
    except KeyError:
        return name
188 def to_docstring(s: str, indent: int) -> str:
189 prefix = ' ' * indent
190 s = s.replace('"""', '""\"')
191 s = re.sub(r'(\n+)', r'\1' + prefix, s)
194 return f'{prefix}"""{s}\n{prefix}"""'
196 return f'{prefix}"""{s}"""'
def transform_name(s: str, sep: str, fix_part: Callable[[str], str]) -> str:
    """Rebuild an underscore-separated name with a new separator.

    Arguments:

    * s: str --- The name to transform. It is split on every `_`.

    * sep: str --- The string used to join the transformed parts.

    * fix_part: Callable[[str], str] --- Called once with each part of the
      name; its return value is what gets joined.
    """
    fixed_parts = map(fix_part, s.split('_'))
    return sep.join(fixed_parts)
def classify_name(s: str) -> str:
    """Return `s` with each underscore-separated part title-cased and joined with no separator."""
    class_name = transform_name(s, sep='', fix_part=TITLECASE)
    return class_name
def humanize_name(s: str) -> str:
    """Return `s` with each underscore-separated part lowercased and joined with spaces."""
    readable_name = transform_name(s, sep=' ', fix_part=LOWERCASE)
    return readable_name
207 class Parameter(inspect.Parameter):
208 def __init__(self, name: str, spec: Mapping[str, Any]) -> None:
211 if keyword.iskeyword(name):
213 annotation = get_type_annotation(self._spec['type'])
214 if self.is_required():
215 default = inspect.Parameter.empty
217 default = self.default_value()
219 annotation = f'Optional[{annotation}]'
222 inspect.Parameter.KEYWORD_ONLY,
223 annotation=annotation,
228 def from_request(cls, spec: Mapping[str, Any]) -> 'Parameter':
230 # Unpack the single key and value out of properties
231 (key, val_spec), = spec['properties'].items()
232 except (KeyError, ValueError):
233 # ValueError if there was not exactly one property
234 raise NotImplementedError(
235 "only exactly one request parameter is currently supported",
237 val_type = get_type_annotation(val_spec['$ref'])
239 'description': f"""A dictionary with a single item `{key!r}`.
240 Its value is a `{val_type}` dictionary defining the attributes to set.""",
241 'required': spec['required'],
242 'type': f'Dict[Literal[{key!r}], {val_type}]',
245 def default_value(self) -> object:
247 src_value: str = self._spec['default']
251 return json.loads(src_value)
def is_required(self) -> bool:
    """Return the `required` entry from this parameter's spec."""
    spec = self._spec
    return spec['required']
258 def doc(self) -> str:
259 if self.default is None or self.default is inspect.Parameter.empty:
262 default_doc = f"Default `{self.default!r}`."
263 description = self._spec['description'].rstrip()
264 # Does the description contain multiple paragraphs of real text
265 # (excluding, e.g., hyperlink targets)?
266 if re.search(r'\n\s*\n\s*[\w*]', description):
267 # Yes: append the default doc as a separate paragraph.
268 description += f'\n\n{default_doc}'
270 # No: append the default doc to the first (and only) paragraph.
271 description = re.sub(
273 rf' {default_doc}\1',
277 # Align all lines with the list bullet we're formatting it in.
278 description = re.sub(r'\n(\S)', r'\n \1', description)
280 * {self.api_name}: {self.annotation} --- {description}
288 spec: Mapping[str, Any],
289 cls_name: Optional[str]=None,
290 annotate: Callable[[Annotation], Annotation]=str,
294 self.cls_name = cls_name
295 self._annotate = annotate
296 self._required_params = []
297 self._optional_params = []
298 for param in self._iter_parameters():
299 if param.is_required():
300 param_list = self._required_params
302 param_list = self._optional_params
303 param_list.append(param)
304 self._required_params.sort(key=NAME_KEY)
305 self._optional_params.sort(key=NAME_KEY)
307 def _iter_parameters(self) -> Iterator[Parameter]:
309 body = self._spec['request']
313 yield Parameter.from_request(body)
314 for name, spec in self._spec['parameters'].items():
315 yield Parameter(name, spec)
317 def signature(self) -> inspect.Signature:
319 inspect.Parameter('self', inspect.Parameter.POSITIONAL_OR_KEYWORD),
320 *self._required_params,
321 *self._optional_params,
324 returns = get_type_annotation(self._spec['response']['$ref'])
326 returns = 'Dict[str, Any]'
327 returns = self._annotate(returns)
328 return inspect.Signature(parameters, return_annotation=returns)
330 def doc(self, doc_slice: slice=slice(None)) -> str:
331 doc_lines = self._spec['description'].splitlines(keepends=True)[doc_slice]
332 if not doc_lines[-1].endswith('\n'):
333 doc_lines.append('\n')
335 returns_list = self._spec['response']['$ref'].endswith('List')
338 if returns_list and self.cls_name is not None:
339 doc_lines.append(_LIST_METHOD_PYDOC.format(
340 cls_name=self.cls_name[:-1],
341 list_util_func=_LIST_UTIL_METHODS.get(self.cls_name, 'arvados.util.keyset_list_all'),
343 if self._required_params:
344 doc_lines.append("\nRequired parameters:\n")
345 doc_lines.extend(param.doc() for param in self._required_params)
346 if self._optional_params:
347 doc_lines.append("\nOptional parameters:\n")
348 doc_lines.extend(param.doc() for param in self._optional_params)
350 def {self.name}{self.signature()}:
351 {to_docstring(''.join(doc_lines), 8)}
355 def document_schema(name: str, spec: Mapping[str, Any]) -> str:
356 description = spec['description']
357 if name in _DEPRECATED_SCHEMAS:
358 description += _DEPRECATED_NOTICE
359 if name.endswith('List'):
360 description += _LIST_SCHEMA_PYDOC.format(
362 list_util_func=_LIST_UTIL_METHODS.get(name, 'arvados.util.keyset_list_all'),
365 description += _SCHEMA_PYDOC.format(cls_name=name)
367 f"class {name}(TypedDict, total=False):",
368 to_docstring(description, 4),
370 for field_name, field_spec in spec['properties'].items():
371 field_type = get_type_annotation(field_spec['type'])
373 subtype = field_spec['items']['$ref']
377 field_type += f"[{get_type_annotation(subtype)}]"
379 field_line = f" {field_name}: {field_type!r}"
381 field_line += f" = {field_spec['default']!r}"
384 lines.append(field_line)
386 field_doc: str = field_spec.get('description', '')
387 if field_spec['type'] == 'datetime':
388 field_doc += " Pass this to `ciso8601.parse_datetime` to build a `datetime.datetime`."
390 lines.append(to_docstring(field_doc, 4))
392 return '\n'.join(lines)
394 def document_resource(name: str, spec: Mapping[str, Any]) -> str:
395 class_name = classify_name(name)
396 docstring = f"Methods to query and manipulate Arvados {humanize_name(name)}"
397 if class_name in _DEPRECATED_RESOURCES:
398 docstring += _DEPRECATED_NOTICE
400 Method(key, meth_spec, class_name, 'ArvadosAPIRequest[{}]'.format)
401 for key, meth_spec in spec['methods'].items()
402 if key not in _ALIASED_METHODS
404 return f'''class {class_name}:
405 {to_docstring(docstring, 4)}
406 {''.join(method.doc() for method in sorted(methods, key=NAME_KEY))}
409 def parse_arguments(arglist: Optional[Sequence[str]]) -> argparse.Namespace:
410 parser = argparse.ArgumentParser()
412 '--output-file', '-O',
415 default=STDSTREAM_PATH,
416 help="""Path to write output. Specify `-` to use stdout (the default)
420 nargs=argparse.OPTIONAL,
422 help="""URL or file path of a discovery document to load.
423 Specify `-` to use stdin.
424 If not provided, retrieved dynamically from Arvados client configuration.
426 args = parser.parse_args(arglist)
427 if args.discovery_url is None:
428 from arvados.api import api_kwargs_from_config
429 discovery_fmt = api_kwargs_from_config('v1')['discoveryServiceUrl']
430 args.discovery_url = discovery_fmt.format(api='arvados', apiVersion='v1')
431 elif args.discovery_url == '-':
432 args.discovery_url = 'file:///dev/stdin'
434 parts = urllib.parse.urlsplit(args.discovery_url)
435 if not (parts.scheme or parts.netloc):
436 args.discovery_url = pathlib.Path(args.discovery_url).resolve().as_uri()
437 # Our output is Python source, so it should be UTF-8 regardless of locale.
438 if args.output_file == STDSTREAM_PATH:
439 args.out_file = open(sys.stdout.fileno(), 'w', encoding='utf-8', closefd=False)
441 args.out_file = args.output_file.open('w', encoding='utf-8')
444 def main(arglist: Optional[Sequence[str]]=None) -> int:
445 args = parse_arguments(arglist)
446 with urllib.request.urlopen(args.discovery_url) as discovery_file:
447 status = discovery_file.getcode()
448 if not (status is None or 200 <= status < 300):
450 f"error getting {args.discovery_url}: server returned {discovery_file.status}",
454 discovery_document = json.load(discovery_file)
456 to_docstring(_MODULE_PYDOC, indent=0),
459 sep='\n', file=args.out_file,
462 schemas = dict(discovery_document['schemas'])
463 resources = sorted(discovery_document['resources'].items())
464 for name, resource_spec in resources:
465 for schema_name in iter_resource_schemas(name):
467 schema_spec = schemas.pop(schema_name)
471 print(document_schema(schema_name, schema_spec), file=args.out_file)
472 print(document_resource(name, resource_spec), file=args.out_file)
473 for name, schema_spec in sorted(schemas.items()):
474 print(document_schema(name, schema_spec), file=args.out_file)
477 '''class ArvadosAPIClient(googleapiclient.discovery.Resource):''',
478 sep='\n', file=args.out_file,
480 for name, _ in resources:
481 class_name = classify_name(name)
482 docstring = f"Return an instance of `{class_name}` to call methods via this client"
483 if class_name in _DEPRECATED_RESOURCES:
484 docstring += _DEPRECATED_NOTICE
486 'description': docstring,
492 print(Method(name, method_spec).doc(), end='', file=args.out_file)
494 args.out_file.close()
497 if __name__ == '__main__':