2 # Copyright (C) The Arvados Authors. All rights reserved.
4 # SPDX-License-Identifier: Apache-2.0
5 """discovery2pydoc - Build skeleton Python from the Arvados discovery document
7 This tool reads the Arvados discovery document and writes a Python source file
8 with classes and methods that correspond to the resources that
9 google-api-python-client builds dynamically. This source does not include any
10 implementation, but it does include real method signatures and documentation
11 strings, so it's useful as documentation for tools that read Python source,
12 including pydoc and pdoc.
14 If you run this tool with the path to a discovery document, it uses no
15 dependencies outside the Python standard library. If it needs to read
16 configuration to find the discovery document dynamically, it'll load the
17 `arvados` module to do that.
41 RESOURCE_SCHEMA_MAP = {
42 # Special cases for iter_resource_schemas that can't be generated
43 # automatically. Note these schemas may not actually be defined.
45 'vocabularies': 'Vocabulary',
48 def iter_resource_schemas(name: str) -> Iterator[str]:
50 schema_name = RESOURCE_SCHEMA_MAP[name]
53 schema_name = name[:-1]
56 lambda match: match.group(2).capitalize(),
60 yield f'{schema_name}List'
# Reusable callables for transforming the parts of a name: each one calls the
# named string method on whatever argument it receives.
LOWERCASE = operator.methodcaller('lower')
TITLECASE = operator.methodcaller('title')

# Sort key that orders objects by their `name` attribute.
NAME_KEY = operator.attrgetter('name')

# Sentinel path meaning "use the standard stream" instead of a real file.
# It is the default for `--output-file`, where it selects stdout.
STDSTREAM_PATH = pathlib.Path('-')
67 _ALIASED_METHODS = frozenset([
72 _DEPRECATED_NOTICE = '''
74 .. WARNING:: Deprecated
75 This resource is deprecated in the Arvados API.
77 # _DEPRECATED_RESOURCES contains string keys of resources in the discovery
78 # document that are currently deprecated.
79 _DEPRECATED_RESOURCES = frozenset()
80 _DEPRECATED_SCHEMAS = frozenset(
82 for resource_name in _DEPRECATED_RESOURCES
83 for schema_name in iter_resource_schemas(resource_name)
88 This is the dictionary object returned when you call `{cls_name}s.list`.
89 If you just want to iterate all objects that match your search criteria,
90 consider using `arvados.util.keyset_list_all`.
91 If you work with this raw object, the keys of the dictionary are documented
92 below, along with their types. The `items` key maps to a list of matching
95 _MODULE_PYDOC = '''Arvados API client reference documentation
97 This module provides reference documentation for the interface of the
98 Arvados API client, including method signatures and type information for
99 returned objects. However, the functions in `arvados.api` will return
100 different classes at runtime that are generated dynamically from the Arvados
101 API discovery document. The classes in this module do not have any
102 implementation, and you should not instantiate them in your code.
104 If you're just starting out, `ArvadosAPIClient` documents the methods
105 available from the client object. From there, you can follow the trail into
106 resource methods, request objects, and finally the data dictionaries returned
111 This is the dictionary object that represents a single {cls_name} in Arvados
112 and is returned by most `{cls_name}s` methods.
113 The keys of the dictionary are documented below, along with their types.
114 Not every key may appear in every dictionary returned by an API call.
115 When a method doesn't return all the data, you can use its `select` parameter
116 to list the specific keys you need. Refer to the API documentation for details.
119 _MODULE_PRELUDE = '''
120 import googleapiclient.discovery
121 import googleapiclient.http
124 from typing import Any, Dict, Generic, List, Literal, Optional, TypedDict, TypeVar
126 # ST represents an API response type
127 ST = TypeVar('ST', bound=TypedDict)
130 class ArvadosAPIRequest(googleapiclient.http.HttpRequest, Generic[ST]):
131 """Generic API request object
133 When you call an API method in the Arvados Python SDK, it returns a
134 request object. You usually call `execute()` on this object to submit the
135 request to your Arvados API server and retrieve the response. `execute()`
136 will return the type of object annotated in the subscript of
140 def execute(self, http: Optional[httplib2.Http]=None, num_retries: int=0) -> ST:
141 """Execute this request and return the response
145 * http: httplib2.Http | None --- The HTTP client object to use to
146 execute the request. If not specified, uses the HTTP client object
147 created with the API client object.
149 * num_retries: int --- The maximum number of times to retry this
150 request if the server returns a retryable failure. The API client
151 object also has a maximum number of retries specified when it is
152 instantiated (see `arvados.api.api_client`). This request is run
153 with the larger of that number and this argument. Default 0.
158 # Annotation represents a valid Python type annotation. Future development
159 # could expand this to include other valid types like `type`.
161 _TYPE_MAP: Mapping[str, Annotation] = {
162 # Map the API's JavaScript-based type names to Python annotations.
163 # Some of these may disappear after Arvados issue #19795 is fixed.
167 # datetime fields are strings in ISO 8601 format.
169 'Hash': 'Dict[str, Any]',
171 'object': 'Dict[str, Any]',
def get_type_annotation(name: str) -> str:
    """Return the Python annotation to use for an API type name

    Names with an entry in `_TYPE_MAP` are translated to that entry. Any
    other name is returned unchanged.
    """
    try:
        return _TYPE_MAP[name]
    except KeyError:
        return name
179 def to_docstring(s: str, indent: int) -> str:
180 prefix = ' ' * indent
181 s = s.replace('"""', '""\"')
182 s = re.sub(r'(\n+)', r'\1' + prefix, s)
185 return f'{prefix}"""{s}\n{prefix}"""'
187 return f'{prefix}"""{s}"""'
def transform_name(s: str, sep: str, fix_part: Callable[[str], str]) -> str:
    """Rebuild an underscore-separated name from transformed parts

    Arguments:

    * s: str --- The name to transform. It is split on every `_`.

    * sep: str --- The string used to join the transformed parts.

    * fix_part: Callable[[str], str] --- Called once with each part of the
      name. Its return value is used in place of that part.
    """
    fixed_parts = map(fix_part, s.split('_'))
    return sep.join(fixed_parts)
def classify_name(s: str) -> str:
    """Return the class-style spelling of an underscore-separated name

    Each `_`-separated part is title-cased, and the parts are joined with no
    separator between them.
    """
    return transform_name(s, sep='', fix_part=TITLECASE)
def humanize_name(s: str) -> str:
    """Return a readable spelling of an underscore-separated name

    Each `_`-separated part is lowercased, and the parts are joined with
    single spaces.
    """
    return transform_name(s, sep=' ', fix_part=LOWERCASE)
198 class Parameter(inspect.Parameter):
199 def __init__(self, name: str, spec: Mapping[str, Any]) -> None:
202 if keyword.iskeyword(name):
206 inspect.Parameter.KEYWORD_ONLY,
207 annotation=get_type_annotation(self._spec['type']),
208 # In normal Python the presence of a default tells you whether or
209 # not an argument is required. In the API the `required` flag tells
210 # us that, and defaults are specified inconsistently. Don't show
211 # defaults in the signature: it adds noise and makes things more
212 # confusing for the reader about what's required and what's
213 # optional. The docstring can explain in better detail, including
215 default=inspect.Parameter.empty,
219 def from_request(cls, spec: Mapping[str, Any]) -> 'Parameter':
221 # Unpack the single key and value out of properties
222 (key, val_spec), = spec['properties'].items()
223 except (KeyError, ValueError):
224 # ValueError if there was not exactly one property
225 raise NotImplementedError(
226 "only exactly one request parameter is currently supported",
228 val_type = get_type_annotation(val_spec['$ref'])
230 'description': f"""A dictionary with a single item `{key!r}`.
231 Its value is a `{val_type}` dictionary defining the attributes to set.""",
232 'required': spec['required'],
233 'type': f'Dict[Literal[{key!r}], {val_type}]',
236 def default_value(self) -> object:
238 src_value: str = self._spec['default']
241 if src_value == 'true':
243 elif src_value == 'false':
245 elif src_value.isdigit():
246 return int(src_value)
250 def is_required(self) -> bool:
251 return self._spec['required']
253 def doc(self) -> str:
254 default_value = self.default_value()
255 if default_value is None:
258 default_doc = f"Default {default_value!r}."
259 description = self._spec['description']
260 doc_parts = [f'{self.api_name}: {self.annotation}']
261 if description or default_doc:
262 doc_parts.append('---')
264 doc_parts.append(description)
266 doc_parts.append(default_doc)
268 * {' '.join(doc_parts)}
276 spec: Mapping[str, Any],
277 annotate: Callable[[Annotation], Annotation]=str,
281 self._annotate = annotate
282 self._required_params = []
283 self._optional_params = []
284 for param in self._iter_parameters():
285 if param.is_required():
286 param_list = self._required_params
288 param_list = self._optional_params
289 param_list.append(param)
290 self._required_params.sort(key=NAME_KEY)
291 self._optional_params.sort(key=NAME_KEY)
293 def _iter_parameters(self) -> Iterator[Parameter]:
295 body = self._spec['request']
299 yield Parameter.from_request(body)
300 for name, spec in self._spec['parameters'].items():
301 yield Parameter(name, spec)
303 def signature(self) -> inspect.Signature:
305 inspect.Parameter('self', inspect.Parameter.POSITIONAL_OR_KEYWORD),
306 *self._required_params,
307 *self._optional_params,
310 returns = get_type_annotation(self._spec['response']['$ref'])
312 returns = 'Dict[str, Any]'
313 returns = self._annotate(returns)
314 return inspect.Signature(parameters, return_annotation=returns)
316 def doc(self, doc_slice: slice=slice(None)) -> str:
317 doc_lines = self._spec['description'].splitlines(keepends=True)[doc_slice]
318 if not doc_lines[-1].endswith('\n'):
319 doc_lines.append('\n')
320 if self._required_params:
321 doc_lines.append("\nRequired parameters:\n")
322 doc_lines.extend(param.doc() for param in self._required_params)
323 if self._optional_params:
324 doc_lines.append("\nOptional parameters:\n")
325 doc_lines.extend(param.doc() for param in self._optional_params)
327 def {self.name}{self.signature()}:
328 {to_docstring(''.join(doc_lines), 8)}
332 def document_schema(name: str, spec: Mapping[str, Any]) -> str:
333 description = spec['description']
334 if name in _DEPRECATED_SCHEMAS:
335 description += _DEPRECATED_NOTICE
336 if name.endswith('List'):
337 desc_fmt = _LIST_PYDOC
340 desc_fmt = _SCHEMA_PYDOC
342 description += desc_fmt.format(cls_name=cls_name)
344 f"class {name}(TypedDict, total=False):",
345 to_docstring(description, 4),
347 for field_name, field_spec in spec['properties'].items():
348 field_type = get_type_annotation(field_spec['type'])
350 subtype = field_spec['items']['$ref']
354 field_type += f"[{get_type_annotation(subtype)}]"
356 field_line = f" {field_name}: {field_type!r}"
358 field_line += f" = {field_spec['default']!r}"
361 lines.append(field_line)
363 field_doc: str = field_spec.get('description', '')
364 if field_spec['type'] == 'datetime':
365 field_doc += " Pass this to `ciso8601.parse_datetime` to build a `datetime.datetime`."
367 lines.append(to_docstring(field_doc, 4))
369 return '\n'.join(lines)
def document_resource(name: str, spec: Mapping[str, Any]) -> str:
    """Return Python source for a skeleton class documenting one API resource

    Arguments:

    * name: str --- The resource's key in the discovery document. The
      generated class is named by passing this through `classify_name`.

    * spec: Mapping[str, Any] --- The resource's definition from the
      discovery document. Its `methods` mapping supplies the methods to
      document. Methods listed in `_ALIASED_METHODS` are left out.

    The returned source is a class statement, a class docstring, and then
    the documentation of each method, sorted by method name.
    """
    class_name = classify_name(name)
    docstring = f"Methods to query and manipulate Arvados {humanize_name(name)}"
    # _DEPRECATED_RESOURCES holds resource keys as they appear in the
    # discovery document, so test the raw key. The classified class name
    # would never match an entry like that.
    if name in _DEPRECATED_RESOURCES:
        docstring += _DEPRECATED_NOTICE
    methods = [
        Method(key, meth_spec, 'ArvadosAPIRequest[{}]'.format)
        for key, meth_spec in spec['methods'].items()
        if key not in _ALIASED_METHODS
    ]
    methods_doc = ''.join(method.doc() for method in sorted(methods, key=NAME_KEY))
    return f'''class {class_name}:
{to_docstring(docstring, 4)}
{methods_doc}
'''
386 def parse_arguments(arglist: Optional[Sequence[str]]) -> argparse.Namespace:
387 parser = argparse.ArgumentParser()
389 '--output-file', '-O',
392 default=STDSTREAM_PATH,
393 help="""Path to write output. Specify `-` to use stdout (the default)
397 nargs=argparse.OPTIONAL,
399 help="""URL or file path of a discovery document to load.
400 Specify `-` to use stdin.
401 If not provided, retrieved dynamically from Arvados client configuration.
403 args = parser.parse_args(arglist)
404 if args.discovery_url is None:
405 from arvados.api import api_kwargs_from_config
406 discovery_fmt = api_kwargs_from_config('v1')['discoveryServiceUrl']
407 args.discovery_url = discovery_fmt.format(api='arvados', apiVersion='v1')
408 elif args.discovery_url == '-':
409 args.discovery_url = 'file:///dev/stdin'
411 parts = urllib.parse.urlsplit(args.discovery_url)
412 if not (parts.scheme or parts.netloc):
413 args.discovery_url = pathlib.Path(args.discovery_url).resolve().as_uri()
414 # Our output is Python source, so it should be UTF-8 regardless of locale.
415 if args.output_file == STDSTREAM_PATH:
416 args.out_file = open(sys.stdout.fileno(), 'w', encoding='utf-8', closefd=False)
418 args.out_file = args.output_file.open('w', encoding='utf-8')
421 def main(arglist: Optional[Sequence[str]]=None) -> int:
422 args = parse_arguments(arglist)
423 with urllib.request.urlopen(args.discovery_url) as discovery_file:
424 status = discovery_file.getcode()
425 if not (status is None or 200 <= status < 300):
427 f"error getting {args.discovery_url}: server returned {discovery_file.status}",
431 discovery_document = json.load(discovery_file)
433 to_docstring(_MODULE_PYDOC, indent=0),
436 sep='\n', file=args.out_file,
439 schemas = dict(discovery_document['schemas'])
440 resources = sorted(discovery_document['resources'].items())
441 for name, resource_spec in resources:
442 for schema_name in iter_resource_schemas(name):
444 schema_spec = schemas.pop(schema_name)
448 print(document_schema(schema_name, schema_spec), file=args.out_file)
449 print(document_resource(name, resource_spec), file=args.out_file)
450 for name, schema_spec in sorted(schemas.items()):
451 print(document_schema(name, schema_spec), file=args.out_file)
454 '''class ArvadosAPIClient(googleapiclient.discovery.Resource):''',
455 sep='\n', file=args.out_file,
457 for name, _ in resources:
458 class_name = classify_name(name)
459 docstring = f"Return an instance of `{class_name}` to call methods via this client"
460 if class_name in _DEPRECATED_RESOURCES:
461 docstring += _DEPRECATED_NOTICE
463 'description': docstring,
469 print(Method(name, method_spec).doc(), file=args.out_file)
471 args.out_file.close()
474 if __name__ == '__main__':