X-Git-Url: https://git.arvados.org/rnaseq-cwl-training.git/blobdiff_plain/e577ea88692b33e86d5d7eebdefa3af00efa6c03..d0b3cf479e28a9c308fa9c2040b749c969edfe6a:/bin/extract_figures.py diff --git a/bin/extract_figures.py b/bin/extract_figures.py index e89c552..63a7752 100755 --- a/bin/extract_figures.py +++ b/bin/extract_figures.py @@ -1,28 +1,19 @@ #!/usr/bin/env python +from __future__ import print_function import sys import os import glob from optparse import OptionParser -from util import Reporter, read_markdown - - -# Things an image file's name can end with. -PATH_SUFFICES = { - '.gif', - '.jpg', - '.png', - '.svg' -} - +from util import Reporter, read_markdown, IMAGE_FILE_SUFFIX def main(): """Main driver.""" args = parse_args() images = [] - for filename in get_filenames(args.source_dir): + for filename in args.filenames: images += get_images(args.parser, filename) save(sys.stdout, images) @@ -35,19 +26,14 @@ def parse_args(): default=None, dest='parser', help='path to Markdown parser') - parser.add_option('-s', '--source', - default=None, - dest='source_dir', - help='source directory') args, extras = parser.parse_args() require(args.parser is not None, 'Path to Markdown parser not provided') - require(args.source_dir is not None, - 'Source directory not provided') - require(not extras, - 'Unexpected trailing command-line arguments "{0}"'.format(extras)) + require(extras, + 'No filenames specified') + args.filenames = extras return args @@ -70,20 +56,23 @@ def get_images(parser, filename): def find_image_nodes(doc, result): """Find all nested nodes representing images.""" - if (doc["type"] == "img") or \ - ((doc["type"] == "html_element") and (doc["value"] == "img")): - result.append({'alt': doc['attr']['alt'], 'src': doc['attr']['src']}) + if (doc['type'] == 'img') or \ + ((doc['type'] == 'html_element') and (doc['value'] == 'img')): + alt = doc['attr'].get('alt', '') + result.append({'alt': alt, 'src': doc['attr']['src']}) else: - for child in doc.get("children", []): + for child in doc.get('children', []): find_image_nodes(child, result) def find_image_links(doc, result): """Find all links to files in the 'fig' directory.""" - if (doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr']): + if ((doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr'])) \ + or \ + ((doc['type'] == 'html_element') and (doc['value'] == 'a') and ('href' in doc['attr'])): path = doc['attr']['href'] - if os.path.splitext(path)[1].lower() in PATH_SUFFICES: + if os.path.splitext(path)[1].lower() in IMAGE_FILE_SUFFIX: result.append({'alt':'', 'src': doc['attr']['href']}) else: for child in doc.get('children', []):