X-Git-Url: https://git.arvados.org/rnaseq-cwl-training.git/blobdiff_plain/e577ea88692b33e86d5d7eebdefa3af00efa6c03..d0b3cf479e28a9c308fa9c2040b749c969edfe6a:/bin/extract_figures.py

diff --git a/bin/extract_figures.py b/bin/extract_figures.py
index e89c552..63a7752 100755
--- a/bin/extract_figures.py
+++ b/bin/extract_figures.py
@@ -1,28 +1,19 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
 import sys
 import os
 import glob
 from optparse import OptionParser
 
-from util import Reporter, read_markdown
-
-
-# Things an image file's name can end with.
-PATH_SUFFICES = {
-    '.gif',
-    '.jpg',
-    '.png',
-    '.svg'
-}
-
+from util import Reporter, read_markdown, IMAGE_FILE_SUFFIX
 
 def main():
     """Main driver."""
 
     args = parse_args()
     images = []
-    for filename in get_filenames(args.source_dir):
+    for filename in args.filenames:
         images += get_images(args.parser, filename)
     save(sys.stdout, images)
 
@@ -35,19 +26,14 @@ def parse_args():
                       default=None,
                       dest='parser',
                       help='path to Markdown parser')
-    parser.add_option('-s', '--source',
-                      default=None,
-                      dest='source_dir',
-                      help='source directory')
 
     args, extras = parser.parse_args()
     require(args.parser is not None,
             'Path to Markdown parser not provided')
-    require(args.source_dir is not None,
-            'Source directory not provided')
-    require(not extras,
-            'Unexpected trailing command-line arguments "{0}"'.format(extras))
+    require(extras,
+            'No filenames specified')
 
+    args.filenames = extras
     return args
 
 
@@ -70,20 +56,23 @@ def get_images(parser, filename):
 def find_image_nodes(doc, result):
     """Find all nested nodes representing images."""
 
-    if (doc["type"] == "img") or \
-       ((doc["type"] == "html_element") and (doc["value"] == "img")):
-        result.append({'alt': doc['attr']['alt'], 'src': doc['attr']['src']})
+    if (doc['type'] == 'img') or \
+       ((doc['type'] == 'html_element') and (doc['value'] == 'img')):
+        alt = doc['attr'].get('alt', '')
+        result.append({'alt': alt, 'src': doc['attr']['src']})
     else:
-        for child in doc.get("children", []):
+        for child in doc.get('children', []):
             find_image_nodes(child, result)
 
 
 def find_image_links(doc, result):
     """Find all links to files in the 'fig' directory."""
 
-    if (doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr']):
+    if ((doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr'])) \
+       or \
+       ((doc['type'] == 'html_element') and (doc['value'] == 'a') and ('href' in doc['attr'])):
         path = doc['attr']['href']
-        if os.path.splitext(path)[1].lower() in PATH_SUFFICES:
+        if os.path.splitext(path)[1].lower() in IMAGE_FILE_SUFFIX:
             result.append({'alt':'', 'src': doc['attr']['href']})
     else:
         for child in doc.get('children', []):