Checking for links to figures
author Greg Wilson <gvwilson@third-bit.com>
Thu, 7 Jul 2016 19:35:33 +0000 (15:35 -0400)
committer Greg Wilson <gvwilson@third-bit.com>
Thu, 7 Jul 2016 19:35:33 +0000 (15:35 -0400)
bin/extract_figures.py

index 5fefcbdfa9a7b5caa2d85f39a73eeb16ae3519ce..e89c5528ceaa24d14243cb29c8569f2dc30c0b94 100755 (executable)
@@ -8,6 +8,15 @@ from optparse import OptionParser
 from util import Reporter, read_markdown
 
 
+# Lower-case file extensions (leading dot included) that mark a path as an image file.
+PATH_SUFFICES = {
+    '.gif',
+    '.jpg',
+    '.png',
+    '.svg'
+}
+
+
 def main():
     """Main driver."""
 
@@ -54,6 +63,7 @@ def get_images(parser, filename):
     content = read_markdown(parser, filename)
     result = []
     find_image_nodes(content['doc'], result)
+    find_image_links(content['doc'], result)
     return result
 
 
@@ -68,6 +78,18 @@ def find_image_nodes(doc, result):
             find_image_nodes(child, result)
 
 
+def find_image_links(doc, result):
+    """Append to result every link ('a' node) whose href ends in an image suffix."""
+
+    if (doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr']):
+        path = doc['attr']['href']
+        if os.path.splitext(path)[1].lower() in PATH_SUFFICES:  # extension is lower-cased before the lookup
+            result.append({'alt':'', 'src': doc['attr']['href']})  # recorded with an empty 'alt'
+    else:  # not an 'a' node carrying an href: search its children instead
+        for child in doc.get('children', []):
+            find_image_links(child, result)
+
+
 def save(stream, images):
     """Save results as Markdown."""