bin/extract_figures.py

   1 #!/usr/bin/env python
   2
   3 import sys
   4 import os
   5 import glob
   6 from optparse import OptionParser
   7
   8 from util import Reporter, read_markdown, IMAGE_FILE_SUFFIX
   9
  10
  11 def main():
  12     """Main driver."""
  13
  14     args = parse_args()
  15     images = []
  16     for filename in args.filenames:
  17         images += get_images(args.parser, filename)
  18     save(sys.stdout, images)
  19
  20
  21 def parse_args():
  22     """Parse command-line arguments."""
  23
  24     parser = OptionParser()
  25     parser.add_option('-p', '--parser',
  26                       default=None,
  27                       dest='parser',
  28                       help='path to Markdown parser')
  29
  30     args, extras = parser.parse_args()
  31     require(args.parser is not None,
  32             'Path to Markdown parser not provided')
  33     require(extras,
  34             'No filenames specified')
  35
  36     args.filenames = extras
  37     return args
  38
  39
  40 def get_filenames(source_dir):
  41     """Get all filenames to be searched for images."""
  42
  43     return glob.glob(os.path.join(source_dir, '*.md'))
  44
  45
  46 def get_images(parser, filename):
  47     """Extract all images from file."""
  48
  49     content = read_markdown(parser, filename)
  50     result = []
  51     find_image_nodes(content['doc'], result)
  52     find_image_links(content['doc'], result)
  53     return result
  54
  55
  56 def find_image_nodes(doc, result):
  57     """Find all nested nodes representing images."""
  58
  59     if (doc['type'] == 'img') or \
  60        ((doc['type'] == 'html_element') and (doc['value'] == 'img')):
  61         result.append({'alt': doc['attr']['alt'], 'src': doc['attr']['src']})
  62     else:
  63         for child in doc.get('children', []):
  64             find_image_nodes(child, result)
  65
  66
  67 def find_image_links(doc, result):
  68     """Find all links to files in the 'fig' directory."""
  69
  70     if ((doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr'])) \
  71        or \
  72        ((doc['type'] == 'html_element') and (doc['value'] == 'a')):
  73         path = doc['attr']['href']
  74         if os.path.splitext(path)[1].lower() in IMAGE_FILE_SUFFIX:
  75             result.append({'alt':'', 'src': doc['attr']['href']})
  76     else:
  77         for child in doc.get('children', []):
  78             find_image_links(child, result)
  79
  80
  81 def save(stream, images):
  82     """Save results as Markdown."""
  83
  84     text = '\n<hr/>\n'.join(['<p><img alt="{0}" src="{1}" /></p>'.format(img['alt'], img['src']) for img in images])
  85     print(text, file=stream)
  86
  87
  88 def require(condition, message):
  89     """Fail if condition not met."""
  90
  91     if not condition:
  92         print(message, file=sys.stderr)
  93         sys.exit(1)
  94
  95
  96 if __name__ == '__main__':
  97     main()