Looking for missing or superfluous images
author Greg Wilson <gvwilson@third-bit.com>
Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)
committer Greg Wilson <gvwilson@third-bit.com>
Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)
bin/lesson_check.py

index aaec64fac6929c20adc27addf06701d95deedf97..6de771470a11a4ccdcdb6b25af8649b7885eb6b4 100755 (executable)
@@ -43,6 +43,9 @@ P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$')
 # Pattern to match lines ending with whitespace.
 P_TRAILING_WHITESPACE = re.compile(r'\s+$')
 
+# Pattern to match <img> tags in HTML; group 1 captures the double-quoted src value.
+P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
+
 # What kinds of blockquotes are allowed?
 KNOWN_BLOCKQUOTES = {
     'callout',
@@ -100,6 +103,7 @@ def main():
     for filename in docs.keys():
         checker = create_checker(args, filename, docs[filename])
         checker.check()
+    check_figures(args.source_dir, args.reporter)
     args.reporter.report()
 
 
@@ -197,6 +201,38 @@ def check_fileset(source_dir, reporter, filenames_present):
                    seen)
 
 
+def check_figures(source_dir, reporter):
+    """Check that files in fig/ and images referenced in all_figures.html agree."""
+
+    # Get references: base names of <img> sources whose path contains '/fig/'.
+    try:
+        all_figures_html = os.path.join(source_dir, '_includes', 'all_figures.html')
+        with open(all_figures_html, 'r') as reader:
+            text = reader.read()
+        figures = P_FIGURE_REFS.findall(text)
+        referenced = [os.path.split(f)[1] for f in figures if '/fig/' in f]
+    except FileNotFoundError as e:
+        reporter.add(all_figures_html,
+                     'File not found')
+        return  # Without the reference list there is nothing to compare against.
+
+    # Get actual files, skipping names that start with '.'. NOTE(review): a missing fig/ directory is not handled here.
+    fig_dir_path = os.path.join(source_dir, 'fig')
+    actual = [f for f in os.listdir(fig_dir_path) if not f.startswith('.')]
+
+    # Report differences in both directions; names are compared by base name only.
+    unexpected = set(actual) - set(referenced)
+    reporter.check(not unexpected,
+                   None,
+                   'Unexpected image files: {0}',
+                   ', '.join(sorted(unexpected)))
+    missing = set(referenced) - set(actual)
+    reporter.check(not missing,
+                   None,
+                   'Missing image files: {0}',
+                   ', '.join(sorted(missing)))
+
+
 def create_checker(args, filename, info):
     """Create appropriate checker for file."""