Looking for missing or superfluous images

author Greg Wilson <gvwilson@third-bit.com>

Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)

committer Greg Wilson <gvwilson@third-bit.com>

Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)
author Greg Wilson <gvwilson@third-bit.com>
Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)
committer Greg Wilson <gvwilson@third-bit.com>
Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)
diff --git a/bin/lesson_check.py b/bin/lesson_check.py

index aaec64fac6929c20adc27addf06701d95deedf97..6de771470a11a4ccdcdb6b25af8649b7885eb6b4 100755 (executable)
--- a/bin/lesson_check.py
+++ b/bin/lesson_check.py
@@ -43,6 +43,9 @@ P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$')
  # Pattern to match lines ending with whitespace.
  P_TRAILING_WHITESPACE = re.compile(r'\s+$')
  
+# Pattern to match figure references in HTML: finds each `<img ...>` tag
+# and captures the value of its double-quoted `src` attribute.
+P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
+
  # What kinds of blockquotes are allowed?
  KNOWN_BLOCKQUOTES = {
      'callout',
@@ -100,6 +103,7 @@ def main():
      for filename in docs.keys():
          checker = create_checker(args, filename, docs[filename])
          checker.check()
+    check_figures(args.source_dir, args.reporter)
      args.reporter.report()
  
  
@@ -197,6 +201,38 @@ def check_fileset(source_dir, reporter, filenames_present):
                     seen)
  
  
+def check_figures(source_dir, reporter):
+    """Check that all figures are present and referenced.
+
+    Compares the image file names found in `<img>` tags of
+    `_includes/all_figures.html` (only those whose `src` contains
+    `/fig/`) against the non-hidden entries of the `fig` directory, and
+    reports names that appear on only one side.
+
+    Args:
+        source_dir: directory containing `_includes/all_figures.html`
+            and the `fig` directory.
+        reporter: object used to record problems through its `add` and
+            `check` methods.
+    """
+
+    # Get references.  Only opening the file is expected to fail, so
+    # nothing else is kept inside the `try`.
+    all_figures_html = os.path.join(source_dir, '_includes', 'all_figures.html')
+    try:
+        with open(all_figures_html, 'r') as reader:
+            text = reader.read()
+    except FileNotFoundError:
+        reporter.add(all_figures_html,
+                     'File not found')
+        return
+    figures = P_FIGURE_REFS.findall(text)
+    referenced = {os.path.split(f)[1] for f in figures if '/fig/' in f}
+
+    # Get actual files.  A missing `fig` directory is treated as empty so
+    # that every referenced figure is reported as missing instead of the
+    # whole check crashing with an unhandled exception.
+    fig_dir_path = os.path.join(source_dir, 'fig')
+    try:
+        actual = {f for f in os.listdir(fig_dir_path) if not f.startswith('.')}
+    except FileNotFoundError:
+        actual = set()
+
+    # Report differences.
+    unexpected = actual - referenced
+    reporter.check(not unexpected,
+                   None,
+                   'Unexpected image files: {0}',
+                   ', '.join(sorted(unexpected)))
+    missing = referenced - actual
+    reporter.check(not missing,
+                   None,
+                   'Missing image files: {0}',
+                   ', '.join(sorted(missing)))
+
+
  def create_checker(args, filename, info):
      """Create appropriate checker for file."""
author	Greg Wilson <gvwilson@third-bit.com>
	Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)
committer	Greg Wilson <gvwilson@third-bit.com>
	Sat, 2 Jul 2016 21:30:22 +0000 (17:30 -0400)