Checking more configuration values
[rnaseq-cwl-training.git] / bin / lesson_check.py
index aaec64fac6929c20adc27addf06701d95deedf97..016b395451659e299cdeb2f60af299cd0f9e96bc 100755 (executable)
@@ -11,7 +11,7 @@ import json
 import re
 from optparse import OptionParser
 
-from util import Reporter, read_markdown, load_yaml
+from util import Reporter, read_markdown, load_yaml, check_unwanted_files
 
 __version__ = '0.2'
 
@@ -43,6 +43,9 @@ P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$')
 # Pattern to match lines ending with whitespace.
 P_TRAILING_WHITESPACE = re.compile(r'\s+$')
 
+# Pattern to match figure references in HTML (captures each <img> src value).
+P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
+
 # What kinds of blockquotes are allowed?
 KNOWN_BLOCKQUOTES = {
     'callout',
@@ -97,9 +100,11 @@ def main():
     check_config(args.reporter, args.source_dir)
     docs = read_all_markdown(args.source_dir, args.parser)
     check_fileset(args.source_dir, args.reporter, docs.keys())
+    check_unwanted_files(args.source_dir, args.reporter)
     for filename in docs.keys():
         checker = create_checker(args, filename, docs[filename])
         checker.check()
+    check_figures(args.source_dir, args.reporter)
     args.reporter.report()
 
 
@@ -141,6 +146,16 @@ def check_config(reporter, source_dir):
     config_file = os.path.join(source_dir, '_config.yml')
     config = load_yaml(config_file)
     reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson')
+    reporter.check_field(config_file, 'configuration', config, 'carpentry', ('swc', 'dc'))
+    reporter.check_field(config_file, 'configuration', config, 'title')
+    reporter.check_field(config_file, 'configuration', config, 'email')
+    reporter.check_field(config_file, 'configuration', config, 'repo')
+    reporter.check_field(config_file, 'configuration', config, 'root')
+    if ('repo' in config) and ('root' in config):
+        reporter.check(config['repo'].endswith(config['root']),
+                       config_file,
+                       'Repository name "{0}" not consistent with root "{1}"',
+                       config['repo'], config['root'])
 
 
 def read_all_markdown(source_dir, parser):
@@ -197,6 +212,38 @@ def check_fileset(source_dir, reporter, filenames_present):
                    seen)
 
 
+def check_figures(source_dir, reporter):
+    """Check that all figures are present and referenced.
+
+    Compares '/fig/' <img> sources in _includes/all_figures.html with fig/.
+    """
+    # Get references.  Only the file read is guarded by the try.
+    all_figures_html = os.path.join(source_dir, '_includes', 'all_figures.html')
+    try:
+        with open(all_figures_html, 'r') as reader:
+            text = reader.read()
+    except FileNotFoundError:
+        reporter.add(all_figures_html, 'File not found')
+        return
+    figures = P_FIGURE_REFS.findall(text)
+    referenced = {os.path.split(f)[1] for f in figures if '/fig/' in f}
+
+    # Get actual files; a missing 'fig' directory counts as empty.
+    fig_dir_path = os.path.join(source_dir, 'fig')
+    try:
+        actual = {f for f in os.listdir(fig_dir_path) if not f.startswith('.')}
+    except FileNotFoundError:
+        actual = set()
+
+    # Report differences.
+    unexpected = actual - referenced
+    reporter.check(not unexpected, None,
+                   'Unexpected image files: {0}', ', '.join(sorted(unexpected)))
+    missing = referenced - actual
+    reporter.check(not missing, None,
+                   'Missing image files: {0}', ', '.join(sorted(missing)))
+
+
 def create_checker(args, filename, info):
     """Create appropriate checker for file."""