Remove check for all_figures.html now that we are using JavaScript

[rnaseq-cwl-training.git] / bin / lesson_check.py
diff --git a/bin/lesson_check.py b/bin/lesson_check.py

index 016b395451659e299cdeb2f60af299cd0f9e96bc..66f6310c0d41e415c83d50e38a0b1aba92ee0d9f 100755 (executable)
--- a/bin/lesson_check.py
+++ b/bin/lesson_check.py
@@ -4,6 +4,7 @@
  Check lesson files and their contents.
  """
  
+from __future__ import print_function
  import sys
  import os
  import glob
@@ -11,9 +12,9 @@ import json
  import re
  from optparse import OptionParser
  
-from util import Reporter, read_markdown, load_yaml, check_unwanted_files
+from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX
  
-__version__ = '0.2'
+__version__ = '0.3'
  
  # Where to look for source Markdown files.
  SOURCE_DIRS = ['', '_episodes', '_extras']
@@ -30,7 +31,6 @@ REQUIRED_FILES = {
      '%/LICENSE.md': True,
      '%/README.md': False,
      '%/_extras/discuss.md': True,
-    '%/_extras/figures.md': True,
      '%/_extras/guide.md': True,
      '%/index.md': True,
      '%/reference.md': True,
@@ -46,6 +46,12 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$')
  # Pattern to match figure references in HTML.
  P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
  
+# Pattern to match internally-defined Markdown links.
+P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]')
+
+# Pattern to match reference links (to resolve internally-defined references).
+P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)')
+
  # What kinds of blockquotes are allowed?
  KNOWN_BLOCKQUOTES = {
      'callout',
@@ -65,11 +71,14 @@ KNOWN_CODEBLOCKS = {
      'error',
      'output',
      'source',
-    'bash',
-    'make',
-    'python',
-    'r',
-    'sql'
+    'language-bash',
+    'html',
+    'language-make',
+    'language-matlab',
+    'language-python',
+    'language-r',
+    'language-shell',
+    'language-sql'
  }
  
  # What fields are required in teaching episode metadata?
@@ -98,13 +107,15 @@ def main():
      args = parse_args()
      args.reporter = Reporter()
      check_config(args.reporter, args.source_dir)
+    args.references = read_references(args.reporter, args.reference_path)
+
      docs = read_all_markdown(args.source_dir, args.parser)
      check_fileset(args.source_dir, args.reporter, docs.keys())
      check_unwanted_files(args.source_dir, args.reporter)
      for filename in docs.keys():
          checker = create_checker(args, filename, docs[filename])
          checker.check()
-    check_figures(args.source_dir, args.reporter)
+
      args.reporter.report()
  
  
@@ -121,6 +132,10 @@ def parse_args():
                        default=None,
                        dest='parser',
                        help='path to Markdown parser')
+    parser.add_option('-r', '--references',
+                      default=None,
+                      dest='reference_path',
+                      help='path to Markdown file of external references')
      parser.add_option('-s', '--source',
                        default=os.curdir,
                        dest='source_dir',
@@ -146,16 +161,44 @@ def check_config(reporter, source_dir):
      config_file = os.path.join(source_dir, '_config.yml')
      config = load_yaml(config_file)
      reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson')
-    reporter.check_field(config_file, 'configuration', config, 'carpentry', ('swc', 'dc'))
+    reporter.check_field(config_file, 'configuration', config, 'carpentry', ('swc', 'dc', 'lc'))
      reporter.check_field(config_file, 'configuration', config, 'title')
      reporter.check_field(config_file, 'configuration', config, 'email')
-    reporter.check_field(config_file, 'configuration', config, 'repo')
-    reporter.check_field(config_file, 'configuration', config, 'root')
-    if ('repo' in config) and ('root' in config):
-        reporter.check(config['repo'].endswith(config['root']),
-                       config_file,
-                       'Repository name "{0}" not consistent with root "{1}"',
-                       config['repo'], config['root'])
+
+    reporter.check({'values': {'root': '..'}} in config.get('defaults', []),
+                   'configuration',
+                   '"root" not set to ".." in configuration')
+
+
+def read_references(reporter, ref_path):
+    """Read shared file of reference links, returning dictionary of valid references
+    {symbolic_name : URL}
+    """
+
+    result = {}
+    urls_seen = set()
+    if ref_path:
+        with open(ref_path, 'r') as reader:
+            for (num, line) in enumerate(reader):
+                line_num = num + 1
+                m = P_INTERNAL_LINK_DEF.search(line)
+                require(m,
+                        '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip()))
+                name = m.group(1)
+                url = m.group(2)
+                require(name,
+                        'Empty reference at {0}:{1}'.format(ref_path, line_num))
+                reporter.check(name not in result,
+                               ref_path,
+                               'Duplicate reference {0} at line {1}',
+                               name, line_num)
+                reporter.check(url not in urls_seen,
+                               ref_path,
+                               'Duplicate definition of URL {0} at line {1}',
+                               url, line_num)
+                result[name] = url
+                urls_seen.add(url)
+    return result
  
  
  def read_all_markdown(source_dir, parser):
@@ -212,38 +255,6 @@ def check_fileset(source_dir, reporter, filenames_present):
                     seen)
  
  
-def check_figures(source_dir, reporter):
-    """Check that all figures are present and referenced."""
-
-    # Get references.
-    try:
-        all_figures_html = os.path.join(source_dir, '_includes', 'all_figures.html')
-        with open(all_figures_html, 'r') as reader:
-            text = reader.read()
-        figures = P_FIGURE_REFS.findall(text)
-        referenced = [os.path.split(f)[1] for f in figures if '/fig/' in f]
-    except FileNotFoundError as e:
-        reporter.add(all_figures_html,
-                     'File not found')
-        return
-
-    # Get actual files.
-    fig_dir_path = os.path.join(source_dir, 'fig')
-    actual = [f for f in os.listdir(fig_dir_path) if not f.startswith('.')]
-
-    # Report differences.
-    unexpected = set(actual) - set(referenced)
-    reporter.check(not unexpected,
-                   None,
-                   'Unexpected image files: {0}',
-                   ', '.join(sorted(unexpected)))
-    missing = set(referenced) - set(actual)
-    reporter.check(not missing,
-                   None,
-                   'Missing image files: {0}',
-                   ', '.join(sorted(missing)))
-
-
  def create_checker(args, filename, info):
      """Create appropriate checker for file."""
  
@@ -252,14 +263,6 @@ def create_checker(args, filename, info):
              return cls(args, filename, **info)
  
  
-def require(condition, message):
-    """Fail if condition not met."""
-
-    if not condition:
-        print(message, file=sys.stderr)
-        sys.exit(1)
-
-
  class CheckBase(object):
      """Base class for checking Markdown files."""
  
@@ -280,13 +283,14 @@ class CheckBase(object):
  
  
      def check(self):
-        """Run tests on metadata."""
+        """Run tests."""
  
          self.check_metadata()
          self.check_line_lengths()
          self.check_trailing_whitespace()
          self.check_blockquote_classes()
          self.check_codeblock_classes()
+        self.check_defined_link_references()
  
  
      def check_metadata(self):
@@ -344,6 +348,25 @@ class CheckBase(object):
                                  cls)
  
  
+    def check_defined_link_references(self):
+        """Check that defined links resolve in the file.
+
+        Internally-defined links match the pattern [text][label].
+        """
+
+        result = set()
+        for node in self.find_all(self.doc, {'type' : 'text'}):
+            for match in P_INTERNAL_LINK_REF.findall(node['value']):
+                text = match[0]
+                link = match[1]
+                if link not in self.args.references:
+                    result.add('"{0}"=>"{1}"'.format(text, link))
+        self.reporter.check(not result,
+                            self.filename,
+                            'Internally-defined links may be missing definitions: {0}',
+                            ', '.join(sorted(result)))
+
+
      def find_all(self, node, pattern, accum=None):
          """Find all matches for a pattern."""
  
@@ -413,6 +436,12 @@ class CheckIndex(CheckBase):
          super(CheckIndex, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)
          self.layout = 'lesson'
  
+    def check_metadata(self):
+        super(CheckIndex, self).check_metadata()
+        self.reporter.check(self.metadata.get('root', '') == '.',
+                            self.filename,
+                            'Root not set to "."')
+
  
  class CheckEpisode(CheckBase):
      """Check an episode page."""
@@ -420,6 +449,14 @@ class CheckEpisode(CheckBase):
      def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
          super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)
  
+
+    def check(self):
+        """Run extra tests."""
+
+        super(CheckEpisode, self).check()
+        self.check_reference_inclusion()
+
+
      def check_metadata(self):
          super(CheckEpisode, self).check_metadata()
          if self.metadata:
@@ -446,6 +483,26 @@ class CheckEpisode(CheckBase):
                                    name, type(self.metadata[name]), type_)
  
  
+    def check_reference_inclusion(self):
+        """Check that links file has been included."""
+
+        if not self.args.reference_path:
+            return
+
+        for (i, last_line, line_len) in reversed(self.lines):
+            if last_line:
+                break
+
+        require(last_line,
+                'No non-empty lines in {0}'.format(self.filename))
+
+        include_filename = os.path.split(self.args.reference_path)[-1]
+        if include_filename not in last_line:
+            self.reporter.add(self.filename,
+                              'episode does not include "{0}"',
+                              include_filename)
+
+
  class CheckReference(CheckBase):
      """Check the reference page."""