X-Git-Url: https://git.arvados.org/rnaseq-cwl-training.git/blobdiff_plain/570451a88f4071708bd73e83f0f6f42bdc6a5dfc..7f8e13d704e49b625b25b7b655d4c21db70c7ccf:/bin/lesson_check.py diff --git a/bin/lesson_check.py b/bin/lesson_check.py index 6de7714..311687e 100755 --- a/bin/lesson_check.py +++ b/bin/lesson_check.py @@ -4,6 +4,7 @@ Check lesson files and their contents. """ +from __future__ import print_function import sys import os import glob @@ -11,7 +12,7 @@ import json import re from optparse import OptionParser -from util import Reporter, read_markdown, load_yaml +from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX __version__ = '0.2' @@ -46,6 +47,9 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$') # Pattern to match figure references in HTML. P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>') +# Pattern to match internally-defined Markdown links. +P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]') + # What kinds of blockquotes are allowed? 
KNOWN_BLOCKQUOTES = { 'callout', @@ -67,6 +71,7 @@ KNOWN_CODEBLOCKS = { 'source', 'bash', 'make', + 'matlab', 'python', 'r', 'sql' @@ -100,6 +105,7 @@ def main(): check_config(args.reporter, args.source_dir) docs = read_all_markdown(args.source_dir, args.parser) check_fileset(args.source_dir, args.reporter, docs.keys()) + check_unwanted_files(args.source_dir, args.reporter) for filename in docs.keys(): checker = create_checker(args, filename, docs[filename]) checker.check() @@ -145,6 +151,13 @@ def check_config(reporter, source_dir): config_file = os.path.join(source_dir, '_config.yml') config = load_yaml(config_file) reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson') + reporter.check_field(config_file, 'configuration', config, 'carpentry', ('swc', 'dc')) + reporter.check_field(config_file, 'configuration', config, 'title') + reporter.check_field(config_file, 'configuration', config, 'email') + + reporter.check({'values': {'root': '..'}} in config.get('defaults', []), + 'configuration', + '"root" not set to ".." in configuration') def read_all_markdown(source_dir, parser): @@ -216,9 +229,9 @@ def check_figures(source_dir, reporter): 'File not found') return - # Get actual files. + # Get actual image files (ignore non-image files). fig_dir_path = os.path.join(source_dir, 'fig') - actual = [f for f in os.listdir(fig_dir_path) if not f.startswith('.')] + actual = [f for f in os.listdir(fig_dir_path) if os.path.splitext(f)[1] in IMAGE_FILE_SUFFIX] # Report differences. 
unexpected = set(actual) - set(referenced) @@ -241,14 +254,6 @@ def create_checker(args, filename, info): return cls(args, filename, **info) -def require(condition, message): - """Fail if condition not met.""" - - if not condition: - print(message, file=sys.stderr) - sys.exit(1) - - class CheckBase(object): """Base class for checking Markdown files.""" @@ -276,6 +281,7 @@ class CheckBase(object): self.check_trailing_whitespace() self.check_blockquote_classes() self.check_codeblock_classes() + self.check_defined_link_references() def check_metadata(self): @@ -333,6 +339,26 @@ class CheckBase(object): cls) + def check_defined_link_references(self): + """Check that defined links resolve in the file. + + Internally-defined links match the pattern [text][label]. If + the label contains '{{...}}', it is hopefully a reference to + a configuration value - we should check that, but don't right + now. + """ + + result = set() + for node in self.find_all(self.doc, {'type' : 'text'}): + for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']): + if '{{' not in match: + result.add(match) + self.reporter.check(not result, + self.filename, + 'Internally-defined links may be missing definitions: {0}', + ', '.join(sorted(result))) + + def find_all(self, node, pattern, accum=None): """Find all matches for a pattern.""" @@ -402,6 +428,12 @@ class CheckIndex(CheckBase): super(CheckIndex, self).__init__(args, filename, metadata, metadata_len, text, lines, doc) self.layout = 'lesson' + def check_metadata(self): + super(CheckIndex, self).check_metadata() + self.reporter.check(self.metadata.get('root', '') == '.', + self.filename, + 'Root not set to "."') + class CheckEpisode(CheckBase): """Check an episode page."""