X-Git-Url: https://git.arvados.org/rnaseq-cwl-training.git/blobdiff_plain/cebd8558273a274f84b3b129c627f42a2ed09ef1..9259abf17b39c66f5e0912d127f5e4e5f63b8c57:/bin/lesson_check.py diff --git a/bin/lesson_check.py b/bin/lesson_check.py index 7bc09a3..66f6310 100755 --- a/bin/lesson_check.py +++ b/bin/lesson_check.py @@ -14,7 +14,7 @@ from optparse import OptionParser from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX -__version__ = '0.2' +__version__ = '0.3' # Where to look for source Markdown files. SOURCE_DIRS = ['', '_episodes', '_extras'] @@ -31,7 +31,6 @@ REQUIRED_FILES = { '%/LICENSE.md': True, '%/README.md': False, '%/_extras/discuss.md': True, - '%/_extras/figures.md': True, '%/_extras/guide.md': True, '%/index.md': True, '%/reference.md': True, @@ -48,7 +47,10 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$') P_FIGURE_REFS = re.compile(r']+src="([^"]+)"[^>]*>') # Pattern to match internally-defined Markdown links. -P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]') +P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]') + +# Pattern to match reference links (to resolve internally-defined references). +P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)') # What kinds of blockquotes are allowed? KNOWN_BLOCKQUOTES = { @@ -69,12 +71,14 @@ KNOWN_CODEBLOCKS = { 'error', 'output', 'source', - 'bash', - 'make', - 'matlab', - 'python', - 'r', - 'sql' + 'language-bash', + 'html', + 'language-make', + 'language-matlab', + 'language-python', + 'language-r', + 'language-shell', + 'language-sql' } # What fields are required in teaching episode metadata? @@ -103,13 +107,15 @@ def main(): args = parse_args() args.reporter = Reporter() check_config(args.reporter, args.source_dir) + args.references = read_references(args.reporter, args.reference_path) + docs = read_all_markdown(args.source_dir, args.parser) check_fileset(args.source_dir, args.reporter, docs.keys()) check_unwanted_files(args.source_dir, args.reporter) for filename in docs.keys(): checker = create_checker(args, filename, docs[filename]) checker.check() - check_figures(args.source_dir, args.reporter) + args.reporter.report() @@ -126,6 +132,10 @@ def parse_args(): default=None, dest='parser', help='path to Markdown parser') + parser.add_option('-r', '--references', + default=None, + dest='reference_path', + help='path to Markdown file of external references') parser.add_option('-s', '--source', default=os.curdir, dest='source_dir', @@ -151,10 +161,45 @@ def check_config(reporter, source_dir): config_file = os.path.join(source_dir, '_config.yml') config = load_yaml(config_file) reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson') - reporter.check_field(config_file, 'configuration', config, 'carpentry', ('swc', 'dc')) + reporter.check_field(config_file, 'configuration', config, 'carpentry', ('swc', 'dc', 'lc')) reporter.check_field(config_file, 'configuration', config, 'title') reporter.check_field(config_file, 'configuration', config, 'email') + reporter.check({'values': {'root': '..'}} in config.get('defaults', []), + 'configuration', + '"root" not set to ".." in configuration') + + +def read_references(reporter, ref_path): + """Read shared file of reference links, returning dictionary of valid references + {symbolic_name : URL} + """ + + result = {} + urls_seen = set() + if ref_path: + with open(ref_path, 'r') as reader: + for (num, line) in enumerate(reader): + line_num = num + 1 + m = P_INTERNAL_LINK_DEF.search(line) + require(m, + '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip())) + name = m.group(1) + url = m.group(2) + require(name, + 'Empty reference at {0}:{1}'.format(ref_path, line_num)) + reporter.check(name not in result, + ref_path, + 'Duplicate reference {0} at line {1}', + name, line_num) + reporter.check(url not in urls_seen, + ref_path, + 'Duplicate definition of URL {0} at line {1}', + url, line_num) + result[name] = url + urls_seen.add(url) + return result + def read_all_markdown(source_dir, parser): """Read source files, returning @@ -210,38 +255,6 @@ def check_fileset(source_dir, reporter, filenames_present): seen) -def check_figures(source_dir, reporter): - """Check that all figures are present and referenced.""" - - # Get references. - try: - all_figures_html = os.path.join(source_dir, '_includes', 'all_figures.html') - with open(all_figures_html, 'r') as reader: - text = reader.read() - figures = P_FIGURE_REFS.findall(text) - referenced = [os.path.split(f)[1] for f in figures if '/fig/' in f] - except FileNotFoundError as e: - reporter.add(all_figures_html, - 'File not found') - return - - # Get actual image files (ignore non-image files). - fig_dir_path = os.path.join(source_dir, 'fig') - actual = [f for f in os.listdir(fig_dir_path) if os.path.splitext(f)[1] in IMAGE_FILE_SUFFIX] - - # Report differences. - unexpected = set(actual) - set(referenced) - reporter.check(not unexpected, - None, - 'Unexpected image files: {0}', - ', '.join(sorted(unexpected))) - missing = set(referenced) - set(actual) - reporter.check(not missing, - None, - 'Missing image files: {0}', - ', '.join(sorted(missing))) - - def create_checker(args, filename, info): """Create appropriate checker for file.""" @@ -270,7 +283,7 @@ class CheckBase(object): def check(self): - """Run tests on metadata.""" + """Run tests.""" self.check_metadata() self.check_line_lengths() @@ -338,17 +351,16 @@ class CheckBase(object): def check_defined_link_references(self): """Check that defined links resolve in the file. - Internally-defined links match the pattern [text][label]. If - the label contains '{{...}}', it is hopefully a references to - a configuration value - we should check that, but don't right - now. + Internally-defined links match the pattern [text][label]. """ result = set() for node in self.find_all(self.doc, {'type' : 'text'}): - for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']): - if '{{' not in match: - result.add(match) + for match in P_INTERNAL_LINK_REF.findall(node['value']): + text = match[0] + link = match[1] + if link not in self.args.references: + result.add('"{0}"=>"{1}"'.format(text, link)) self.reporter.check(not result, self.filename, 'Internally-defined links may be missing definitions: {0}', @@ -424,6 +436,12 @@ class CheckIndex(CheckBase): super(CheckIndex, self).__init__(args, filename, metadata, metadata_len, text, lines, doc) self.layout = 'lesson' + def check_metadata(self): + super(CheckIndex, self).check_metadata() + self.reporter.check(self.metadata.get('root', '') == '.', + self.filename, + 'Root not set to "."') + class CheckEpisode(CheckBase): """Check an episode page.""" @@ -431,6 +449,14 @@ class CheckEpisode(CheckBase): def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc) + + def check(self): + """Run extra tests.""" + + super(CheckEpisode, self).check() + self.check_reference_inclusion() + + def check_metadata(self): super(CheckEpisode, self).check_metadata() if self.metadata: @@ -457,6 +483,26 @@ class CheckEpisode(CheckBase): name, type(self.metadata[name]), type_) + def check_reference_inclusion(self): + """Check that links file has been included.""" + + if not self.args.reference_path: + return + + for (i, last_line, line_len) in reversed(self.lines): + if last_line: + break + + require(last_line, + 'No non-empty lines in {0}'.format(self.filename)) + + include_filename = os.path.split(self.args.reference_path)[-1] + if include_filename not in last_line: + self.reporter.add(self.filename, + 'episode does not include "{0}"', + include_filename) + + class CheckReference(CheckBase): """Check the reference page."""