X-Git-Url: https://git.arvados.org/rnaseq-cwl-training.git/blobdiff_plain/7fdc6f897104fc6e46a8ba17af25abbbaa2cb749..efa2cc93ffcce0bb335a8f2e5121c67c146fcb8c:/bin/lesson_check.py diff --git a/bin/lesson_check.py b/bin/lesson_check.py old mode 100755 new mode 100644 index 240b50a..1fd7118 --- a/bin/lesson_check.py +++ b/bin/lesson_check.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - """ Check lesson files and their contents. """ @@ -18,6 +16,9 @@ __version__ = '0.3' # Where to look for source Markdown files. SOURCE_DIRS = ['', '_episodes', '_extras'] +# Where to look for source Rmd files. +SOURCE_RMD_DIRS = ['_episodes_rmd'] + # Required files: each entry is ('path': YAML_required). # FIXME: We do not yet validate whether any files have the required # YAML headers, but should in the future. @@ -26,19 +27,19 @@ SOURCE_DIRS = ['', '_episodes', '_extras'] # specially. This list must include all the Markdown files listed in the # 'bin/initialize' script. REQUIRED_FILES = { - '%/CONDUCT.md': True, - '%/CONTRIBUTING.md': False, - '%/LICENSE.md': True, - '%/README.md': False, - '%/_extras/discuss.md': True, - '%/_extras/guide.md': True, - '%/index.md': True, - '%/reference.md': True, - '%/setup.md': True, + 'CODE_OF_CONDUCT.md': True, + 'CONTRIBUTING.md': False, + 'LICENSE.md': True, + 'README.md': False, + os.path.join('_extras', 'discuss.md'): True, + os.path.join('_extras', 'guide.md'): True, + 'index.md': True, + 'reference.md': True, + 'setup.md': True, } # Episode filename pattern. -P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$') +P_EPISODE_FILENAME = re.compile(r'(\d\d)-[-\w]+.md$') # Pattern to match lines ending with whitespace. P_TRAILING_WHITESPACE = re.compile(r'\s+$') @@ -52,6 +53,9 @@ P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]') # Pattern to match reference links (to resolve internally-defined references). P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)') +# Pattern to match {% include ... %} statements +P_INTERNAL_INCLUDE_LINK = re.compile(r'^{% include ([^ ]*) %}$') + # What kinds of blockquotes are allowed? KNOWN_BLOCKQUOTES = { 'callout', @@ -63,7 +67,8 @@ KNOWN_BLOCKQUOTES = { 'prereq', 'quotation', 'solution', - 'testimonial' + 'testimonial', + 'warning' } # What kinds of code fragments are allowed? @@ -73,12 +78,16 @@ KNOWN_CODEBLOCKS = { 'source', 'language-bash', 'html', + 'language-c', + 'language-cmake', + 'language-cpp', 'language-make', 'language-matlab', 'language-python', 'language-r', 'language-shell', - 'language-sql' + 'language-sql', + 'warning' } # What fields are required in teaching episode metadata? @@ -99,6 +108,7 @@ BREAK_METADATA_FIELDS = { } # How long are lines allowed to be? +# Please keep this in sync with .editorconfig! MAX_LINE_LEN = 100 @@ -108,6 +118,7 @@ def main(): args = parse_args() args.reporter = Reporter() check_config(args.reporter, args.source_dir) + check_source_rmd(args.reporter, args.source_dir, args.parser) args.references = read_references(args.reporter, args.reference_path) docs = read_all_markdown(args.source_dir, args.parser) @@ -171,43 +182,76 @@ def check_config(reporter, source_dir): reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson') reporter.check_field(config_file, 'configuration', - config, 'carpentry', ('swc', 'dc', 'lc')) + config, 'carpentry', ('swc', 'dc', 'lc', 'cp')) reporter.check_field(config_file, 'configuration', config, 'title') reporter.check_field(config_file, 'configuration', config, 'email') - reporter.check({'values': {'root': '..'}} in config.get('defaults', []), + for defaults in [ + {'values': {'root': '.', 'layout': 'page'}}, + {'values': {'root': '..', 'layout': 'episode'}, 'scope': {'type': 'episodes', 'path': ''}}, + {'values': {'root': '..', 'layout': 'page'}, 'scope': {'type': 'extras', 'path': ''}} + ]: + reporter.check(defaults in config.get('defaults', []), 'configuration', - '"root" not set to ".." in configuration') - + '"root" not set to "." in configuration') + +def check_source_rmd(reporter, source_dir, parser): + """Check that Rmd episode files include `source: Rmd`""" + + episode_rmd_dir = [os.path.join(source_dir, d) for d in SOURCE_RMD_DIRS] + episode_rmd_files = [os.path.join(d, '*.Rmd') for d in episode_rmd_dir] + results = {} + for pat in episode_rmd_files: + for f in glob.glob(pat): + data = read_markdown(parser, f) + dy = data['metadata'] + if dy: + reporter.check_field(f, 'episode_rmd', + dy, 'source', 'Rmd') def read_references(reporter, ref_path): """Read shared file of reference links, returning dictionary of valid references {symbolic_name : URL} """ + if not ref_path: + raise Warning("No filename has been provided.") + result = {} urls_seen = set() - if ref_path: - with open(ref_path, 'r') as reader: - for (num, line) in enumerate(reader): - line_num = num + 1 - m = P_INTERNAL_LINK_DEF.search(line) - require(m, - '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip())) - name = m.group(1) - url = m.group(2) - require(name, - 'Empty reference at {0}:{1}'.format(ref_path, line_num)) - reporter.check(name not in result, - ref_path, - 'Duplicate reference {0} at line {1}', - name, line_num) - reporter.check(url not in urls_seen, - ref_path, - 'Duplicate definition of URL {0} at line {1}', - url, line_num) - result[name] = url - urls_seen.add(url) + + with open(ref_path, 'r', encoding='utf-8') as reader: + for (num, line) in enumerate(reader, 1): + + if P_INTERNAL_INCLUDE_LINK.search(line): continue + + m = P_INTERNAL_LINK_DEF.search(line) + + message = '{}: {} not a valid reference: {}' + require(m, message.format(ref_path, num, line.rstrip())) + + name = m.group(1) + url = m.group(2) + + message = 'Empty reference at {0}:{1}' + require(name, message.format(ref_path, num)) + + unique_name = name not in result + unique_url = url not in urls_seen + + reporter.check(unique_name, + ref_path, + 'Duplicate reference name {0} at line {1}', + name, num) + + reporter.check(unique_url, + ref_path, + 'Duplicate definition of URL {0} at line {1}', + url, num) + + result[name] = url + urls_seen.add(url) + return result @@ -231,7 +275,7 @@ def check_fileset(source_dir, reporter, filenames_present): """Are all required files present? Are extraneous files present?""" # Check files with predictable names. - required = [p.replace('%', source_dir) for p in REQUIRED_FILES] + required = [os.path.join(source_dir, p) for p in REQUIRED_FILES] missing = set(required) - set(filenames_present) for m in missing: reporter.add(None, 'Missing required file {0}', m) @@ -241,7 +285,10 @@ def check_fileset(source_dir, reporter, filenames_present): for filename in filenames_present: if '_episodes' not in filename: continue - m = P_EPISODE_FILENAME.search(filename) + + # split path to check episode name + base_name = os.path.basename(filename) + m = P_EPISODE_FILENAME.search(base_name) if m and m.group(1): seen.append(m.group(1)) else: @@ -271,15 +318,14 @@ def create_checker(args, filename, info): for (pat, cls) in CHECKERS: if pat.search(filename): return cls(args, filename, **info) + return NotImplemented - -class CheckBase(object): +class CheckBase: """Base class for checking Markdown files.""" def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): """Cache arguments for checking.""" - super(CheckBase, self).__init__() self.args = args self.reporter = self.args.reporter # for convenience self.filename = filename @@ -320,7 +366,7 @@ class CheckBase(object): n > MAX_LINE_LEN) and (not l.startswith('!'))] self.reporter.check(not over, self.filename, - 'Line(s) are too long: {0}', + 'Line(s) too long: {0}', ', '.join([str(i) for i in over])) def check_trailing_whitespace(self): @@ -399,7 +445,8 @@ class CheckBase(object): return False return True - def get_val(self, node, *chain): + @staticmethod + def get_val(node, *chain): """Get value one or more levels down.""" curr = node @@ -421,10 +468,6 @@ class CheckBase(object): class CheckNonJekyll(CheckBase): """Check a file that isn't translated by Jekyll.""" - def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): - super(CheckNonJekyll, self).__init__( - args, filename, metadata, metadata_len, text, lines, doc) - def check_metadata(self): self.reporter.check(self.metadata is None, self.filename, @@ -435,12 +478,11 @@ class CheckIndex(CheckBase): """Check the main index page.""" def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): - super(CheckIndex, self).__init__(args, filename, - metadata, metadata_len, text, lines, doc) + super().__init__(args, filename, metadata, metadata_len, text, lines, doc) self.layout = 'lesson' def check_metadata(self): - super(CheckIndex, self).check_metadata() + super().check_metadata() self.reporter.check(self.metadata.get('root', '') == '.', self.filename, 'Root not set to "."') @@ -449,18 +491,14 @@ class CheckIndex(CheckBase): class CheckEpisode(CheckBase): """Check an episode page.""" - def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): - super(CheckEpisode, self).__init__(args, filename, - metadata, metadata_len, text, lines, doc) - def check(self): """Run extra tests.""" - super(CheckEpisode, self).check() + super().check() self.check_reference_inclusion() def check_metadata(self): - super(CheckEpisode, self).check_metadata() + super().check_metadata() if self.metadata: if 'layout' in self.metadata: if self.metadata['layout'] == 'break': @@ -508,8 +546,7 @@ class CheckReference(CheckBase): """Check the reference page.""" def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): - super(CheckReference, self).__init__( - args, filename, metadata, metadata_len, text, lines, doc) + super().__init__(args, filename, metadata, metadata_len, text, lines, doc) self.layout = 'reference' @@ -517,9 +554,7 @@ class CheckGeneric(CheckBase): """Check a generic page.""" def __init__(self, args, filename, metadata, metadata_len, text, lines, doc): - super(CheckGeneric, self).__init__(args, filename, - metadata, metadata_len, text, lines, doc) - self.layout = 'page' + super().__init__(args, filename, metadata, metadata_len, text, lines, doc) CHECKERS = [ @@ -527,7 +562,7 @@ CHECKERS = [ (re.compile(r'README\.md'), CheckNonJekyll), (re.compile(r'index\.md'), CheckIndex), (re.compile(r'reference\.md'), CheckReference), - (re.compile(r'_episodes/.*\.md'), CheckEpisode), + (re.compile(os.path.join('_episodes', '*\.md')), CheckEpisode), (re.compile(r'.*\.md'), CheckGeneric) ]