X-Git-Url: https://git.arvados.org/rnaseq-cwl-training.git/blobdiff_plain/9c447b8e6ca87637b4ada4a6361c56d06cd8f0f8..a56ac2da1d6b7eb472212ae9c1d2c7057acf0f95:/bin/util.py diff --git a/bin/util.py b/bin/util.py index 23245be..92fd50d 100644 --- a/bin/util.py +++ b/bin/util.py @@ -1,24 +1,53 @@ import sys +import os import json -import yaml from subprocess import Popen, PIPE +# Import this way to produce a more useful error message. +try: + import yaml +except ImportError: + print('Unable to import YAML module: please install PyYAML', file=sys.stderr) + sys.exit(1) + + +# Things an image file's name can end with. +IMAGE_FILE_SUFFIX = { + '.gif', + '.jpg', + '.png', + '.svg' +} + +# Files that shouldn't be present. +UNWANTED_FILES = [ + '.nojekyll' +] + +# Marker to show that an expected value hasn't been provided. +# (Can't use 'None' because that might be a legitimate value.) +REPORTER_NOT_SET = [] class Reporter(object): """Collect and report errors.""" - def __init__(self, args): + def __init__(self): """Constructor.""" super(Reporter, self).__init__() self.messages = [] - def check_field(self, filename, name, values, key, expected): + def check_field(self, filename, name, values, key, expected=REPORTER_NOT_SET): """Check that a dictionary has an expected value.""" if key not in values: self.add(filename, '{0} does not contain {1}', name, key) + elif expected is REPORTER_NOT_SET: + pass + elif type(expected) in (tuple, set, list): + if values[key] not in expected: + self.add(filename, '{0} {1} value {2} is not in {3}', name, key, values[key], expected) elif values[key] != expected: self.add(filename, '{0} {1} is {2} not {3}', name, key, values[key], expected) @@ -51,27 +80,24 @@ class Reporter(object): if not self.messages: return - for m in self.messages: + for m in sorted(self.messages): print(m, file=stream) def read_markdown(parser, path): - """Get YAML and AST for Markdown file, returning {'metadata':yaml, 'text': text, 'doc':doc}.""" + """ + Get YAML and AST for Markdown file, returning + {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}. + """ # Split and extract YAML (if present). - metadata = None - metadata_len = None with open(path, 'r') as reader: body = reader.read() - pieces = body.split('---', 2) - if len(pieces) == 3: - try: - metadata = yaml.load(pieces[1]) - except yaml.YAMLError as e: - print('Unable to parse YAML header in {0}:\n{1}'.format(path, e)) - sys.exit(1) - metadata_len = pieces[1].count('\n') - body = pieces[2] + metadata_raw, metadata_yaml, body = split_metadata(path, body) + + # Split into lines. + metadata_len = 0 if metadata_raw is None else metadata_raw.count('\n') + lines = [(metadata_len+i+1, line, len(line)) for (i, line) in enumerate(body.split('\n'))] # Parse Markdown. cmd = 'ruby {0}'.format(parser) @@ -80,8 +106,66 @@ def read_markdown(parser, path): doc = json.loads(stdout_data) return { - 'metadata': metadata, + 'metadata': metadata_yaml, 'metadata_len': metadata_len, 'text': body, + 'lines': lines, 'doc': doc } + + +def split_metadata(path, text): + """ + Get raw (text) metadata, metadata as YAML, and rest of body. + If no metadata, return (None, None, body). + """ + + metadata_raw = None + metadata_yaml = None + metadata_len = None + + pieces = text.split('---', 2) + if len(pieces) == 3: + metadata_raw = pieces[1] + text = pieces[2] + try: + metadata_yaml = yaml.load(metadata_raw) + except yaml.YAMLError as e: + print('Unable to parse YAML header in {0}:\n{1}'.format(path, e), file=sys.stderr) + sys.exit(1) + + return metadata_raw, metadata_yaml, text + + +def load_yaml(filename): + """ + Wrapper around YAML loading so that 'import yaml' is only needed + in one file. + """ + + try: + with open(filename, 'r') as reader: + return yaml.load(reader) + except (yaml.YAMLError, FileNotFoundError) as e: + print('Unable to load YAML file {0}:\n{1}'.format(filename, e), file=sys.stderr) + sys.exit(1) + + +def check_unwanted_files(dir_path, reporter): + """ + Check that unwanted files are not present. + """ + + for filename in UNWANTED_FILES: + path = os.path.join(dir_path, filename) + reporter.check(not os.path.exists(path), + path, + "Unwanted file found") + + +def require(condition, message): + """Fail if condition not met.""" + + if not condition: + print(message, file=sys.stderr) + sys.exit(1)