Checking consolidated links
author Greg Wilson <gvwilson@third-bit.com>
Sun, 15 Jan 2017 20:02:34 +0000 (15:02 -0500)
committer Greg Wilson <gvwilson@third-bit.com>
Sun, 15 Jan 2017 20:02:34 +0000 (15:02 -0500)
Makefile
bin/lesson_check.py

index 457de1ee85a2dc08129560ead9296d973d03be66..b5dfe2fa455b72c0bd029122cdc129a71cbd5d66 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -89,7 +89,7 @@ ${RMD_DST} : ${RMD_SRC}
 
 ## lesson-check     : validate lesson Markdown.
 lesson-check :
-       @bin/lesson_check.py -s . -p ${PARSER}
+       @bin/lesson_check.py -s . -p ${PARSER} -r _includes/links.md
 
 ## lesson-check-all : validate lesson Markdown, checking line lengths and trailing whitespace.
 lesson-check-all :
index a594f2f47c9a3fc0e27fe7452e399aefb9b2595f..8244222b5bd30e80516666d1dde76756c9c9300a 100755 (executable)
--- a/bin/lesson_check.py
+++ b/bin/lesson_check.py
@@ -14,7 +14,7 @@ from optparse import OptionParser
 
 from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX
 
-__version__ = '0.2'
+__version__ = '0.3'
 
 # Where to look for source Markdown files.
 SOURCE_DIRS = ['', '_episodes', '_extras']
@@ -48,7 +48,10 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$')
 P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
 
 # Pattern to match internally-defined Markdown links.
-P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]')
+P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]')
+
+# Pattern to match reference links (to resolve internally-defined references).
+P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)')
 
 # What kinds of blockquotes are allowed?
 KNOWN_BLOCKQUOTES = {
@@ -103,6 +106,8 @@ def main():
     args = parse_args()
     args.reporter = Reporter()
     check_config(args.reporter, args.source_dir)
+    args.references = read_references(args.reporter, args.reference_path)
+
     docs = read_all_markdown(args.source_dir, args.parser)
     check_fileset(args.source_dir, args.reporter, docs.keys())
     check_unwanted_files(args.source_dir, args.reporter)
@@ -110,6 +115,7 @@ def main():
         checker = create_checker(args, filename, docs[filename])
         checker.check()
     check_figures(args.source_dir, args.reporter)
+
     args.reporter.report()
 
 
@@ -126,6 +132,10 @@ def parse_args():
                       default=None,
                       dest='parser',
                       help='path to Markdown parser')
+    parser.add_option('-r', '--references',
+                      default=None,
+                      dest='reference_path',
+                      help='path to Markdown file of external references')
     parser.add_option('-s', '--source',
                       default=os.curdir,
                       dest='source_dir',
@@ -160,6 +170,37 @@ def check_config(reporter, source_dir):
                    '"root" not set to ".." in configuration')
 
 
+def read_references(reporter, ref_path):
+    """Read shared file of reference links, returning dictionary of valid references
+    {symbolic_name : URL}
+    """
+
+    result = {}
+    urls_seen = set()
+    if ref_path:
+        with open(ref_path, 'r') as reader:
+            for (num, line) in enumerate(reader):
+                line_num = num + 1
+                m = P_INTERNAL_LINK_DEF.search(line)
+                require(m,
+                        '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip()))
+                name = m.group(1)
+                url = m.group(2)
+                require(name,
+                        'Empty reference at {0}:{1}'.format(ref_path, line_num))
+                reporter.check(name not in result,
+                               ref_path,
+                               'Duplicate reference {0} at line {1}',
+                               name, line_num)
+                reporter.check(url not in urls_seen,
+                               ref_path,
+                               'Duplicate definition of URL {0} at line {1}',
+                               url, line_num)
+                result[name] = url
+                urls_seen.add(url)
+    return result
+
+
 def read_all_markdown(source_dir, parser):
     """Read source files, returning
     {path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}}
@@ -274,7 +315,7 @@ class CheckBase(object):
 
 
     def check(self):
-        """Run tests on metadata."""
+        """Run tests."""
 
         self.check_metadata()
         self.check_line_lengths()
@@ -342,17 +383,16 @@ class CheckBase(object):
     def check_defined_link_references(self):
         """Check that defined links resolve in the file.
 
-        Internally-defined links match the pattern [text][label].  If
-        the label contains '{{...}}', it is hopefully a references to
-        a configuration value - we should check that, but don't right
-        now.
+        Internally-defined links match the pattern [text][label].
         """
 
         result = set()
         for node in self.find_all(self.doc, {'type' : 'text'}):
-            for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']):
-                if '{{' not in match:
-                    result.add(match)
+            for match in P_INTERNAL_LINK_REF.findall(node['value']):
+                text = match[0]
+                link = match[1]
+                if link not in self.args.references:
+                    result.add('"{0}"=>"{1}"'.format(text, link))
         self.reporter.check(not result,
                             self.filename,
                             'Internally-defined links may be missing definitions: {0}',
@@ -441,6 +481,14 @@ class CheckEpisode(CheckBase):
     def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
         super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)
 
+
+    def check(self):
+        """Run extra tests."""
+
+        super(CheckEpisode, self).check()
+        self.check_reference_inclusion()
+
+
     def check_metadata(self):
         super(CheckEpisode, self).check_metadata()
         if self.metadata:
@@ -467,6 +515,26 @@ class CheckEpisode(CheckBase):
                                   name, type(self.metadata[name]), type_)
 
 
+    def check_reference_inclusion(self):
+        """Check that links file has been included."""
+
+        if not self.args.reference_path:
+            return
+
+        for (i, last_line, line_len) in reversed(self.lines):
+            if last_line:
+                break
+
+        require(last_line,
+                'No non-empty lines in {0}'.format(self.filename))
+
+        include_filename = os.path.split(self.args.reference_path)[-1]
+        if include_filename not in last_line:
+            self.reporter.add(self.filename,
+                              'episode does not include "{0}"',
+                              include_filename)
+
+
 class CheckReference(CheckBase):
     """Check the reference page."""