Checking that internally-defined links resolve.
author Greg Wilson <gvwilson@third-bit.com>
Sun, 24 Jul 2016 13:12:20 +0000 (09:12 -0400)
committer Greg Wilson <gvwilson@third-bit.com>
Sun, 24 Jul 2016 13:12:20 +0000 (09:12 -0400)
An internally-defined Markdown link has the form [xxx][yyy].  If the
ID 'yyy' doesn't resolve, the text is left as-is, so we search text
nodes for that pattern, then exclude matches that reference configuration
values using '{{'.  It's a hack, but it'll catch at least a few things.

bin/lesson_check.py

index ace8de6943c9a5552827f68952ed2d2eda0a72fc..7bc09a361c1729a8a3210ea9f31690320262ca8d 100755 (executable)
@@ -47,6 +47,9 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$')
 # Pattern to match figure references in HTML.
 P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
 
+# Pattern to match internally-defined Markdown links.
+P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]')
+
 # What kinds of blockquotes are allowed?
 KNOWN_BLOCKQUOTES = {
     'callout',
@@ -274,6 +277,7 @@ class CheckBase(object):
         self.check_trailing_whitespace()
         self.check_blockquote_classes()
         self.check_codeblock_classes()
+        self.check_defined_link_references()
 
 
     def check_metadata(self):
@@ -331,6 +335,26 @@ class CheckBase(object):
                                 cls)
 
 
+    def check_defined_link_references(self):
+        """Check that defined links resolve in the file.
+
+        Internally-defined links match the pattern [text][label].  If
+        the label contains '{{...}}', it is hopefully a reference to
+        a configuration value - we should check that, but don't right
+        now.
+        """
+
+        result = set()
+        for node in self.find_all(self.doc, {'type' : 'text'}):
+            for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']):
+                if '{{' not in match:
+                    result.add(match)
+        self.reporter.check(not result,
+                            self.filename,
+                            'Internally-defined links may be missing definitions: {0}',
+                            ', '.join(sorted(result)))
+
+
     def find_all(self, node, pattern, accum=None):
         """Find all matches for a pattern."""