'''Check that a workshop's index.html metadata is valid.  See the
docstrings on the checking functions for a summary of the checks.
'''
import functools
import os
import re
import sys
from datetime import date

from util import Reporter, split_metadata, load_yaml, check_unwanted_files
# Metadata field patterns (used with re.match, so they anchor at the start
# of the value only).
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
# Either a 12-hour range with am/pm on both ends, or a 24-hour range.
# Callers strip spaces before matching, so none are allowed for here.
HUMANTIME_PATTERN = (
    r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))'
    r'|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
)
EVENTBRITE_PATTERN = r'\d{9,10}'
URL_PATTERN = r'https?://.+'

# Accepted values of the "carpentry" key, and the placeholder contact
# address that must not be left in a workshop's email list.
CARPENTRIES = ("dc", "swc", "lc", "cp")
DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org'

USAGE = 'Usage: "workshop_check.py path/to/root/directory"'
# Country and language codes.  Note that codes mean different things: 'ar'
# is 'Arabic' as a language but 'Argentina' as a country.

# Lowercase two-letter country codes accepted for the "country" key.
# Kept as a list because its order is used when building error messages.
ISO_COUNTRY = [
    'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
    'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
    'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
    'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
    'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
    'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
    'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
    'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
    'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
    'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
    'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
    'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
    'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
    'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
    'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
    'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
    'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
    'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
    'ye', 'yt', 'za', 'zm', 'zw'
]
# Lowercase two-letter language codes accepted for the "language" key.
# Kept as a list because its order is used when building error messages.
ISO_LANGUAGE = [
    'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
    'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
    'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
    'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
    'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
    'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
    'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
    'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
    'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
    'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
    'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
    'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
    'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
    'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
    'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
    'yo', 'za', 'zh', 'zu'
]
def look_for_fixme(func):
    """Decorator to fail test if text argument starts with "FIXME".

    The returned checker takes a single argument.  If that argument is a
    string whose first non-blank characters are "FIXME", the checker
    returns False without calling ``func``; any other argument (including
    None and non-string values) is passed straight through to ``func``.
    """

    # functools.wraps keeps the wrapped checker's name and docstring, which
    # the module docstring points readers to for a summary of each check.
    @functools.wraps(func)
    def inner(arg):
        if isinstance(arg, str) and arg.lstrip().startswith('FIXME'):
            return False
        return func(arg)

    return inner
def check_layout(layout):
    '''"layout" in YAML header must be "workshop".

    Returns True only when the value equals the string 'workshop'.
    '''

    return layout == 'workshop'
def check_carpentry(layout):
    '''"carpentry" in YAML header must be "dc", "swc", "lc", or "cp".

    The parameter keeps its historical name ``layout``; it receives the
    value of the "carpentry" key and is tested against CARPENTRIES.
    '''

    return layout in CARPENTRIES
def check_country(country):
    '''"country" must be a lowercase ISO-3166 two-letter code.

    Returns True when the value is one of the entries of ISO_COUNTRY.
    '''

    return country in ISO_COUNTRY
def check_language(language):
    '''"language" must be a lowercase ISO-639 two-letter code.

    Returns True when the value is one of the entries of ISO_LANGUAGE.
    '''

    return language in ISO_LANGUAGE
def check_humandate(date):
    '''
    'humandate' must be a human-readable date with a 3-letter month
    and 4-digit year.  Examples include 'Feb 18-20, 2025' and 'Feb 18
    and 20, 2025'.  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Carpentries web site.

    Returns False (rather than raising) for values that are not strings,
    that do not contain exactly one comma, or that are too short to hold
    a month followed by a space.
    '''

    # Note: the parameter name shadows any module-level ``date`` inside
    # this function; it is kept unchanged for existing callers.
    if not isinstance(date, str):
        return False

    # Exactly one comma must separate "month and days" from the year.
    pieces = date.split(',')
    if len(pieces) != 2:
        return False
    month_dates, year = pieces

    # The first three characters of month_dates are not empty
    month = month_dates[:3]
    if len(month) < 3 or ' ' in month:
        return False

    # But the fourth character is empty ("February" is illegal).  Slicing
    # yields '' instead of raising when there is no fourth character.
    if month_dates[3:4] != ' ':
        return False

    # year contains *only* numbers
    try:
        int(year)
    except ValueError:
        return False

    return True
def check_humantime(time):
    '''
    'humantime' is a human-readable start and end time for the
    workshop, such as '09:00 - 16:00'.

    Spaces are ignored.  The value must start with a range matching
    HUMANTIME_PATTERN; anything after the range is not inspected.
    Values that are not strings fail the check.
    '''

    # A bare value may have been loaded as a non-string (e.g. a number),
    # which has no .replace(); treat that as invalid instead of crashing.
    if not isinstance(time, str):
        return False

    return bool(re.match(HUMANTIME_PATTERN, time.replace(' ', '')))
def check_date(this_date):
    '''
    'startdate' and 'enddate' are machine-readable start and end dates
    for the workshop, and must be in YYYY-MM-DD format.

    Returns True only when the loaded value is a datetime.date instance;
    a string that merely looks like a date fails the check.
    '''

    # YAML automatically loads valid dates as datetime.date.
    return isinstance(this_date, date)
def check_latitude_longitude(latlng):
    '''
    'latlng' must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Latitude must lie in [-90, 90] and longitude in [-180, 180].
    Returns False for non-string values, for anything other than exactly
    two comma-separated parts, and for parts that are not numbers.
    '''

    if not isinstance(latlng, str):
        return False

    try:
        # Unpacking raises ValueError unless there are exactly two parts;
        # float() raises ValueError for non-numeric text.
        lat, lng = latlng.split(',')
        lat = float(lat)
        lng = float(lng)
    except ValueError:
        return False

    return (-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0)
def check_instructors(instructors):
    '''
    'instructor' must be a non-empty comma-separated list of quoted
    names, e.g. ['First name', 'Second name', ...'].  Do not use 'TBD'
    or other placeholders.

    Only the container is checked here: the value must be a list with at
    least one entry.  The entries themselves are not inspected.
    '''

    # YAML automatically loads list-like strings as lists.
    return isinstance(instructors, list) and bool(instructors)
def check_helpers(helpers):
    '''
    'helper' must be a comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...'].  The list may be empty.
    Do not use 'TBD' or other placeholders.

    Only the container is checked here: any list, including an empty
    one, passes.  The entries themselves are not inspected.
    '''

    # YAML automatically loads list-like strings as lists.  An empty list
    # is allowed, so no length test is needed.
    return isinstance(helpers, list)
def check_emails(emails):
    '''
    'emails' must be a comma-separated list of valid email addresses.
    The list may be empty.  A valid email address consists of characters,
    an '@', and more characters.  It should not contain the default contact
    email address (DEFAULT_CONTACT_EMAIL).

    Returns False when the value is not a list, or when any entry is not
    a string, does not match EMAIL_PATTERN, or is the default address.
    '''

    # YAML automatically loads list-like strings as lists.
    if not isinstance(emails, list):
        return False

    for email in emails:
        # Guard first: re.match raises TypeError on non-string entries.
        if not isinstance(email, str):
            return False
        if email == DEFAULT_CONTACT_EMAIL or not re.match(EMAIL_PATTERN, email):
            return False

    return True
def check_eventbrite(eventbrite):
    '''
    'eventbrite' (the Eventbrite registration key) must be 9 or more
    digits.  It may appear as an integer or as a string.

    Any integer is accepted as-is.  A string must start with a run of
    digits matching EVENTBRITE_PATTERN.  Values of any other type fail.
    '''

    if isinstance(eventbrite, int):
        return True

    # re.match raises TypeError on non-strings; treat those as invalid.
    if not isinstance(eventbrite, str):
        return False

    return bool(re.match(EVENTBRITE_PATTERN, eventbrite))
def check_collaborative_notes(collaborative_notes):
    '''
    'collaborative_notes' must be a valid URL.

    The value must be a string that starts with text matching
    URL_PATTERN.  Values that are not strings fail the check.
    '''

    # re.match raises TypeError on non-strings; treat those as invalid.
    if not isinstance(collaborative_notes, str):
        return False

    return bool(re.match(URL_PATTERN, collaborative_notes))
def check_pass(value):
    '''
    This test always passes (it is used for 'checking' things like the
    workshop address, for which no sensible validation is feasible).
    '''

    return True
# Maps each recognized header key to a triple:
#   (is the key required?, checking function, message reported on failure)
HANDLERS = {
    'layout': (True, check_layout, 'layout isn\'t "workshop"'),

    'carpentry': (True, check_carpentry, 'carpentry isn\'t in ' +
                  ', '.join(CARPENTRIES)),

    'country': (True, check_country,
                'country invalid: must use lowercase two-letter ISO code ' +
                'from ' + ', '.join(ISO_COUNTRY)),

    'language': (False, check_language,
                 'language invalid: must use lowercase two-letter ISO code' +
                 ' from ' + ', '.join(ISO_LANGUAGE)),

    'humandate': (True, check_humandate,
                  'humandate invalid. Please use three-letter months like ' +
                  '"Jan" and four-digit years like "2025"'),

    'humantime': (True, check_humantime,
                  'humantime doesn\'t include numbers'),

    'startdate': (True, check_date,
                  'startdate invalid. Must be of format year-month-day, ' +
                  'i.e., 2014-01-01'),

    'enddate': (False, check_date,
                'enddate invalid. Must be of format year-month-day, i.e.,' +
                ' 2014-01-01'),

    'latlng': (True, check_latitude_longitude,
               'latlng invalid. Check that it is two floating point ' +
               'numbers, separated by a comma'),

    'instructor': (True, check_instructors,
                   'instructor list isn\'t a valid list of format ' +
                   '["First instructor", "Second instructor",..]'),

    'helper': (True, check_helpers,
               'helper list isn\'t a valid list of format ' +
               '["First helper", "Second helper",..]'),

    'email': (True, check_emails,
              'contact email list isn\'t a valid list of format ' +
              '["me@example.org", "you@example.org",..] or contains incorrectly formatted email addresses or ' +
              '"{0}".'.format(DEFAULT_CONTACT_EMAIL)),

    'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid'),

    'collaborative_notes': (False, check_collaborative_notes, 'Collaborative Notes URL appears invalid'),

    'venue': (False, check_pass, 'venue name not specified'),

    'address': (False, check_pass, 'address not specified')
}
# REQUIRED is all required categories (first element of the handler
# triple is true).
REQUIRED = {key for key, entry in HANDLERS.items() if entry[0]}

# OPTIONAL is all optional categories (first element is false).
OPTIONAL = {key for key, entry in HANDLERS.items() if not entry[0]}
def check_blank_lines(reporter, raw):
    '''
    Blank lines are not allowed in category headers.

    ``raw`` is the header text.  Leading and trailing whitespace is
    stripped first, so only blank lines between entries count.  One
    check is sent to ``reporter``; it fails if any blank line was found,
    and its message lists each offender as "<0-based line index>: <text>".
    '''

    blank = [(index, text)
             for (index, text) in enumerate(raw.strip().split('\n'))
             if not text.strip()]
    listing = ', '.join("{0}: {1}".format(index, text.rstrip())
                        for (index, text) in blank)
    reporter.check(not blank,
                   None,
                   'Blank line(s) in header: {0}',
                   listing)
def check_categories(reporter, left, right, msg):
    '''
    Report differences (if any) between two sets of categories.

    The entries of ``left`` that are absent from ``right`` are the
    offenders.  One check is sent to ``reporter``; it fails when there
    is at least one offender, and its message is ``msg`` followed by the
    sorted list of offenders.
    '''

    diff = left - right
    reporter.check(not diff,
                   None,
                   '{0}: offending entries {1}',
                   msg, sorted(diff))
def check_file(reporter, path, data):
    '''
    Get header from file, call all other functions, and check file for
    validity.

    ``data`` is the text of the file at ``path``.  Every problem found is
    sent to ``reporter``; nothing is returned.
    '''

    # Get metadata as text and as YAML.  The body is not needed here.
    raw, header, _ = split_metadata(path, data)

    # Do we have any blank lines in the header?
    check_blank_lines(reporter, raw)

    # Look through all header entries.  If the category is in the input
    # file and is either required or we have actual data (as opposed to
    # a commented-out entry), we check it.  If it *isn't* in the header
    # but is required, report an error.
    for category, (required, handler, message) in HANDLERS.items():
        if category in header:
            value = header[category]
            if required or value:
                reporter.check(handler(value),
                               None,
                               '{0}\n actual value "{1}"',
                               message, value)
        else:
            # This check fails exactly when the absent key is required.
            reporter.check(not required,
                           None,
                           'Missing mandatory key "{0}"',
                           category)

    # Check whether we have missing or too many categories
    seen_categories = set(header.keys())
    check_categories(reporter, REQUIRED, seen_categories,
                     'Missing categories')
    check_categories(reporter, seen_categories, REQUIRED.union(OPTIONAL),
                     'Superfluous categories')
def check_config(reporter, filename):
    '''
    Check YAML configuration file.

    The configuration must declare ``kind: workshop`` and a ``carpentry``
    value that is one of CARPENTRIES.  Each failure is sent to
    ``reporter`` with ``filename`` as its location.
    '''

    config = load_yaml(filename)
    # NOTE(review): presumably load_yaml can return a non-mapping (e.g.
    # None for an empty file) -- confirm against util.load_yaml.  Treat
    # that as a configuration with no keys so both checks below fail
    # cleanly instead of raising AttributeError on .get().
    if not isinstance(config, dict):
        config = {}

    kind = config.get('kind', None)
    reporter.check(kind == 'workshop',
                   filename,
                   'Missing or unknown kind of event: {0}',
                   kind)

    # Use the shared tuple so this stays in step with check_carpentry.
    carpentry = config.get('carpentry', None)
    reporter.check(carpentry in CARPENTRIES,
                   filename,
                   'Missing or unknown carpentry: {0}',
                   carpentry)
def main():
    '''Run as the main program.

    Expects exactly one command-line argument: the workshop's root
    directory.  Checks ``_config.yml``, looks for unwanted files, checks
    the header of ``index.html``, then reports everything collected.
    Prints USAGE to stderr and exits with status 1 on a wrong argument
    count.
    '''

    if len(sys.argv) != 2:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    root_dir = sys.argv[1]
    index_file = os.path.join(root_dir, 'index.html')
    config_file = os.path.join(root_dir, '_config.yml')

    reporter = Reporter()
    check_config(reporter, config_file)
    check_unwanted_files(root_dir, reporter)

    # Read with an explicit encoding so the result does not depend on
    # the platform's default locale.
    with open(index_file, encoding='utf-8') as reader:
        data = reader.read()
    check_file(reporter, index_file, data)

    # NOTE(review): assumes Reporter exposes report() to emit the
    # collected messages -- confirm against util.Reporter.
    reporter.report()


if __name__ == '__main__':
    main()