'''Check that a workshop's index.html metadata is valid. See the
docstrings on the checking functions for a summary of the checks.
'''
import os
import re
import sys
from datetime import date
from functools import wraps

from util import Reporter, split_metadata, load_yaml, check_unwanted_files
# Metadata field patterns.  The checking functions apply them with
# re.match, so each one only has to match at the start of the value.

# Something, an '@', something, a '.', and something more.
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'

# Start and end time with all spaces removed: either 12-hour clock times
# that both carry 'am'/'pm', or 24-hour clock times, joined by '-' or 'to'.
HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'

# At least nine leading digits.
EVENTBRITE_PATTERN = r'\d{9,10}'

# An http:// or https:// address.
URL_PATTERN = r'https?://.+'

# Accepted values for the 'carpentry' field.
CARPENTRIES = ("dc", "swc", "lc", "cp")

# Contact address that the email check refuses to accept.
DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org'

USAGE = 'Usage: "workshop_check.py path/to/root/directory"'
26 # Country and language codes. Note that codes mean different things: 'ar'
27 # is 'Arabic' as a language but 'Argentina' as a country.
30 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
31 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
32 'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
33 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
34 'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
35 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
36 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
37 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
38 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
39 'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
40 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
41 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
42 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
43 'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
44 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
45 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
46 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
47 'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
48 'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
49 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
50 'ye', 'yt', 'za', 'zm', 'zw'
54 'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
55 'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
56 'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
57 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
58 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
59 'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
60 'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
61 'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
62 'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
63 'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
64 'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
65 'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
66 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
67 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
68 'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
69 'yo', 'za', 'zh', 'zu'
def look_for_fixme(func):
    """Decorator to fail test if text argument starts with "FIXME".

    The wrapper returns False without calling ``func`` when its argument
    is a string whose first non-blank characters are "FIXME"; any other
    argument is handed to ``func`` and that result is returned unchanged.
    """

    # wraps() copies the name and docstring of the checker onto the
    # wrapper, so the docstrings that summarise each check stay visible
    # on the decorated functions.
    @wraps(func)
    def inner(arg):
        # isinstance() already rules out None and every other non-string.
        if isinstance(arg, str) and arg.lstrip().startswith('FIXME'):
            return False
        return func(arg)

    return inner
def check_layout(layout):
    '''"layout" in YAML header must be "workshop".'''

    # Exact, case-sensitive comparison; anything else (including None)
    # fails the check.
    return layout == 'workshop'
def check_carpentry(layout):
    '''"carpentry" in YAML header must be "dc", "swc", "lc", or "cp".'''

    # Despite its name, the 'layout' parameter receives the value of the
    # 'carpentry' field; the name is kept so existing callers still work.
    return layout in CARPENTRIES
def check_country(country):
    '''"country" must be a lowercase ISO-3166 two-letter code.'''

    # Membership in the module's country table is the whole test, so
    # upper-case or three-letter codes are rejected.
    return country in ISO_COUNTRY
def check_language(language):
    '''"language" must be a lowercase ISO-639 two-letter code.'''

    # Membership in the module's language table is the whole test, so
    # upper-case or three-letter codes are rejected.
    return language in ISO_LANGUAGE
def check_humandate(date):
    """
    'humandate' must be a human-readable date with a 3-letter month
    and 4-digit year.  Examples include 'Feb 18-20, 2025' and 'Feb 18
    and 20, 2025'.  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Carpentries web site.

    Returns True when the value has the expected shape and False
    otherwise; malformed input never raises.
    """

    # Note: the parameter name shadows the imported datetime.date class
    # inside this function; it is kept for compatibility with callers.
    if not isinstance(date, str):
        return False

    # Exactly one comma must separate "<month> <days>" from the year,
    # otherwise unpacking the split below would fail.
    if date.count(',') != 1:
        return False
    month_dates, year = date.split(',')

    # There must be a fourth character to inspect at all.
    if len(month_dates) < 4:
        return False

    # The first three characters of month_dates are not empty
    month = month_dates[:3]
    if any(char == ' ' for char in month):
        return False

    # But the fourth character is empty ("February" is illegal)
    if month_dates[3] != ' ':
        return False

    # year contains *only* numbers (surrounding spaces are ignored)
    return year.strip().isdecimal()
def check_humantime(time):
    """
    'humantime' is a human-readable start and end time for the
    workshop, such as '09:00 - 16:00'.

    Returns True when the value, with all spaces removed, starts with a
    time range accepted by HUMANTIME_PATTERN; returns False for any
    other value, including non-strings.
    """

    # Values that are not text cannot hold a time range, and calling
    # .replace() on them would raise instead of failing the check.
    if not isinstance(time, str):
        return False

    # Spaces are dropped first so '09:00 - 16:00' and '09:00-16:00' are
    # treated alike.
    return bool(re.match(HUMANTIME_PATTERN, time.replace(' ', '')))
def check_date(this_date):
    """
    'startdate' and 'enddate' are machine-readable start and end dates
    for the workshop, and must be in YYYY-MM-DD format, e.g.,
    2015-07-01.

    Returns True only when the value is already a datetime.date (or a
    subclass of it); a date that is still a string fails the check.
    """

    # YAML automatically loads valid dates as datetime.date.
    return isinstance(this_date, date)
def check_latitude_longitude(latlng):
    """
    'latlng' must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Returns True when the latitude lies in [-90, 90] and the longitude
    in [-180, 180]; returns False for anything that cannot be parsed
    that way.
    """

    try:
        lat, lng = latlng.split(',')
        lat = float(lat)
        lng = float(lng)
    except (AttributeError, ValueError):
        # AttributeError: the value is not a string (no .split()).
        # ValueError: wrong number of comma-separated parts, or a part
        # that is not a number.
        return False

    return (-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0)
def check_instructors(instructors):
    """
    'instructor' must be a non-empty comma-separated list of quoted
    names, e.g. ['First name', 'Second name', ...'].  Do not use 'TBD'
    or other placeholders.

    Only the type and the non-emptiness of the list are verified here;
    the individual names are not inspected.
    """

    # YAML automatically loads list-like strings as lists.
    return isinstance(instructors, list) and bool(instructors)
def check_helpers(helpers):
    """
    'helper' must be a comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...'].  The list may be empty.
    Do not use 'TBD' or other placeholders.

    Only the type of the value is verified here; the individual names
    are not inspected.
    """

    # YAML automatically loads list-like strings as lists.  An empty list
    # is acceptable, so there is nothing to test about its length.
    return isinstance(helpers, list)
def check_emails(emails):
    """
    'emails' must be a comma-separated list of valid email addresses.
    The list may be empty.  A valid email address consists of characters,
    an '@', and more characters.  It should not contain the default contact
    email address.

    Returns True when the value is a list and every entry is an
    acceptable address; returns False otherwise.
    """

    # YAML automatically loads list-like strings as lists.
    if not isinstance(emails, list):
        return False

    for email in emails:
        # Entries that are not text can never be addresses; rejecting
        # them here also keeps re.match from raising TypeError.
        if not isinstance(email, str):
            return False
        # The placeholder contact address must have been replaced.
        if email == DEFAULT_CONTACT_EMAIL:
            return False
        if not re.match(EMAIL_PATTERN, email):
            return False

    return True
def check_eventbrite(eventbrite):
    """
    'eventbrite' (the Eventbrite registration key) must be 9 or more
    digits.  It may appear as an integer or as a string.

    Returns True when the key, written out as text, starts with at least
    nine digits; returns False for every other value.
    """

    # bool is a subclass of int, but True/False are never valid keys.
    if isinstance(eventbrite, bool):
        return False

    # Integers are validated through the same pattern as strings so that
    # the digit-count rule applies to both spellings of the key.
    if isinstance(eventbrite, int):
        eventbrite = str(eventbrite)

    if not isinstance(eventbrite, str):
        return False

    return bool(re.match(EVENTBRITE_PATTERN, eventbrite))
def check_collaborative_notes(collaborative_notes):
    """
    'collaborative_notes' must be a valid URL.

    Returns True when the value is a string that starts with an
    http:// or https:// address, as described by URL_PATTERN; returns
    False otherwise.
    """

    # Values that are not text cannot be URLs, and re.match would raise
    # TypeError on them instead of failing the check.
    if not isinstance(collaborative_notes, str):
        return False

    return bool(re.match(URL_PATTERN, collaborative_notes))
def check_pass(value):
    """
    This test always passes (it is used for 'checking' things like the
    workshop address, for which no sensible validation is feasible).
    """

    # The argument is accepted only so this function has the same
    # signature as the other checkers.
    return True
256 'layout': (True, check_layout, 'layout isn\'t "workshop"'),
258 'carpentry': (True, check_carpentry, 'carpentry isn\'t in ' +
259 ', '.join(CARPENTRIES)),
261 'country': (True, check_country,
262 'country invalid: must use lowercase two-letter ISO code ' +
263 'from ' + ', '.join(ISO_COUNTRY)),
265 'language': (False, check_language,
266 'language invalid: must use lowercase two-letter ISO code' +
267 ' from ' + ', '.join(ISO_LANGUAGE)),
269 'humandate': (True, check_humandate,
270 'humandate invalid. Please use three-letter months like ' +
271 '"Jan" and four-letter years like "2025"'),
273 'humantime': (True, check_humantime,
274 'humantime doesn\'t include numbers'),
276 'startdate': (True, check_date,
277 'startdate invalid. Must be of format year-month-day, ' +
280 'enddate': (False, check_date,
281 'enddate invalid. Must be of format year-month-day, i.e.,' +
284 'latlng': (True, check_latitude_longitude,
285 'latlng invalid. Check that it is two floating point ' +
286 'numbers, separated by a comma'),
288 'instructor': (True, check_instructors,
289 'instructor list isn\'t a valid list of format ' +
290 '["First instructor", "Second instructor",..]'),
292 'helper': (True, check_helpers,
293 'helper list isn\'t a valid list of format ' +
294 '["First helper", "Second helper",..]'),
296 'email': (True, check_emails,
297 'contact email list isn\'t a valid list of format ' +
298 '["me@example.org", "you@example.org",..] or contains incorrectly formatted email addresses or ' +
299 '"{0}".'.format(DEFAULT_CONTACT_EMAIL)),
301 'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid'),
303 'collaborative_notes': (False, check_collaborative_notes, 'Collaborative Notes URL appears invalid'),
305 'venue': (False, check_pass, 'venue name not specified'),
307 'address': (False, check_pass, 'address not specified')
# Each HANDLERS value is a tuple whose first element says whether the
# category is mandatory.

# REQUIRED is all required categories.
REQUIRED = {key for key, entry in HANDLERS.items() if entry[0]}

# OPTIONAL is all optional categories.
OPTIONAL = {key for key, entry in HANDLERS.items() if not entry[0]}
def check_blank_lines(reporter, raw):
    """
    Blank lines are not allowed in category headers.

    'raw' is the header text.  Leading and trailing whitespace is
    stripped first, so only blank lines between entries are reported.
    The result is passed to reporter.check together with the zero-based
    index of every offending line.
    """

    # Collect (index, text) for every line that is empty or holds only
    # whitespace.
    lines = [(i, x) for (i, x) in enumerate(raw.strip().split('\n'))
             if not x.strip()]

    # NOTE(review): the second argument of reporter.check is taken to be
    # an optional location, with None meaning "no specific location" --
    # confirm against util.Reporter.
    reporter.check(not lines,
                   None,
                   'Blank line(s) in header: {0}',
                   ', '.join(["{0}: {1}".format(i, x.rstrip())
                              for (i, x) in lines]))
def check_categories(reporter, left, right, msg):
    """
    Report differences (if any) between two sets of categories.

    Every entry of 'left' that is absent from 'right' counts as
    offending; the check passes when there is none.  'msg' labels the
    kind of difference in the reported message.
    """

    diff = left - right

    # NOTE(review): the second argument of reporter.check is taken to be
    # an optional location, with None meaning "no specific location" --
    # confirm against util.Reporter.
    reporter.check(not diff,
                   None,
                   '{0}: offending entries {1}',
                   msg, sorted(diff))
def check_file(reporter, path, data):
    """
    Get header from file, call all other functions, and check file for
    validity.

    'path' and 'data' are handed to split_metadata unchanged.  All
    findings are recorded on 'reporter'; nothing is returned.
    """

    # Get metadata as text and as YAML.  The body of the page is not
    # needed for any of the checks.
    raw, header, _body = split_metadata(path, data)

    # Do we have any blank lines in the header?
    check_blank_lines(reporter, raw)

    # Look through all header entries.  If the category is in the input
    # file and is either required or we have actual data (as opposed to
    # a commented-out entry), we check it.  If it *isn't* in the header
    # but is required, report an error.
    #
    # NOTE(review): the second argument of reporter.check is taken to be
    # an optional location, with None meaning "no specific location" --
    # confirm against util.Reporter.
    for category, (required, handler, message) in HANDLERS.items():
        if category in header:
            value = header[category]
            if required or value:
                reporter.check(handler(value),
                               None,
                               '{0}\n actual value "{1}"',
                               message, value)
        elif required:
            # A failing check is how the missing key gets reported.
            reporter.check(False,
                           None,
                           'Missing mandatory key "{0}"',
                           category)

    # Check whether we have missing or too many categories
    seen_categories = set(header.keys())
    check_categories(reporter, REQUIRED, seen_categories,
                     'Missing categories')
    check_categories(reporter, seen_categories, REQUIRED.union(OPTIONAL),
                     'Superfluous categories')
def check_config(reporter, filename):
    """
    Check YAML configuration file.

    Loads 'filename' with load_yaml and verifies that its 'kind' entry
    is 'workshop' and that its 'carpentry' entry is one of the values in
    CARPENTRIES.  Findings are recorded on 'reporter'; nothing is
    returned.
    """

    config = load_yaml(filename)

    # NOTE(review): the second argument of reporter.check is taken to be
    # the location the message refers to, here the configuration file --
    # confirm against util.Reporter.
    kind = config.get('kind', None)
    reporter.check(kind == 'workshop',
                   filename,
                   'Missing or unknown kind of event: {0}',
                   kind)

    # Use the shared tuple so this check and check_carpentry cannot drift
    # apart.
    carpentry = config.get('carpentry', None)
    reporter.check(carpentry in CARPENTRIES,
                   filename,
                   'Missing or unknown carpentry: {0}',
                   carpentry)
400 '''Run as the main program.'''
402 if len(sys.argv) != 2:
403 print(USAGE, file=sys.stderr)
406 root_dir = sys.argv[1]
407 index_file = os.path.join(root_dir, 'index.html')
408 config_file = os.path.join(root_dir, '_config.yml')
410 reporter = Reporter()
411 check_config(reporter, config_file)
412 check_unwanted_files(root_dir, reporter)
413 with open(index_file) as reader:
415 check_file(reporter, index_file, data)
419 if __name__ == '__main__':