'''Check that a workshop's index.html metadata is valid.  See the
docstrings on the checking functions for a summary of the checks.
'''
from __future__ import print_function

import os
import re
import sys
from datetime import date

from util import Reporter, split_metadata, load_yaml, check_unwanted_files
# Metadata field patterns.  The checking functions apply these with
# re.match(), so each one is anchored at the start of the value only.
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
# Either a 12-hour range with am/pm on both ends, or a 24-hour range.
HUMANTIME_PATTERN = (
    r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))'
    r'|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
)
EVENTBRITE_PATTERN = r'\d{9,10}'
URL_PATTERN = r'https?://.+'

# Accepted values for the "carpentry" key.
CARPENTRIES = ("dc", "swc")
# Placeholder contact address that must be replaced by a real one.
DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org'

USAGE = 'Usage: "check-workshop path/to/root/directory"'
# Country and language codes.  Note that codes mean different things: 'ar'
# is 'Arabic' as a language but 'Argentina' as a country.

# Lowercase two-letter country codes accepted for the "country" key.
# Kept as an ordered list because the codes are joined into an error message.
ISO_COUNTRY = [
    'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
    'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
    'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
    'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
    'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
    'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
    'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
    'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
    'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
    'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
    'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
    'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
    'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
    'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
    'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
    'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
    'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
    'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
    'ye', 'yt', 'za', 'zm', 'zw',
]
# Lowercase two-letter language codes accepted for the "language" key.
# Kept as an ordered list because the codes are joined into an error message.
ISO_LANGUAGE = [
    'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
    'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
    'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
    'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
    'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
    'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
    'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
    'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
    'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
    'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
    'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
    'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
    'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
    'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
    'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
    'yo', 'za', 'zh', 'zu',
]
def look_for_fixme(func):
    """Decorator to fail test if text argument starts with "FIXME".

    The wrapped checker takes a single argument.  If that argument is a
    string whose first non-blank characters are "FIXME", the wrapper
    returns False without calling `func`; otherwise it returns
    `func(arg)` unchanged.
    """
    from functools import wraps

    @wraps(func)  # keep the checker's name and docstring
    def inner(arg):
        # isinstance() already rules out None, so no separate None test.
        if isinstance(arg, str) and arg.lstrip().startswith('FIXME'):
            return False
        return func(arg)

    return inner
def check_layout(layout):
    '''"layout" in YAML header must be "workshop".

    Returns True only when `layout` equals the string 'workshop'.
    '''

    return layout == 'workshop'
def check_carpentry(layout):
    '''"carpentry" in YAML header must be "dc" or "swc".

    The parameter is named `layout` for backward compatibility; it holds
    the value of the "carpentry" key.  Returns True when that value is
    one of CARPENTRIES.
    '''

    return layout in CARPENTRIES
def check_country(country):
    '''"country" must be a lowercase ISO-3166 two-letter code.

    Returns True when `country` is listed in ISO_COUNTRY.
    '''

    return country in ISO_COUNTRY
def check_language(language):
    '''"language" must be a lowercase ISO-639 two-letter code.

    Returns True when `language` is listed in ISO_LANGUAGE.
    '''

    return language in ISO_LANGUAGE
def check_humandate(date):
    '''
    'humandate' must be a human-readable date with a 3-letter month
    and 4-digit year.  Examples include 'Feb 18-20, 2025' and 'Feb 18
    and 20, 2025'.  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Software Carpentry web site.

    Returns True when the value has that shape and False otherwise,
    including for values that are not strings.
    '''

    # Exactly one comma must separate "<month> <days>" from "<year>".
    try:
        parts = date.split(',')
    except AttributeError:
        return False
    if len(parts) != 2:
        return False
    month_dates, year = parts

    # The first three characters of month_dates are not empty
    month = month_dates[:3]
    if len(month) < 3 or ' ' in month:
        return False

    # But the fourth character is empty ("February" is illegal).
    # Slicing (rather than indexing) copes with values that stop
    # right after the month, such as 'Feb,2025'.
    if month_dates[3:4] != ' ':
        return False

    # year contains *only* numbers, and exactly four of them
    year = year.strip()
    return len(year) == 4 and year.isdigit()
def check_humantime(time):
    '''
    'humantime' is a human-readable start and end time for the
    workshop, such as '09:00 - 16:00'.

    All spaces are removed from the value before it is matched against
    HUMANTIME_PATTERN, so '09:00 - 16:00' and '09:00-16:00' are
    treated alike.
    '''

    return bool(re.match(HUMANTIME_PATTERN, time.replace(' ', '')))
def check_date(this_date):
    '''
    'startdate' and 'enddate' are machine-readable start and end dates
    for the workshop, and must be in YYYY-MM-DD format, e.g.,
    '2025-02-18'.

    Returns True when the loaded value is a datetime.date instance.
    '''

    # YAML automatically loads valid dates as datetime.date.
    return isinstance(this_date, date)
def check_latitude_longitude(latlng):
    '''
    'latlng' must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Returns True when both numbers parse and lie within [-90, 90] and
    [-180, 180] respectively; returns False for anything else,
    including values that are not strings.
    '''

    try:
        lat, lng = latlng.split(',')
        lat = float(lat)
        lng = float(lng)
    except (AttributeError, ValueError):
        # Not a string, wrong number of fields, or not numeric.
        return False
    return (-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0)
def check_instructors(instructors):
    '''
    'instructor' must be a non-empty comma-separated list of quoted
    names, e.g. ['First name', 'Second name', ...'].  Do not use 'TBD'
    or other placeholders.

    Only the type and non-emptiness of the list are checked here; the
    individual names are not inspected.
    '''

    # YAML automatically loads list-like strings as lists.
    return isinstance(instructors, list) and len(instructors) > 0
def check_helpers(helpers):
    '''
    'helper' must be a comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...'].  The list may be empty.
    Do not use 'TBD' or other placeholders.

    Only the type of the value is checked here; the individual names
    are not inspected.
    '''

    # YAML automatically loads list-like strings as lists.  An empty
    # list is acceptable, so no length test is needed.
    return isinstance(helpers, list)
def check_email(email):
    '''
    'contact' must be a valid email address consisting of characters,
    an '@', and more characters.  It should not be the default contact
    email address 'admin@software-carpentry.org'.

    Returns True when the value matches EMAIL_PATTERN and differs from
    DEFAULT_CONTACT_EMAIL.
    '''

    return bool(re.match(EMAIL_PATTERN, email)) and \
        (email != DEFAULT_CONTACT_EMAIL)
def check_eventbrite(eventbrite):
    '''
    'eventbrite' (the Eventbrite registration key) must be 9 or more
    digits.  It may appear as an integer or as a string.

    Integers are converted to their decimal text first so that the
    same digit-count check applies to both forms.
    '''

    if isinstance(eventbrite, int):
        eventbrite = str(eventbrite)
    # re.match() anchors only at the start, so keys longer than the
    # pattern's upper bound are still accepted ("9 or more").
    return bool(re.match(EVENTBRITE_PATTERN, eventbrite))
def check_etherpad(etherpad):
    '''
    'etherpad' must be a valid URL.

    Returns True when the value matches URL_PATTERN.
    '''

    return bool(re.match(URL_PATTERN, etherpad))
def check_pass(value):
    '''
    This test always passes (it is used for 'checking' things like the
    workshop address, for which no sensible validation is feasible).
    '''

    return True
# Maps each recognised header key to a tuple of
# (is the key required?, checking function, error message).
HANDLERS = {
    'layout': (True, check_layout, 'layout isn\'t "workshop"'),

    'carpentry': (True, check_carpentry, 'carpentry isn\'t in ' +
                  ', '.join(CARPENTRIES)),

    'country': (True, check_country,
                'country invalid: must use lowercase two-letter ISO code ' +
                'from ' + ', '.join(ISO_COUNTRY)),

    'language': (False, check_language,
                 'language invalid: must use lowercase two-letter ISO code' +
                 ' from ' + ', '.join(ISO_LANGUAGE)),

    'humandate': (True, check_humandate,
                  'humandate invalid. Please use three-letter months like ' +
                  '"Jan" and four-letter years like "2025"'),

    'humantime': (True, check_humantime,
                  'humantime doesn\'t include numbers'),

    # NOTE(review): the tail of the two date messages was not visible;
    # 'YYYY-MM-DD' is taken from the check_date docstring -- confirm.
    'startdate': (True, check_date,
                  'startdate invalid. Must be of format year-month-day, ' +
                  'i.e., YYYY-MM-DD'),

    'enddate': (False, check_date,
                'enddate invalid. Must be of format year-month-day, i.e.,' +
                ' YYYY-MM-DD'),

    'latlng': (True, check_latitude_longitude,
               'latlng invalid. Check that it is two floating point ' +
               'numbers, separated by a comma'),

    'instructor': (True, check_instructors,
                   'instructor list isn\'t a valid list of format ' +
                   '["First instructor", "Second instructor",..]'),

    'helper': (True, check_helpers,
               'helper list isn\'t a valid list of format ' +
               '["First helper", "Second helper",..]'),

    'contact': (True, check_email,
                'contact email invalid or still set to ' +
                '"{0}".'.format(DEFAULT_CONTACT_EMAIL)),

    'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid'),

    'etherpad': (False, check_etherpad, 'Etherpad URL appears invalid'),

    'venue': (False, check_pass, 'venue name not specified'),

    'address': (False, check_pass, 'address not specified'),
}
# REQUIRED is all required categories (first tuple element is true).
REQUIRED = {key for key, spec in HANDLERS.items() if spec[0]}

# OPTIONAL is all optional categories (first tuple element is false).
OPTIONAL = {key for key, spec in HANDLERS.items() if not spec[0]}
309 def check_blank_lines(reporter, raw):
311 Blank lines are not allowed in category headers.
314 lines = [(i, x) for (i, x) in enumerate(raw.strip().split('\n')) if not x.strip()]
315 reporter.check(not lines,
317 'Blank line(s) in header: {0}',
318 ', '.join(["{0}: {1}".format(i, x.rstrip()) for (i, x) in lines]))
321 def check_categories(reporter, left, right, msg):
323 Report differences (if any) between two sets of categories.
327 reporter.check(len(diff) == 0,
329 '{0}: offending entries {1}',
330 msg, sorted(list(diff)))
333 def check_file(reporter, path, data):
335 Get header from file, call all other functions, and check file for
339 # Get metadata as text and as YAML.
340 raw, header, body = split_metadata(path, data)
342 # Do we have any blank lines in the header?
343 check_blank_lines(reporter, raw)
345 # Look through all header entries. If the category is in the input
346 # file and is either required or we have actual data (as opposed to
347 # a commented-out entry), we check it. If it *isn't* in the header
348 # but is required, report an error.
349 for category in HANDLERS:
350 required, handler, message = HANDLERS[category]
351 if category in header:
352 if required or header[category]:
353 reporter.check(handler(header[category]),
355 '{0}\n actual value "{1}"',
356 message, header[category])
359 'Missing mandatory key "{0}"',
362 # Check whether we have missing or too many categories
363 seen_categories = set(header.keys())
364 check_categories(reporter, REQUIRED, seen_categories,
365 'Missing categories')
366 check_categories(reporter, seen_categories, REQUIRED.union(OPTIONAL),
367 'Superfluous categories')
370 def check_config(reporter, filename):
372 Check YAML configuration file.
375 config = load_yaml(filename)
377 kind = config.get('kind', None)
378 reporter.check(kind == 'workshop',
380 'Missing or unknown kind of event: {0}',
383 carpentry = config.get('carpentry', None)
384 reporter.check(carpentry in ('swc', 'dc'),
386 'Missing or unknown carpentry: {0}',
391 '''Run as the main program.'''
393 if len(sys.argv) != 2:
394 print(USAGE, file=sys.stderr)
397 root_dir = sys.argv[1]
398 index_file = os.path.join(root_dir, 'index.html')
399 config_file = os.path.join(root_dir, '_config.yml')
401 reporter = Reporter()
402 check_config(reporter, config_file)
403 check_unwanted_files(root_dir, reporter)
404 with open(index_file) as reader:
406 check_file(reporter, index_file, data)
410 if __name__ == '__main__':