'''Check that a workshop's index.html metadata is valid.  See the
docstrings on the checking functions for a summary of the checks.
'''
11 from datetime import date
12 from util import Reporter, split_metadata
# Metadata field patterns.
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
# Two alternatives: a 12-hour range with am/pm on both ends, or a 24-hour
# range.  Callers strip spaces from the value before matching.
HUMANTIME_PATTERN = (
    r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))'
    r'|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
)
EVENTBRITE_PATTERN = r'\d{9,10}'
URL_PATTERN = r'https?://.+'

# Accepted values for the "carpentry" header field.
CARPENTRIES = ("dc", "swc")

# Contact address that check_email() rejects as "still the default".
DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org'

USAGE = 'Usage: "check-workshop path/to/root/directory"'
# Country and language codes.  Note that codes mean different things: 'ar'
# is 'Arabic' as a language but 'Argentina' as a country.

# NOTE(review): the opening/closing lines of both tables were not visible;
# plain lists are assumed (the tables are only used with `in` and
# `', '.join(...)` elsewhere in this file) -- confirm the container type.
ISO_COUNTRY = [
    'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
    'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
    'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
    'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
    'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
    'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
    'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
    'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
    'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
    'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
    'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
    'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
    'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
    'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
    'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
    'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
    'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
    'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
    'ye', 'yt', 'za', 'zm', 'zw'
]

ISO_LANGUAGE = [
    'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
    'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
    'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
    'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
    'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
    'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
    'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
    'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
    'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
    'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
    'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
    'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
    'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
    'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
    'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
    'yo', 'za', 'zh', 'zu'
]
def look_for_fixme(func):
    """Decorator to fail test if text argument starts with "FIXME".

    The returned wrapper takes a single argument.  If that argument is a
    string whose first non-whitespace characters are "FIXME", the wrapper
    returns False without calling ``func``; otherwise it returns
    ``func(arg)``.
    """
    # Imported locally so this block does not depend on a file-level import.
    from functools import wraps

    # NOTE(review): the wrapper's `def` line and both `return` statements
    # were not visible in the source; they are reconstructed from the
    # docstring and the visible condition -- confirm.
    @wraps(func)  # keep the checker's name/docstring on the wrapper
    def inner(arg):
        # isinstance() already excludes None, so no separate None test.
        if isinstance(arg, str) and arg.lstrip().startswith('FIXME'):
            return False
        return func(arg)

    return inner
def check_layout(layout):
    '''"layout" in YAML header must be "workshop".'''

    return layout == 'workshop'
def check_carpentry(layout):
    '''"carpentry" in YAML header must be "dc" or "swc".

    The parameter is named ``layout`` for historical reasons; it holds the
    value of the "carpentry" field and is tested for membership in
    CARPENTRIES.
    '''

    return layout in CARPENTRIES
def check_country(country):
    '''"country" must be a lowercase ISO-3166 two-letter code.

    Returns True when ``country`` is one of the entries in ISO_COUNTRY.
    '''

    return country in ISO_COUNTRY
def check_language(language):
    '''"language" must be a lowercase ISO-639 two-letter code.

    Returns True when ``language`` is one of the entries in ISO_LANGUAGE.
    '''

    return language in ISO_LANGUAGE
def check_humandate(date):
    '''
    'humandate' must be a human-readable date with a 3-letter month
    and 4-digit year.  Examples include 'Feb 18-20, 2025' and 'Feb 18
    and 20, 2025'.  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Software Carpentry web site.

    Returns True when the value is a string of the form
    "<3-char month> <days>, <integer year>", and False for anything else
    (including non-string values and values without exactly one comma).
    Only integer-ness of the year is tested, not its number of digits.
    '''

    # Exactly one comma must separate "month days" from the year; any
    # other count would make the two-way unpacking below raise.
    if not isinstance(date, str) or date.count(',') != 1:
        return False

    month_dates, year = date.split(',')

    # The first three characters of month_dates are not empty
    month = month_dates[:3]
    if len(month) < 3 or ' ' in month:
        return False

    # But the fourth character is empty ("February" is illegal).  Slicing
    # (rather than indexing) yields '' instead of IndexError when the text
    # before the comma is only three characters long.
    if month_dates[3:4] != ' ':
        return False

    # year contains *only* numbers
    try:
        int(year)
    except ValueError:
        return False

    return True
def check_humantime(time):
    '''
    'humantime' is a human-readable start and end time for the
    workshop, such as '09:00 - 16:00'.

    Spaces are removed before the value is matched against
    HUMANTIME_PATTERN.  Non-string values are reported as invalid rather
    than raising.
    '''

    if not isinstance(time, str):
        return False
    return bool(re.match(HUMANTIME_PATTERN, time.replace(' ', '')))
def check_date(this_date):
    '''
    'startdate' and 'enddate' are machine-readable start and end dates
    for the workshop, and must be in YYYY-MM-DD format, e.g.,
    '2025-02-18'.

    The value is accepted only if it is already a ``datetime.date``
    instance; strings and other types are rejected.
    '''

    # YAML automatically loads valid dates as datetime.date.
    return isinstance(this_date, date)
def check_latitude_longitude(latlng):
    '''
    'latlng' must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Returns True when the value splits into exactly two parts that both
    parse as floats, with latitude in [-90, 90] and longitude in
    [-180, 180].  Anything else (wrong number of parts, unparsable
    numbers, non-string input) returns False.
    '''

    try:
        lat, lng = latlng.split(',')
        lat = float(lat)
        lng = float(lng)
    except (AttributeError, ValueError):
        # AttributeError: value has no .split (not a string).
        # ValueError: wrong number of parts or a part is not a number.
        return False

    # Compare the parsed longitude itself (the visible code compared a
    # name `long` that it never assigned).
    return (-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0)
def check_instructors(instructors):
    '''
    'instructor' must be a non-empty comma-separated list of quoted
    names, e.g. ['First name', 'Second name', ...'].  Do not use 'TBD'
    or other placeholders.

    Only the container is validated here: the value must be a list with
    at least one element.  Individual names are not inspected.
    '''

    # YAML automatically loads list-like strings as lists.
    return isinstance(instructors, list) and bool(instructors)
def check_helpers(helpers):
    '''
    'helper' must be a comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...'].  The list may be empty.
    Do not use 'TBD' or other placeholders.

    Only the container type is validated: any list, including an empty
    one, passes.  Individual names are not inspected.
    '''

    # YAML automatically loads list-like strings as lists.  A length test
    # is unnecessary because an empty list is allowed.
    return isinstance(helpers, list)
def check_email(email):
    '''
    'contact' must be a valid email address consisting of characters,
    an '@', and more characters.  It should not be the default contact
    email address 'admin@software-carpentry.org'.

    Returns False for non-string values instead of raising.
    '''

    if not isinstance(email, str):
        return False
    return bool(re.match(EMAIL_PATTERN, email)) and \
        (email != DEFAULT_CONTACT_EMAIL)
def check_eventbrite(eventbrite):
    '''
    'eventbrite' (the Eventbrite registration key) must be 9 or more
    digits.  It may appear as an integer or as a string.

    Integers are converted to their decimal text and held to the same
    digit rule as strings (previously any integer passed, regardless of
    length).  Values that are neither int nor str return False.
    '''

    if isinstance(eventbrite, int):
        # str(True)/str(False) and negative numbers do not start with
        # digits, so they fail the pattern below.
        eventbrite = str(eventbrite)
    if not isinstance(eventbrite, str):
        return False
    return bool(re.match(EVENTBRITE_PATTERN, eventbrite))
def check_etherpad(etherpad):
    '''
    'etherpad' must be a valid URL.

    The value is matched against URL_PATTERN; non-string values return
    False instead of raising.
    '''

    if not isinstance(etherpad, str):
        return False
    return bool(re.match(URL_PATTERN, etherpad))
def check_pass(value):
    '''
    This test always passes (it is used for 'checking' things like the
    workshop address, for which no sensible validation is feasible).
    '''

    # NOTE(review): the return statement was not visible in the source;
    # `True` follows from the docstring ("always passes") -- confirm.
    return True
# Maps each recognised header category to a 3-tuple:
#   (is the category required?, checking function, message used on failure)
HANDLERS = {
    'layout': (True, check_layout, 'layout isn\'t "workshop"'),

    'carpentry': (True, check_carpentry, 'carpentry isn\'t in ' +
                  ', '.join(CARPENTRIES)),

    'country': (True, check_country,
                'country invalid: must use lowercase two-letter ISO code ' +
                'from ' + ', '.join(ISO_COUNTRY)),

    'language': (False, check_language,
                 'language invalid: must use lowercase two-letter ISO code' +
                 ' from ' + ', '.join(ISO_LANGUAGE)),

    'humandate': (True, check_humandate,
                  'humandate invalid. Please use three-letter months like ' +
                  '"Jan" and four-letter years like "2025"'),

    'humantime': (True, check_humantime,
                  'humantime doesn\'t include numbers'),

    # NOTE(review): the final piece of the two date messages below was not
    # visible in the source (each ended in a dangling "+"); an example
    # date has been supplied -- confirm the intended wording.
    'startdate': (True, check_date,
                  'startdate invalid. Must be of format year-month-day, ' +
                  'i.e., 2025-02-18'),

    'enddate': (False, check_date,
                'enddate invalid. Must be of format year-month-day, i.e.,' +
                ' 2025-02-18'),

    'latlng': (True, check_latitude_longitude,
               'latlng invalid. Check that it is two floating point ' +
               'numbers, separated by a comma'),

    'instructor': (True, check_instructors,
                   'instructor list isn\'t a valid list of format ' +
                   '["First instructor", "Second instructor",..]'),

    'helper': (True, check_helpers,
               'helper list isn\'t a valid list of format ' +
               '["First helper", "Second helper",..]'),

    'contact': (True, check_email,
                'contact email invalid or still set to ' +
                '"{0}".'.format(DEFAULT_CONTACT_EMAIL)),

    'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid'),

    'etherpad': (False, check_etherpad, 'Etherpad URL appears invalid'),

    'venue': (False, check_pass, 'venue name not specified'),

    'address': (False, check_pass, 'address not specified')
}
# REQUIRED is all required categories (first element of the handler
# tuple is true).
REQUIRED = {key for key, spec in HANDLERS.items() if spec[0]}

# OPTIONAL is all optional categories (first element is false).
OPTIONAL = {key for key, spec in HANDLERS.items() if not spec[0]}
def check_blank_lines(reporter, raw):
    '''
    Blank lines are not allowed in category headers.

    ``raw`` is the header text.  After stripping leading and trailing
    whitespace from the whole text, every line that is empty or
    whitespace-only is collected with its 0-based index, and a single
    check is reported that passes only when no such line exists.
    '''

    blank = [(i, x) for (i, x) in enumerate(raw.strip().split('\n'))
             if not x.strip()]
    # NOTE(review): the second argument to reporter.check was not visible
    # in the source; None is assumed -- confirm against Reporter.check.
    reporter.check(not blank,
                   None,
                   'Blank line(s) in header: {0}',
                   ', '.join("{0}: {1}".format(i, x.rstrip())
                             for (i, x) in blank))
def check_categories(reporter, left, right, msg):
    '''
    Report differences (if any) between two sets of categories.

    The check passes when every member of ``left`` is also in ``right``.
    Otherwise the members of ``left`` missing from ``right`` are reported,
    sorted, together with ``msg``.
    '''

    # NOTE(review): the line computing `diff` and the second argument to
    # reporter.check were not visible in the source.  `left - right`
    # follows from how this function is called in this file (required vs.
    # seen for "Missing", seen vs. known for "Superfluous"); None is
    # assumed for the hidden argument -- confirm.
    diff = left - right
    reporter.check(not diff,
                   None,
                   '{0}: offending entries {1}',
                   msg, sorted(diff))
def check_file(reporter, path, data):
    '''
    Get header from file, call all other functions, and check file for
    validity.

    ``path`` and ``data`` are passed to split_metadata(), which returns
    the header as raw text and as a parsed mapping.  The raw text is
    checked for blank lines, each known category is validated with its
    handler from HANDLERS, and finally the set of categories present is
    compared against the required and known sets.  All results go to
    ``reporter``.
    '''

    # Get metadata as text and as YAML (the body is not needed here).
    raw, header, _ = split_metadata(path, data)

    # Do we have any blank lines in the header?
    check_blank_lines(reporter, raw)

    # Look through all header entries.  If the category is in the input
    # file and is either required or we have actual data (as opposed to
    # a commented-out entry), we check it.  If it *isn't* in the header
    # but is required, report an error.
    # NOTE(review): the second argument of each reporter.check call and the
    # whole "missing key" branch header were not visible in the source;
    # None and `elif required:` are assumed -- confirm.
    for category, (required, handler, message) in HANDLERS.items():
        if category in header:
            value = header[category]
            if required or value:
                reporter.check(handler(value),
                               None,
                               '{0}\n actual value "{1}"',
                               message, value)
        elif required:
            reporter.check(False,
                           None,
                           'Missing mandatory key "{0}"',
                           category)

    # Check whether we have missing or too many categories
    seen_categories = set(header.keys())
    check_categories(reporter, REQUIRED, seen_categories,
                     'Missing categories')
    check_categories(reporter, seen_categories, REQUIRED.union(OPTIONAL),
                     'Superfluous categories')
def check_config(reporter, filename):
    '''
    Check YAML configuration file.

    Loads ``filename`` as YAML and reports a check that passes only when
    its top-level "kind" entry equals "workshop".  A file that is empty,
    is not a mapping, or has no "kind" entry fails the check (with None
    shown as the found value) instead of raising.
    '''

    with open(filename, 'r') as reader:
        # safe_load: a configuration file never needs arbitrary object
        # construction, and plain yaml.load() without a Loader is rejected
        # by current PyYAML releases.
        config = yaml.safe_load(reader)

    kind = config.get('kind') if isinstance(config, dict) else None

    # NOTE(review): the second and last arguments of this call were not
    # visible in the source; the file name (as location) and the found
    # "kind" value are assumed -- confirm against Reporter.check.
    reporter.check(kind == 'workshop',
                   filename,
                   'Not configured as a workshop: found "{0}" instead',
                   kind)
# NOTE(review): the `def` line, the exit after the usage message, the file
# read, the final report call and the call under the entry guard were not
# visible in the source; they are reconstructed here -- confirm each.
def main():
    '''Run as the main program.

    Expects exactly one command-line argument: the workshop's root
    directory.  Checks ``_config.yml`` and ``index.html`` inside it.
    '''

    if len(sys.argv) != 2:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    root_dir = sys.argv[1]
    index_file = os.path.join(root_dir, 'index.html')
    config_file = os.path.join(root_dir, '_config.yml')

    reporter = Reporter()
    check_config(reporter, config_file)
    with open(index_file) as reader:
        data = reader.read()
        check_file(reporter, index_file, data)
    # presumably Reporter exposes report() to emit collected messages --
    # verify against util.Reporter.
    reporter.report()


if __name__ == '__main__':
    main()