'''Check that a workshop's index.html metadata is valid.  See the
docstrings on the checking functions for a summary of the checks.
'''
12 from collections import Counter
# Basic logging configuration: a module-level logger whose records are
# written by a StreamHandler as "LEVEL: message" lines.
logger = logging.getLogger(__name__)
verbosity = logging.INFO  # severity of at least INFO will emerge
logger.setLevel(verbosity)

# Create the console handler.  It uses the same threshold as the logger
# (`verbosity`, i.e. INFO), not DEBUG.
console_handler = logging.StreamHandler()
console_handler.setLevel(verbosity)

# Prefix every message with the name of its severity level.
formatter = logging.Formatter('%(levelname)s: %(message)s')
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
# Regular expressions used by the checking functions.  They are applied
# with re.match(), so each one is anchored at the start of the value only;
# trailing text after a match is not rejected.

# One or more non-"@" characters, an "@", then a part that contains at
# least one "." with non-"@" characters on both sides of it.
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
# Two alternatives, both of the form START(-|to)END with minutes 00-59:
#   * 12-hour clock: hours 1-12 (optional leading zero), "am"/"pm" required
#     on both times;
#   * 24-hour clock: hours 0-23 (optional leading zero), no suffix.
# The pattern contains no whitespace; check_humantime() removes spaces from
# the value before matching.
HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
# Nine or ten digits at the start of the value.
EVENTBRITE_PATTERN = r'\d{9,10}'
# "http://" or "https://" followed by at least one more character.
URL_PATTERN = r'https?://.+'

# Accepted values for the "carpentry" header key.
CARPENTRIES = ("dc", "swc")
# Placeholder contact address; check_email() rejects it.
DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org'

# Message printed when the wrong number of command-line arguments is given.
# NOTE(review): the main program joins "index.html" onto its argument, so
# the argument looks like a directory rather than a path to index.html --
# confirm and adjust the wording if so.
USAGE = 'Usage: "check-workshop path/to/index.html"\n'
42 # Country and language codes. Note that codes mean different things: 'ar'
43 # is 'Arabic' as a language but 'Argentina' as a country.
# Lowercase two-letter country codes accepted by check_country().  Kept as
# a list so the order used in error messages stays stable.
ISO_COUNTRY = [
    'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
    'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
    'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
    'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
    'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
    'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
    'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
    'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
    'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
    'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
    'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
    'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
    'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
    'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
    'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
    'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
    'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
    'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
    'ye', 'yt', 'za', 'zm', 'zw',
]
# Lowercase two-letter language codes accepted by check_language().  Kept
# as a list so the order used in error messages stays stable.
ISO_LANGUAGE = [
    'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
    'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
    'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
    'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
    'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
    'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
    'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
    'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
    'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
    'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
    'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
    'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
    'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
    'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
    'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
    'yo', 'za', 'zh', 'zu',
]
def add_error(msg, errors):
    """Add error to the list of errors.

    `msg` is appended to `errors` in place; nothing is returned.
    """
    errors.append(msg)
def add_suberror(msg, errors):
    """Record a sub-error: `msg`, indented by one tab character, is
    appended to the `errors` list in place."""
    indented = "\t" + "{0}".format(msg)
    errors.append(indented)
def look_for_fixme(func):
    '''Decorator to fail test if text argument starts with "FIXME".

    The returned wrapper takes the single value to check.  If that value is
    a string whose first non-blank characters are "FIXME" the wrapper
    returns False without calling `func`; otherwise it returns
    `func(value)`.
    '''
    from functools import wraps

    # wraps() keeps the name and docstring of the decorated checker, which
    # is where the individual checks are documented.
    @wraps(func)
    def inner(arg):
        # isinstance() already excludes None, so no separate None test.
        if isinstance(arg, str) and arg.lstrip().startswith('FIXME'):
            return False
        return func(arg)
    return inner
def check_layout(layout):
    '''The YAML header's "layout" value has to be exactly "workshop".'''

    expected = 'workshop'
    return layout == expected
def check_carpentry(layout):
    '''The YAML header's "carpentry" value has to be one of the entries of
    CARPENTRIES ("dc" or "swc").'''

    is_known = layout in CARPENTRIES
    return is_known
def check_country(country):
    '''The "country" value has to be one of the lowercase two-letter
    ISO-3166 codes listed in ISO_COUNTRY.'''

    is_known = country in ISO_COUNTRY
    return is_known
def check_language(language):
    '''The "language" value has to be one of the lowercase two-letter
    ISO-639 codes listed in ISO_LANGUAGE.'''

    is_known = language in ISO_LANGUAGE
    return is_known
def check_humandate(date):
    '''"humandate" must be a human-readable date with a 3-letter month and
    4-digit year.  Examples include "Feb 18-20, 2025" and "Feb 18 and
    20, 2025".  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Software Carpentry web site.

    Returns True when the value has that shape and False otherwise
    (including when the value is not a string).'''

    # A missing or non-text value can never be a valid human date.
    if not isinstance(date, str):
        return False

    # Split on the LAST comma so day lists such as "Feb 18, 20, 2025" do
    # not break the month/year separation.
    month_dates, comma, year = date.rpartition(",")
    if not comma:
        return False

    # Need at least three month characters plus the blank that follows.
    if len(month_dates) < 4:
        return False

    # The first three characters of month_dates are not empty
    month = month_dates[:3]
    if any(char == " " for char in month):
        return False

    # But the fourth character is empty ("February" is illegal)
    if month_dates[3] != " ":
        return False

    # year contains *only* numbers, and exactly four of them
    year = year.strip()
    return len(year) == 4 and all(char in "0123456789" for char in year)
def check_humantime(time):
    '''"humantime" is a human-readable start and end time for the workshop,
    such as "09:00 - 16:00".

    Spaces are ignored; the rest must start with a range that matches
    HUMANTIME_PATTERN.  Non-string values fail the check.'''

    # YAML may hand over None or a number here; those cannot be matched and
    # must fail the check rather than raise.
    if not isinstance(time, str):
        return False

    compact = time.replace(" ", "")
    return bool(re.match(HUMANTIME_PATTERN, compact))
def check_date(this_date):
    '''Validate "startdate" / "enddate": the machine-readable workshop
    dates, written as YYYY-MM-DD (e.g., "2015-07-01") in the header.'''

    import datetime

    # The YAML loader turns well-formed dates into datetime.date objects,
    # so a type test is all that is needed here.
    return isinstance(this_date, datetime.date)
def check_latitude_longitude(latlng):
    '''"latlng" must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Returns True when the latitude lies in [-90, 90] and the longitude in
    [-180, 180]; returns False for anything that cannot be parsed that way
    (including non-string values).'''

    if not isinstance(latlng, str):
        return False

    try:
        # Both a wrong number of comma-separated parts and a non-numeric
        # part raise ValueError.
        lat_text, lng_text = latlng.split(',')
        lat = float(lat_text)
        lng = float(lng_text)
    except ValueError:
        return False

    return (-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0)
def check_instructors(instructors):
    '''"instructor" has to be given as a list with at least one entry,
    e.g. ['First name', 'Second name'].'''

    # A list-like YAML value arrives here already parsed into a list.
    if not isinstance(instructors, list):
        return False
    return len(instructors) != 0
def check_helpers(helpers):
    '''"helper" has to be given as a list of quoted names, e.g.
    ['First name', 'Second name'].  Unlike the instructor list it is
    allowed to be empty.  Placeholders such as "TBD" should not be used.'''

    # A list-like YAML value arrives here already parsed into a list; any
    # length (including zero) is acceptable, so only the type matters.
    return isinstance(helpers, list)
def check_email(email):
    '''"contact" must be a valid email address consisting of characters, a
    @, and more characters.  It should not be the default contact
    email address "admin@software-carpentry.org".

    Returns False for non-string values, for the default address, and for
    text that does not start with something matching EMAIL_PATTERN.'''

    # An empty "contact:" entry is loaded as None; fail the check instead
    # of letting re.match() raise TypeError.
    if not isinstance(email, str):
        return False

    if email == DEFAULT_CONTACT_EMAIL:
        return False

    return bool(re.match(EMAIL_PATTERN, email))
def check_eventbrite(eventbrite):
    '''"eventbrite" (the Eventbrite registration key) must be 9 or more digits.

    The key may arrive as an int (an unquoted YAML number) or as a string;
    both are checked against EVENTBRITE_PATTERN.  Any other type fails.'''

    # Compare ints through their decimal text so the same digit-count rule
    # applies to them as to strings.
    if isinstance(eventbrite, int):
        eventbrite = str(eventbrite)

    if not isinstance(eventbrite, str):
        return False

    return bool(re.match(EVENTBRITE_PATTERN, eventbrite))
def check_etherpad(etherpad):
    '''The "etherpad" value has to look like a URL, i.e. start with text
    matching URL_PATTERN.'''

    found = re.match(URL_PATTERN, etherpad)
    return found is not None
def check_pass(value):
    '''This test always passes (it is used for "checking" things like
    addresses, for which no sensible validation is feasible).

    `value` is ignored; the result is always True.'''

    return True
# Map each known header key to a 3-tuple:
#   (is the key required?, function that checks its value, error message
#    reported when the check fails).
HANDLERS = {
    'layout': (True, check_layout, 'layout isn\'t "workshop"'),

    'carpentry': (True, check_carpentry, 'carpentry isn\'t in ' +
                  ', '.join(CARPENTRIES)),

    'country': (True, check_country,
                'country invalid: must use lowercase two-letter ISO code ' +
                'from ' + ', '.join(ISO_COUNTRY)),

    'language': (False, check_language,
                 'language invalid: must use lowercase two-letter ISO code' +
                 ' from ' + ', '.join(ISO_LANGUAGE)),

    # The message used to say "four-letter years"; years are digits.
    'humandate': (True, check_humandate,
                  'humandate invalid. Please use three-letter months like ' +
                  '"Jan" and four-digit years like "2025".'),

    'humantime': (True, check_humantime,
                  'humantime doesn\'t include numbers'),

    'startdate': (True, check_date,
                  'startdate invalid. Must be of format year-month-day, ' +
                  'i.e., 2014-01-31.'),

    # NOTE(review): the tail of this message was not visible; it mirrors
    # the startdate example -- confirm the intended wording.
    'enddate': (False, check_date,
                'enddate invalid. Must be of format year-month-day, i.e.,' +
                ' 2014-01-31.'),

    'latlng': (True, check_latitude_longitude,
               'latlng invalid. Check that it is two floating point ' +
               'numbers, separated by a comma.'),

    'instructor': (True, check_instructors,
                   'instructor list isn\'t a valid list of format ' +
                   '["First instructor", "Second instructor",..].'),

    'helper': (True, check_helpers,
               'helper list isn\'t a valid list of format ' +
               '["First helper", "Second helper",..].'),

    'contact': (True, check_email,
                'contact email invalid or still set to ' +
                '"{0}".'.format(DEFAULT_CONTACT_EMAIL)),

    'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid.'),

    'etherpad': (False, check_etherpad, 'Etherpad URL appears invalid.'),

    'venue': (False, check_pass, 'venue name not specified'),

    'address': (False, check_pass, 'address not specified'),
}
# REQUIRED holds every category whose HANDLERS entry is flagged required.
REQUIRED = {name for name, spec in HANDLERS.items() if spec[0]}

# OPTIONAL holds every remaining (non-required) category.
OPTIONAL = {name for name, spec in HANDLERS.items() if not spec[0]}
def check_validity(data, function, errors, error_msg):
    '''Wrapper-function around the various check-functions.

    Calls `function(data)`.  When the check fails, `error_msg` and a
    sub-error quoting the offending value are added to `errors`.  Returns
    the outcome of the check as a bool so callers can combine results.'''
    valid = bool(function(data))
    if not valid:
        add_error(error_msg, errors)
        add_suberror('Offending entry is: "{0}"'.format(data), errors)
    return valid
def check_blank_lines(raw_data, errors, error_msg):
    '''Blank lines are not allowed in category headers.

    Returns True when `raw_data` has no blank (empty or whitespace-only)
    lines.  Otherwise adds `error_msg` plus a sub-error with the number of
    blank lines to `errors` and returns False.'''
    lines = [x.strip() for x in raw_data.split('\n')]
    blank_count = lines.count('')
    if blank_count:
        add_error(error_msg, errors)
        add_suberror('{0} blank lines found in header'.format(blank_count), errors)
        return False
    return True
def check_categories(left, right, errors, error_msg):
    '''Report set difference of categories.

    Returns True when every member of `left` is also in `right`.
    Otherwise adds `error_msg` plus a sub-error listing the members of
    `left - right` to `errors` and returns False.'''
    result = left - right
    if result:
        add_error(error_msg, errors)
        add_suberror('Offending entries: {0}'.format(result), errors)
        return False
    return True
def get_header(text):
    '''Extract YAML header from raw data, returning (None, None) if no
    valid header found and (raw, parsed) if header found.

    The header is the text between the leading '---' and the next '---'.
    `raw` is that text stripped of surrounding whitespace and `parsed` is
    the result of loading it as YAML.'''

    # YAML header must be right at the start of the file.
    if not text.startswith('---'):
        return None, None

    # YAML header must start and end with '---', which yields at least
    # three pieces: '' before the first marker, the header, and the rest.
    pieces = text.split('---')
    if len(pieces) < 3:
        return None, None

    # Return raw text and YAML-ized form.  safe_load() is used because the
    # header is user-supplied text and only plain data is expected; plain
    # yaml.load() without a Loader is rejected by current PyYAML.
    raw = pieces[1].strip()
    return raw, yaml.safe_load(raw)
def check_file(filename, data, errors):
    '''Get header from file, call all other functions and check file
    for validity.

    `filename` is used only in messages, `data` is the file's text, and
    every problem found is added to `errors`.  Returns True when the
    header passes every check and False otherwise.'''

    raw, header = get_header(data)
    # No header at all, or a header that is not a key/value mapping, cannot
    # be checked any further.
    if not isinstance(header, dict):
        msg = ('Cannot find YAML header in given file "{0}".'.format(filename))
        add_error(msg, errors)
        return False

    # Do we have any blank lines in the header?
    is_valid = check_blank_lines(raw, errors,
                                 'There are blank lines in the header')

    # Look through all header entries.  If the category is in the input
    # file and is either required or we have actual data (as opposed to
    # a commented-out entry), we check it.  If it *isn't* in the header
    # but is required, report an error.
    for category in HANDLERS:
        required, handler_function, error_message = HANDLERS[category]
        if category in header:
            if required or header[category]:
                is_valid &= check_validity(header[category],
                                           handler_function, errors,
                                           error_message)
        elif required:
            msg = 'index file is missing mandatory key "{0}"'.format(category)
            add_error(msg, errors)
            is_valid = False

    # Check whether we have missing or too many categories
    seen_categories = set(header.keys())

    is_valid &= check_categories(REQUIRED, seen_categories, errors,
                                 'There are missing categories')

    is_valid &= check_categories(seen_categories, REQUIRED.union(OPTIONAL),
                                 errors, 'There are superfluous categories')

    return is_valid
def check_config(filename, errors):
    '''Check YAML configuration file.

    Reads `filename` and adds an error to `errors` unless its top-level
    "kind" entry is "workshop".  A file without a "kind" entry (or without
    a top-level mapping) is reported the same way, with "None" as the
    value found.'''

    # safe_load() is used because only plain data is expected; plain
    # yaml.load() without a Loader is rejected by current PyYAML.
    with open(filename, 'r') as reader:
        config = yaml.safe_load(reader)

    # An empty file loads as None and a malformed one may not be a mapping;
    # treat both as "kind not set" instead of raising.
    kind = config.get('kind') if isinstance(config, dict) else None

    if kind != 'workshop':
        msg = 'Not configured as a workshop: found "{0}" instead'.format(kind)
        add_error(msg, errors)
def main():
    '''Run as the main program.

    Expects exactly one command-line argument: the directory containing
    "index.html" and "_config.yml".  Logs every problem found and exits
    with status 1 if there is any (or if the arguments are wrong).'''

    if len(sys.argv) != 2:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    root_dir = sys.argv[1]
    index_file = os.path.join(root_dir, 'index.html')
    config_file = os.path.join(root_dir, '_config.yml')
    logger.info('Testing "{0}" and "{1}"'.format(index_file, config_file))

    # Both checks append their findings to this shared list.
    errors = []
    check_config(config_file, errors)
    with open(index_file) as reader:
        data = reader.read()
    check_file(index_file, data, errors)

    if errors:
        for message in errors:
            logger.error(message)
        sys.exit(1)

    logger.info('Everything seems to be in order')


if __name__ == '__main__':
    main()