Handling anchors by ignoring them
author Greg Wilson <gvwilson@third-bit.com>
Fri, 8 Jul 2016 11:07:18 +0000 (07:07 -0400)
committer Greg Wilson <gvwilson@third-bit.com>
Fri, 8 Jul 2016 11:07:18 +0000 (07:07 -0400)
bin/extract_figures.py

index 519912285a14d62fad8a354470a78cc858f9a723..7eb55f0c405ab7dfe5a80eeb24257368c4375591 100755 (executable)
@@ -69,7 +69,7 @@ def find_image_links(doc, result):
 
     if ((doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr'])) \
        or \
-       ((doc['type'] == 'html_element') and (doc['value'] == 'a')):
+       ((doc['type'] == 'html_element') and (doc['value'] == 'a') and ('href' in doc['attr'])):
         path = doc['attr']['href']
         if os.path.splitext(path)[1].lower() in IMAGE_FILE_SUFFIX:
             result.append({'alt':'', 'src': doc['attr']['href']})