from html.parser import HTMLParser


class my_html_parser(HTMLParser):
    """Collect ``href`` values of ``<a class="view_img_link">`` start tags.

    Feed HTML text through :meth:`feed`; every matching link target is
    appended, in document order, to :attr:`imgs`.

    Attributes:
        imgs: ``href`` values collected so far.
        bprint: Whether the most recently handled start tag was an ``<a>``
            whose ``class`` attribute equals ``view_img_link``.
    """

    def __init__(self):
        super().__init__()
        self.bprint = False
        self.imgs = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of an ``<a>`` tag marked ``view_img_link``.

        Args:
            tag: Tag name as passed in by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None``
                for an attribute written without a value.
        """
        href = ''
        is_img_link = False
        # Compare by value: the previous identity test against the literal
        # 'a' had an interpreter-dependent result.
        if tag == 'a':
            for name, value in attrs:
                if name == 'href':
                    href = value
                elif name == 'class' and value == 'view_img_link':
                    is_img_link = True
        self.bprint = is_img_link
        # Truthiness rejects both '' and None, so a bare ``href`` attribute
        # never puts None into the result list.
        if is_img_link and href:
            self.imgs.append(href)

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags.

        Overriding with a no-op suppresses the base-class behaviour of
        forwarding ``<tag ... />`` to :meth:`handle_starttag`.
        """
        pass

    def handle_endtag(self, tag):
        """Ignore end tags."""
        pass

    def print_img(self):
        """Print every collected link, one per line."""
        for img in self.imgs:
            print(img)


if __name__ == '__main__':
    parser = my_html_parser()
    parser.feed('''Some html HTML tutorial...
END ''')