import io
import os
import sys

from bs4 import BeautifulSoup
# Print one attribute of every matching element in an HTML file.
#
# The path, element name and attribute name below are placeholders; replace
# them with real values before running.
filename = '/PATH/FILE-NAME.html'

# Read the document as UTF-8 text. io.open takes an encoding on both
# Python 2 and Python 3, and the with-block closes the handle even if
# reading fails. Newlines are replaced with spaces before parsing.
with io.open(filename, encoding='utf8') as html_file:
    html = html_file.read().replace('\n', ' ')
soup = BeautifulSoup(html, 'html5lib')

# Write raw UTF-8 bytes so the output does not depend on the console
# encoding: Python 3 exposes the byte stream as sys.stdout.buffer, while
# Python 2's sys.stdout accepts byte strings directly.
out = getattr(sys.stdout, 'buffer', sys.stdout)

# For each matching element, print the stripped attribute value on its own
# line. An element without the attribute raises KeyError, as before.
for v in soup.find_all('HTML-NODE-ELEMENT-TO-FIND'):
    out.write(v['ATTRIBUTE-NAME'].strip().encode('utf8') + b'\n')
# (Web-page footer residue, not part of the script:)
# No comments:
# Post a Comment