2016-02-23

Python - BeautifulSoup: print a given attribute of all elements of a certain type

import os, sys
from bs4 import BeautifulSoup

# Import HTML text
filename = '/PATH/FILE-NAME.html'
html = "".join(open( filename ).readlines()).replace('\n',' ').decode("utf8")
soup = BeautifulSoup(html, 'html5lib')

# Parse subtitles, for each subtitle
for v in soup.findAll( 'HTML-NODE-ELEMENT-TO-FIND' ):

    # Print the attribute
    print v['ATTRIBUTE-NAME'].strip().encode("utf8")

No comments:

Post a Comment