"""Parse region entries out of a saved HTML page.

Run as a script, this reads ``index.html`` from the current working
directory, prints one formatted line per element carrying the
``list_link`` class, and keeps the parsed rows in ``data``.
"""
from bs4 import BeautifulSoup

# Region names mapped to ``None`` placeholders.
# NOTE(review): nothing in this file reads or fills REGIONS; it is kept
# unchanged in case other code imports it -- confirm before removing.
REGIONS = {
    'Northern Netherlands': None,
    'Eastern Netherlands': None,
    'Southern Netherlands': None,
    'Western Netherlands': None,
    'Amsterdam': None,
    'Luxembourg': None,
}


def read(path="index.html"):
    """Read data from HTML.

    Args:
        path: Location of the HTML file to parse. Defaults to
            ``"index.html"``, the file name this function always used
            before the parameter existed.

    Returns:
        The result of ``soup.find_all(class_='list_link')``: every element
        in the document that carries the ``list_link`` class.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path) as file:
        soup = BeautifulSoup(file, 'html.parser')
    return soup.find_all(class_='list_link')


def parse_regios(regios_tree):
    """Parse raw data.

    For every element in ``regios_tree`` this looks up the first descendant
    with class ``list_name`` and the first with class ``list_level``, prints
    one right-aligned line (widths 40, 2 and 4), and records the values.

    Args:
        regios_tree: Iterable of BeautifulSoup tags, e.g. the value
            returned by ``read()``.

    Returns:
        A list of ``(name, index, buildings)`` tuples in input order, where
        ``name`` and ``index`` are the ``.string`` of the ``list_name`` and
        ``list_level`` tags, and ``buildings`` is the ``title`` attribute of
        the ``list_level`` tag with every ``'Hospital: '`` occurrence
        removed. An empty input yields an empty list.

    Raises:
        AttributeError: If an element has no ``list_name`` or ``list_level``
            descendant (``find`` returns ``None`` in that case).
        KeyError: If the ``list_level`` tag has no ``title`` attribute.
    """
    rows = []
    for regio_tree in regios_tree:
        name = regio_tree.find(class_='list_name').string

        level_tag = regio_tree.find(class_='list_level')
        index = level_tag.string
        buildings = level_tag['title'].replace('Hospital: ', '')

        # %-formatting is kept on purpose: ``.string`` can be None, which
        # ``%s`` renders as "None" but an f-string width spec would reject.
        print("%40s %2s %4s" % (name, index, buildings))
        rows.append((name, index, buildings))
    return rows


if __name__ == '__main__':
    raw_data = read()
    data = parse_regios(raw_data)