| 123456789101112131415161718192021222324252627282930313233343536373839 |
- """
- Parse region entries out of a saved HTML page.
- """
- from bs4 import BeautifulSoup
# Region names known to this script, each mapped to None.
REGIONS = dict.fromkeys(
    (
        'Northern Netherlands',
        'Eastern Netherlands',
        'Southern Netherlands',
        'Western Netherlands',
        'Amsterdam',
        'Luxembourg',
    )
)
def read(path="index.html"):
    """Read the region entries from an HTML file.

    Args:
        path: Location of the HTML file to parse. Defaults to
            ``index.html``, resolved against the current working directory.

    Returns:
        The result of ``find_all(class_='list_link')`` on the parsed
        document, i.e. every element carrying the ``list_link`` class.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Binary mode hands raw bytes to BeautifulSoup so it can detect the
    # document's encoding itself instead of depending on the platform's
    # default text encoding.
    with open(path, "rb") as file:
        soup = BeautifulSoup(file, 'html.parser')
    return soup.find_all(class_='list_link')
def parse_regios(regios_tree):
    """Print and collect the name, level and title text of each entry.

    For every entry the ``list_name`` and ``list_level`` child elements are
    looked up. The text of both and the ``title`` attribute of the level
    element (with the ``'Hospital: '`` text removed) are printed as one
    right-aligned row.

    Args:
        regios_tree: Iterable of parsed elements, each offering
            ``find(class_=...)`` (for example the result of ``read``).

    Returns:
        A list of ``(name, index, buildings)`` tuples, one per printed row,
        in input order. Entries lacking either child element are skipped.
    """
    rows = []
    for regio_tree in regios_tree:
        name_tag = regio_tree.find(class_='list_name')
        index_tag = regio_tree.find(class_='list_level')
        # find() yields None when the element is absent; skip such entries
        # rather than failing with AttributeError halfway through the page.
        if name_tag is None or index_tag is None:
            continue
        name = name_tag.string
        index = index_tag.string
        # A level element without a title attribute gives an empty string.
        buildings = index_tag.get('title', '').replace('Hospital: ', '')
        # %-formatting is kept on purpose: it also renders None (a tag
        # without a single string child) instead of raising.
        print("%40s %2s %4s" % (name, index, buildings))
        rows.append((name, index, buildings))
    return rows
if __name__ == '__main__':
    # Script entry point: load the entries from disk and report on them.
    data = parse_regios(read())
|