|
|
@@ -1,39 +0,0 @@
|
|
|
-
|
|
|
-"""
|
|
|
-Parse HTML pages
|
|
|
-"""
|
|
|
-
|
|
|
-from bs4 import BeautifulSoup
|
|
|
-
|
|
|
-
|
|
|
# Mapping of region name -> None for each of the six listed regions.
# NOTE(review): the values look like placeholders to be filled in later;
# nothing in the visible code reads or writes this mapping -- confirm.
REGIONS = dict.fromkeys((
    'Northern Netherlands',
    'Eastern Netherlands',
    'Southern Netherlands',
    'Western Netherlands',
    'Amsterdam',
    'Luxembourg',
))
|
|
|
-
|
|
|
-
|
|
|
def read(path="index.html"):
    """Read the region entries from an HTML file.

    Args:
        path: Location of the HTML file to parse. Defaults to
            ``index.html`` (relative to the current working directory),
            which is the path this function previously hard-coded.

    Returns:
        The result of ``soup.find_all(class_='list_link')``, i.e. every
        element in the document that carries the ``list_link`` class.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Open in binary mode so BeautifulSoup detects the document's encoding
    # itself rather than depending on the platform's default text encoding.
    with open(path, "rb") as handle:
        soup = BeautifulSoup(handle, 'html.parser')
    return soup.find_all(class_='list_link')
|
|
|
-
|
|
|
-
|
|
|
def parse_regios(regios_tree):
    """Print one formatted line per region entry and return the parsed rows.

    Args:
        regios_tree: Iterable of elements that support
            ``find(class_=...)``. Presumably the BeautifulSoup tags
            returned by ``read()`` -- verify against the caller.

    Returns:
        A list of ``(name, index, buildings)`` tuples, one per entry that
        has both a ``list_name`` and a ``list_level`` child, in input order.
        ``buildings`` is the ``title`` attribute of the ``list_level`` child
        with the ``'Hospital: '`` prefix removed (empty string when the
        attribute is absent).
    """
    rows = []
    for regio_tree in regios_tree:
        name_tag = regio_tree.find(class_='list_name')
        index_tag = regio_tree.find(class_='list_level')
        # find() yields None when no matching child exists; skip such
        # entries instead of failing on the attribute access below.
        if name_tag is None or index_tag is None:
            continue
        name = name_tag.string
        index = index_tag.string
        # .get() avoids a KeyError when the tag has no title attribute.
        buildings = index_tag.get('title', '').replace('Hospital: ', '')
        print("%40s %2s %4s" % (name, index, buildings))
        rows.append((name, index, buildings))
    return rows
|
|
|
-
|
|
|
-
|
|
|
if __name__ == '__main__':
    # Script entry point: feed the entries returned by read() straight into
    # parse_regios() and keep whatever it returns in `data`.
    data = parse_regios(read())
|