|
|
@@ -0,0 +1,39 @@
|
|
|
+
|
|
|
+"""
|
|
|
+Parse html pages
|
|
|
+"""
|
|
|
+
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+
|
|
|
+
|
|
|
# Region names known to this module, each mapped to None.
REGIONS = dict.fromkeys((
    'Northern Netherlands',
    'Eastern Netherlands',
    'Southern Netherlands',
    'Western Netherlands',
    'Amsterdam',
    'Luxembourg',
))
|
|
|
+
|
|
|
+
|
|
|
def read(path="index.html"):
    """Read the ``list_link`` elements from a saved HTML page.

    Args:
        path: Location of the HTML file to parse. Defaults to
            ``"index.html"``, resolved against the current working
            directory.

    Returns:
        The result of ``find_all(class_='list_link')`` on the parsed
        document, i.e. every element carrying the CSS class
        ``list_link``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Binary mode on purpose: BeautifulSoup then receives the raw bytes and
    # detects the document encoding itself, instead of the page being decoded
    # with whatever the platform's default locale encoding happens to be.
    with open(path, "rb") as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')
    return soup.find_all(class_='list_link')
|
|
|
+
|
|
|
+
|
|
|
def parse_regios(regios_tree):
    """Parse raw region elements into records and print them as a table.

    Args:
        regios_tree: Iterable of parsed HTML elements. Each one is searched
            for a child with class ``list_name`` and a child with class
            ``list_level``.

    Returns:
        A list of ``(name, index, buildings)`` tuples, one per entry that
        contains both child elements. ``name`` and ``index`` are the
        ``.string`` of the respective child; ``buildings`` is the
        ``list_level`` child's ``title`` attribute with the
        ``'Hospital: '`` prefix text removed (empty string when the
        attribute is absent).
    """
    records = []
    for regio_tree in regios_tree:
        name_tag = regio_tree.find(class_='list_name')
        index_tag = regio_tree.find(class_='list_level')
        if name_tag is None or index_tag is None:
            # Entry lacks one of the expected children; skip it rather than
            # crash on attribute access of None.
            continue
        name = name_tag.string
        index = index_tag.string
        # .get() instead of [] so a missing title yields '' and not KeyError.
        buildings = (index_tag.get('title') or '').replace('Hospital: ', '')
        print("%40s %2s %4s" % (name, index, buildings))
        records.append((name, index, buildings))
    return records
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: load the elements from the page and hand them
    # straight to the parser, keeping whatever it returns in `data`.
    data = parse_regios(read())
|