|
@@ -24,8 +24,8 @@ def get_factories(region_id):
|
|
|
|
|
|
def read_factories(region_id=4001):
    """Read a locally saved factories page and return its parsed factories.

    Opens ``factories_<region_id>.html`` (a relative path) and hands the open
    file object, together with the region id, to ``parse_factories``.

    Args:
        region_id: Region whose saved page is read. Defaults to 4001, which
            resolves to the previously hard-coded ``factories_4001.html``.

    Returns:
        The first element of the pair returned by ``parse_factories``.

    Raises:
        OSError: If the saved page cannot be opened.
    """
    with open('factories_{}.html'.format(region_id)) as file:
        # parse_factories returns a pair; the second element is not used here.
        factories, _ = parse_factories(file, region_id)
    return factories
|
|
|
|
|
|
def download_factories(region_id):
|
|
@@ -38,29 +38,24 @@ def download_factories(region_id):
|
|
|
'{}factory/search/{}/0/0/{}'.format(BASE_URL, region_id, page*25),
|
|
|
headers=HEADERS
|
|
|
)
|
|
|
- tmp_factories, more = parse_factories(response.text)
|
|
|
+ tmp_factories, more = parse_factories(response.text, region_id)
|
|
|
factories = factories + tmp_factories
|
|
|
page += 1
|
|
|
return factories
|
|
|
|
|
|
-def parse_factories(html):
|
|
|
+def parse_factories(html, region_id):
|
|
|
"""Parse html return factories"""
|
|
|
soup = BeautifulSoup(html, 'html.parser')
|
|
|
factories_tree = soup.find_all(class_='list_link')
|
|
|
factories = []
|
|
|
for factory_tree in factories_tree:
|
|
|
columns = factory_tree.find_all('td')
|
|
|
- if columns[1].contents[4].name == 'span':
|
|
|
- resource_type = TYPES[columns[1].contents[4]['class'][0]]
|
|
|
- else:
|
|
|
- resource_type = None
|
|
|
factories.append({
|
|
|
- 'id': factory_tree['user'],
|
|
|
+ 'region_id': region_id,
|
|
|
+ 'id': int(factory_tree['user']),
|
|
|
'name': columns[1].contents[0].strip(),
|
|
|
- 'resource_type': resource_type,
|
|
|
- 'region_name': columns[1].contents[2],
|
|
|
- 'level': columns[2].string,
|
|
|
- 'workers': re.sub(r'\/[0-9]*$', '', columns[3].string),
|
|
|
+ 'level': int(columns[2].string),
|
|
|
+ 'workers': int(re.sub(r'\/[0-9]*$', '', columns[3].string)),
|
|
|
'wage': int(columns[4].string.replace('%', '')),
|
|
|
'experience': int(columns[5].string),
|
|
|
})
|