# api.py (2.0 KB)
  1. """API module"""
  2. import re
  3. from datetime import datetime, date, timedelta
  4. import requests
  5. from bs4 import BeautifulSoup
  6. from app import BASE_URL, HEADERS
  7. TYPES = {
  8. 'yellow': 1,
  9. 'oil': 2,
  10. 'ore': 5,
  11. 'uranium': 11,
  12. 'diamond': 15,
  13. }
  14. def get_factories(region_id):
  15. """Get factories from state"""
  16. # return read_factories()
  17. return download_factories(region_id)
  18. def read_factories():
  19. """Read factories file"""
  20. with open('factories_4001.html') as file:
  21. factories, more = parse_factories(file, 4001)
  22. return factories
  23. def download_factories(region_id):
  24. """Download the factories"""
  25. factories = []
  26. more = True
  27. page = 0
  28. while more:
  29. response = requests.get(
  30. '{}factory/search/{}/0/0/{}'.format(BASE_URL, region_id, page*25),
  31. headers=HEADERS
  32. )
  33. tmp_factories, more = parse_factories(response.text, region_id)
  34. factories = factories + tmp_factories
  35. page += 1
  36. return factories
  37. def parse_factories(html, region_id):
  38. """Parse html return factories"""
  39. soup = BeautifulSoup(html, 'html.parser')
  40. factories_tree = soup.find_all(class_='list_link')
  41. factories = []
  42. for factory_tree in factories_tree:
  43. columns = factory_tree.find_all('td')
  44. factories.append({
  45. 'region_id': region_id,
  46. 'id': int(factory_tree['user']),
  47. 'name': columns[1].contents[0].strip(),
  48. 'level': int(columns[2].string),
  49. 'workers': int(re.sub(r'\/[0-9]*$', '', columns[3].string)),
  50. 'wage': int(columns[4].string.replace('%', '')),
  51. 'experience': int(columns[5].string),
  52. })
  53. return factories, bool(len(factories_tree) >= 25)
  54. def parse_date(date_string):
  55. """Parse date to object"""
  56. if 'Today' in date_string:
  57. return date.today()
  58. if 'Yesterday' in date_string:
  59. return date.today() - timedelta(1)
  60. return datetime.strptime(date_string, '%d %B %Y').date()