api.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. """API module"""
  2. import re
  3. from datetime import datetime, timedelta
  4. import requests
  5. from bs4 import BeautifulSoup
  6. from app import BASE_URL, HEADERS
  7. def get_regions(state_id):
  8. """Download list of regions"""
  9. regions = []
  10. not_reached_date = True
  11. page = 0
  12. while not_reached_date:
  13. tmp_regions = download_regions(state_id, page)
  14. if not tmp_regions:
  15. not_reached_date = False
  16. break
  17. # tmp_regions = read_regions()
  18. if len(tmp_regions) < 25:
  19. not_reached_date = False
  20. regions += tmp_regions
  21. page += 1
  22. return regions
  23. def download_regions(state_id, page):
  24. """Download the region"""
  25. response = requests.get(
  26. '{}listed/state/{}/{}'.format(BASE_URL, state_id, page*25),
  27. headers=HEADERS
  28. )
  29. return parse_regions(response.text)
  30. def read_regions():
  31. """Read from file"""
  32. with open('regions.html') as file:
  33. return parse_regions(file)
  34. def parse_regions(html):
  35. """Parse html return regions"""
  36. soup = BeautifulSoup(html, 'html.parser')
  37. regions_tree = soup.find_all(class_='list_link')
  38. regions = []
  39. for region_tree in regions_tree:
  40. columns = region_tree.find_all('td')
  41. regions.append({
  42. 'id': int(region_tree['user']),
  43. 'name': columns[1].string,
  44. })
  45. return regions