Selaa lähdekoodia

Initial commit

JoostSijm 5 vuotta sitten
commit
426449c564
13 muutettua tiedostoa jossa 483 lisäystä ja 0 poistoa
  1. 5 0
      .gitignore
  2. 17 0
      Pipfile
  3. 155 0
      Pipfile.lock
  4. 54 0
      app/__init__.py
  5. 33 0
      app/__main__.py
  6. 52 0
      app/api.py
  7. 24 0
      app/app.py
  8. 71 0
      app/database.py
  9. 18 0
      app/job_storage.py
  10. 8 0
      app/jobs.py
  11. 40 0
      app/models.py
  12. 3 0
      example.env
  13. 3 0
      example.jobs.json

+ 5 - 0
.gitignore

@@ -0,0 +1,5 @@
+.venv/
+.env
+__pycache__
+*.log
+jobs.json

+ 17 - 0
Pipfile

@@ -0,0 +1,17 @@
+[[source]]
+name = "pypi"
+url = "https://pypi.org/simple"
+verify_ssl = true
+
+[dev-packages]
+
+[packages]
+requests = "*"
+beautifulsoup4 = "*"
+sqlalchemy = "*"
+python-dotenv = "*"
+psycopg2-binary = "*"
+apscheduler = "*"
+
+[requires]
+python_version = "3"

+ 155 - 0
Pipfile.lock

@@ -0,0 +1,155 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "dfc866db4a7e79933532497f154755e037f8ef5947733a821745397d78893acf"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "apscheduler": {
+            "hashes": [
+                "sha256:3bb5229eed6fbbdafc13ce962712ae66e175aa214c69bed35a06bffcf0c5e244",
+                "sha256:e8b1ecdb4c7cb2818913f766d5898183c7cb8936680710a4d3a966e02262e526"
+            ],
+            "index": "pypi",
+            "version": "==3.6.3"
+        },
+        "beautifulsoup4": {
+            "hashes": [
+                "sha256:5279c36b4b2ec2cb4298d723791467e3000e5384a43ea0cdf5d45207c7e97169",
+                "sha256:6135db2ba678168c07950f9a16c4031822c6f4aec75a65e0a97bc5ca09789931",
+                "sha256:dcdef580e18a76d54002088602eba453eec38ebbcafafeaabd8cab12b6155d57"
+            ],
+            "index": "pypi",
+            "version": "==4.8.1"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3",
+                "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f"
+            ],
+            "version": "==2019.11.28"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "version": "==3.0.4"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
+                "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c"
+            ],
+            "version": "==2.8"
+        },
+        "psycopg2-binary": {
+            "hashes": [
+                "sha256:040234f8a4a8dfd692662a8308d78f63f31a97e1c42d2480e5e6810c48966a29",
+                "sha256:086f7e89ec85a6704db51f68f0dcae432eff9300809723a6e8782c41c2f48e03",
+                "sha256:18ca813fdb17bc1db73fe61b196b05dd1ca2165b884dd5ec5568877cabf9b039",
+                "sha256:19dc39616850342a2a6db70559af55b22955f86667b5f652f40c0e99253d9881",
+                "sha256:2166e770cb98f02ed5ee2b0b569d40db26788e0bf2ec3ae1a0d864ea6f1d8309",
+                "sha256:3a2522b1d9178575acee4adf8fd9f979f9c0449b00b4164bb63c3475ea6528ed",
+                "sha256:3aa773580f85a28ffdf6f862e59cb5a3cc7ef6885121f2de3fca8d6ada4dbf3b",
+                "sha256:3b5deaa3ee7180585a296af33e14c9b18c218d148e735c7accf78130765a47e3",
+                "sha256:407af6d7e46593415f216c7f56ba087a9a42bd6dc2ecb86028760aa45b802bd7",
+                "sha256:4c3c09fb674401f630626310bcaf6cd6285daf0d5e4c26d6e55ca26a2734e39b",
+                "sha256:4c6717962247445b4f9e21c962ea61d2e884fc17df5ddf5e35863b016f8a1f03",
+                "sha256:50446fae5681fc99f87e505d4e77c9407e683ab60c555ec302f9ac9bffa61103",
+                "sha256:5057669b6a66aa9ca118a2a860159f0ee3acf837eda937bdd2a64f3431361a2d",
+                "sha256:5dd90c5438b4f935c9d01fcbad3620253da89d19c1f5fca9158646407ed7df35",
+                "sha256:659c815b5b8e2a55193ede2795c1e2349b8011497310bb936da7d4745652823b",
+                "sha256:69b13fdf12878b10dc6003acc8d0abf3ad93e79813fd5f3812497c1c9fb9be49",
+                "sha256:7a1cb80e35e1ccea3e11a48afe65d38744a0e0bde88795cc56a4d05b6e4f9d70",
+                "sha256:7e6e3c52e6732c219c07bd97fff6c088f8df4dae3b79752ee3a817e6f32e177e",
+                "sha256:7f42a8490c4fe854325504ce7a6e4796b207960dabb2cbafe3c3959cb00d1d7e",
+                "sha256:84156313f258eafff716b2961644a4483a9be44a5d43551d554844d15d4d224e",
+                "sha256:8578d6b8192e4c805e85f187bc530d0f52ba86c39172e61cd51f68fddd648103",
+                "sha256:890167d5091279a27e2505ff0e1fb273f8c48c41d35c5b92adbf4af80e6b2ed6",
+                "sha256:98e10634792ac0e9e7a92a76b4991b44c2325d3e7798270a808407355e7bb0a1",
+                "sha256:9aadff9032e967865f9778485571e93908d27dab21d0fdfdec0ca779bb6f8ad9",
+                "sha256:9f24f383a298a0c0f9b3113b982e21751a8ecde6615494a3f1470eb4a9d70e9e",
+                "sha256:a73021b44813b5c84eda4a3af5826dd72356a900bac9bd9dd1f0f81ee1c22c2f",
+                "sha256:afd96845e12638d2c44d213d4810a08f4dc4a563f9a98204b7428e567014b1cd",
+                "sha256:b73ddf033d8cd4cc9dfed6324b1ad2a89ba52c410ef6877998422fcb9c23e3a8",
+                "sha256:b8f490f5fad1767a1331df1259763b3bad7d7af12a75b950c2843ba319b2415f",
+                "sha256:dbc5cd56fff1a6152ca59445178652756f4e509f672e49ccdf3d79c1043113a4",
+                "sha256:eac8a3499754790187bb00574ab980df13e754777d346f85e0ff6df929bcd964",
+                "sha256:eaed1c65f461a959284649e37b5051224f4db6ebdc84e40b5e65f2986f101a08"
+            ],
+            "index": "pypi",
+            "version": "==2.8.4"
+        },
+        "python-dotenv": {
+            "hashes": [
+                "sha256:debd928b49dbc2bf68040566f55cdb3252458036464806f4094487244e2a4093",
+                "sha256:f157d71d5fec9d4bd5f51c82746b6344dffa680ee85217c123f4a0c8117c4544"
+            ],
+            "index": "pypi",
+            "version": "==0.10.3"
+        },
+        "pytz": {
+            "hashes": [
+                "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
+                "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
+            ],
+            "version": "==2019.3"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
+                "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31"
+            ],
+            "index": "pypi",
+            "version": "==2.22.0"
+        },
+        "six": {
+            "hashes": [
+                "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd",
+                "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66"
+            ],
+            "version": "==1.13.0"
+        },
+        "soupsieve": {
+            "hashes": [
+                "sha256:bdb0d917b03a1369ce964056fc195cfdff8819c40de04695a80bc813c3cfa1f5",
+                "sha256:e2c1c5dee4a1c36bcb790e0fabd5492d874b8ebd4617622c4f6a731701060dda"
+            ],
+            "version": "==1.9.5"
+        },
+        "sqlalchemy": {
+            "hashes": [
+                "sha256:bfb8f464a5000b567ac1d350b9090cf081180ec1ab4aa87e7bca12dab25320ec"
+            ],
+            "index": "pypi",
+            "version": "==1.3.12"
+        },
+        "tzlocal": {
+            "hashes": [
+                "sha256:11c9f16e0a633b4b60e1eede97d8a46340d042e67b670b290ca526576e039048",
+                "sha256:949b9dd5ba4be17190a80c0268167d7e6c92c62b30026cf9764caf3e308e5590"
+            ],
+            "version": "==2.0.0"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293",
+                "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745"
+            ],
+            "version": "==1.25.7"
+        }
+    },
+    "develop": {}
+}

+ 54 - 0
app/__init__.py

@@ -0,0 +1,54 @@
+"""STate Region Logger"""
+
+import os
+import logging
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from dotenv import load_dotenv
+from apscheduler.schedulers.background import BackgroundScheduler
+
+
+# Load variables from a local .env file (see example.env) into os.environ.
+load_dotenv()
+
+# database
+# os.environ[...] raises KeyError at import time when DATABASE_URI is missing.
+ENGINE = create_engine(os.environ["DATABASE_URI"], client_encoding='utf8')
+# Session factory: call SESSION() to obtain a new session bound to ENGINE.
+SESSION = sessionmaker(bind=ENGINE)
+
+# scheduler
+# misfire_grace_time is given in seconds (10 minutes here).
+SCHEDULER = BackgroundScheduler(
+    daemon=True,
+    job_defaults={'misfire_grace_time': 10*60},
+)
+# NOTE(review): the scheduler is started while this package is imported and
+# before the log handlers below are attached -- confirm this is intended.
+SCHEDULER.start()
+
+# get logger
+# Application logger records INFO and above; the apscheduler logger records
+# DEBUG and above.
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.INFO)
+SCHEDULER_LOGGER = logging.getLogger('apscheduler')
+SCHEDULER_LOGGER.setLevel(logging.DEBUG)
+
+# create file handler
+# Writes to output.log in the current working directory.
+FILE_HANDLER = logging.FileHandler('output.log')
+FILE_HANDLER.setLevel(logging.DEBUG)
+
+# create console handler
+# Console output is limited to INFO and above.
+STREAM_HANDLER = logging.StreamHandler()
+STREAM_HANDLER.setLevel(logging.INFO)
+
+# create formatter and add it to the handlers
+FORMATTER = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+STREAM_HANDLER.setFormatter(FORMATTER)
+FILE_HANDLER.setFormatter(FORMATTER)
+
+# add the handlers to logger
+# Both loggers share the same console and file handlers.
+LOGGER.addHandler(STREAM_HANDLER)
+LOGGER.addHandler(FILE_HANDLER)
+SCHEDULER_LOGGER.addHandler(STREAM_HANDLER)
+SCHEDULER_LOGGER.addHandler(FILE_HANDLER)
+
+# api
+# Base URL and request headers used by the api module; both environment
+# variables are required (KeyError when missing).
+BASE_URL = os.environ["API_URL"]
+HEADERS = {
+    'Authorization': os.environ["AUTHORIZATION"]
+}

+ 33 - 0
app/__main__.py

@@ -0,0 +1,33 @@
+"""Main app"""
+
+import sys
+import time
+
+from app import SCHEDULER, LOGGER, jobs, job_storage
+
+
+if __name__ == '__main__':
+    # jobs
+    # jobs.update_regions(2981)
+    # sys.exit()
+
+    # Jobs
+    JOBS = job_storage.get_jobs()
+    for state_id in JOBS:
+        LOGGER.info('"%s" add update job', state_id,)
+        SCHEDULER.add_job(
+            jobs.update_regions,
+            'cron',
+            args=[state_id],
+            id='{}_update_regions'.format(state_id),
+            replace_existing=True,
+            hour='12'
+        )
+
+    try:
+        while True:
+            time.sleep(100)
+    except KeyboardInterrupt:
+        print('Exiting application')
+        SCHEDULER.shutdown()
+        sys.exit()

+ 52 - 0
app/api.py

@@ -0,0 +1,52 @@
+"""API module"""
+
+import re
+from datetime import datetime, timedelta
+
+import requests
+from bs4 import BeautifulSoup
+
+from app import BASE_URL, HEADERS
+
+def get_regions(state_id):
+    """Download list of regions"""
+    regions = []
+    not_reached_date = True
+    page = 0
+    while not_reached_date:
+        tmp_regions = download_regions(state_id, page)
+        if not tmp_regions:
+            not_reached_date = False
+            break
+        # tmp_regions = read_regions()
+        if len(tmp_regions) < 25:
+            not_reached_date = False
+        regions += tmp_regions
+        page += 1
+    return regions
+
+def download_regions(state_id, page):
+    """Download the region"""
+    response = requests.get(
+        '{}listed/state/{}/{}'.format(BASE_URL, state_id, page*25),
+        headers=HEADERS
+    )
+    return parse_regions(response.text)
+
+def read_regions():
+    """Read from file"""
+    with open('regions.html') as file:
+        return parse_regions(file)
+
+def parse_regions(html):
+    """Parse html return regions"""
+    soup = BeautifulSoup(html, 'html.parser')
+    regions_tree = soup.find_all(class_='list_link')
+    regions = []
+    for region_tree in regions_tree:
+        columns = region_tree.find_all('td')
+        regions.append({
+            'id': int(region_tree['user']),
+            'name': columns[1].string,
+        })
+    return regions

+ 24 - 0
app/app.py

@@ -0,0 +1,24 @@
+"""General functions module"""
+
+import random
+import math
+import re
+
+from app import LOGGER, database, api
+
+
+def update_regions(state_id):
+    """Update department regions"""
+    LOGGER.info('"%s": Run update regions', state_id)
+    current_regions = database.get_current_regions(state_id)
+    LOGGER.info(
+        '"%s": Currently has "%s" regions in database',
+        state_id, len(current_regions)
+    )
+    regions = api.get_regions(state_id)
+    LOGGER.info(
+        '"%s": Got "%s" regions from API',
+        state_id, len(regions)
+    )
+    database.save_regions(state_id, regions)
+    LOGGER.info('"%s": saved regions', state_id)

+ 71 - 0
app/database.py

@@ -0,0 +1,71 @@
+"""Database module"""
+
+from datetime import datetime, timedelta
+
+from app import SESSION
+from app.models import State, Region, StateRegion
+
+
+def get_current_regions(state_id):
+    """Get latest professor from database"""
+    session = SESSION()
+    current_regions = session.query(Region) \
+        .join(Region.state_regions) \
+        .filter(StateRegion.state_id == state_id) \
+        .filter(StateRegion.until_date_time == None) \
+        .all()
+    session.close()
+    return current_regions
+
+def save_regions(state_id, regions):
+    """Save residents to database"""
+    session = SESSION()
+    region_ids = []
+    state = session.query(State).get(state_id)
+    if state is None:
+        state = save_state(session, state_id)
+    for region_dict in regions:
+        region = session.query(Region).get(region_dict['id'])
+        if region is None:
+            region = save_region(session, region_dict)
+        region_ids.append(region.id)
+        state_region = session.query(StateRegion) \
+            .filter(StateRegion.state_id == state.id) \
+            .filter(StateRegion.region_id == region.id) \
+            .filter(StateRegion.until_date_time == None) \
+            .first()
+        if not state_region:
+            state_region = StateRegion()
+            state_region.state_id = state.id
+            state_region.region_id = region.id
+            state_region.from_date_time = datetime.now().replace(second=0, minute=0)
+            session.add(state_region)
+            session.commit()
+
+    saved_state_regions = session.query(StateRegion) \
+        .filter(StateRegion.state_id == state.id) \
+        .filter(StateRegion.until_date_time == None) \
+        .all()
+    for saved_state_region in saved_state_regions:
+        if saved_state_region.region_id not in region_ids:
+            saved_state_region.until_date_time = datetime.now().replace(second=0, minute=0)
+    session.commit()
+    session.close()
+
+def save_state(session, state_id):
+    """Save state to database"""
+    state = State()
+    state.id = state_id
+    state.name = 'UNKNOWN'
+    session.add(state)
+    session.commit()
+    return state
+
+def save_region(session, region_dict):
+    """Save region to database"""
+    region = Region()
+    region.id = region_dict['id']
+    region.name = region_dict['name']
+    session.add(region)
+    session.commit()
+    return region

+ 18 - 0
app/job_storage.py

@@ -0,0 +1,18 @@
+"""store and read jobs"""
+
+import json
+
+from app import LOGGER
+
+
+def get_jobs():
+    """Read jobs"""
+    LOGGER.info('Read stored jobs')
+    try:
+        with open('jobs.json', 'r') as jobs_file:
+            jobs = json.load(jobs_file)
+            LOGGER.info('found "%s" job(s) in job storage', len(jobs))
+            return jobs
+    except FileNotFoundError:
+        LOGGER.error('job storage file "jobs.json" not found')
+    return []

+ 8 - 0
app/jobs.py

@@ -0,0 +1,8 @@
+"""Jobs for scheduler module"""
+
+from app import app
+
+
+def update_regions(state_id):
+    """Update regions
+
+    Job function registered with the scheduler; delegates to
+    ``app.update_regions`` for the given state id.
+    """
+    app.update_regions(state_id)

+ 40 - 0
app/models.py

@@ -0,0 +1,40 @@
+"""Database models"""
+
+from sqlalchemy import Column, ForeignKey, Integer, String, \
+    SmallInteger, DateTime, BigInteger, Date
+from sqlalchemy.orm import relationship, backref
+from sqlalchemy.ext.declarative import declarative_base
+
+
+Base = declarative_base()
+
+class Region(Base):
+    """Model for region"""
+    __tablename__ = 'region'
+    # primary key; the database module sets it explicitly from the parsed
+    # region data
+    id = Column(Integer, primary_key=True)
+    # display name of the region
+    name = Column(String)
+
+
+class State(Base):
+    """Model for state"""
+    __tablename__ = 'state'
+    # primary key; the database module sets it explicitly to the state id
+    id = Column(Integer, primary_key=True)
+    # display name of the state
+    name = Column(String)
+
+
+class StateRegion(Base):
+    """Model for state region
+
+    Links a region to a state for a period of time.
+    """
+    __tablename__ = 'state_region'
+    # composite primary key: (state_id, region_id, from_date_time)
+    state_id = Column(Integer, ForeignKey('state.id'), primary_key=True)
+    region_id = Column(Integer, ForeignKey('region.id'), primary_key=True)
+    from_date_time = Column(DateTime, primary_key=True)
+    # end of the period; the database module treats NULL as "still current"
+    until_date_time = Column(DateTime)
+
+    # Region.state_regions / State.state_regions are dynamic backrefs, so
+    # they return a query rather than a loaded list.
+    region = relationship(
+        'Region',
+        backref=backref('state_regions', lazy='dynamic')
+    )
+    state = relationship(
+        'State',
+        backref=backref('state_regions', lazy='dynamic')
+    )

+ 3 - 0
example.env

@@ -0,0 +1,3 @@
+AUTHORIZATION=PLACEHOLDER
+DATABASE_URI='postgresql://vboo@localhost/vboo'
+API_URL='http://localhost:5000/api/request/'

+ 3 - 0
example.jobs.json

@@ -0,0 +1,3 @@
+[
+    2788
+]