Prechádzať zdrojové kódy

Add rating, comments, and post date to article

JoostSijm 4 rokov pred
rodič
commit
7b9455415e

+ 15 - 0
src/rival_regions_wrapper/api_wrapper/article.py

@@ -1,9 +1,11 @@
 """Articl class"""
 
+from datetime import timedelta
 import unicodedata
 import re
 
 from bs4 import BeautifulSoup
+from dateutil import parser
 
 
 class Article(object):
@@ -38,6 +40,8 @@ class Article(object):
             'region_id': int(region['action'].replace('map/details/', '')),
             'content_text': news_content.text,
             'content_html': news_content.prettify(),
+            'rating': int(soup.select_one('#news_number').text),
+            'comments': int(soup.select_one('.news_comments_link').text)
         }
 
         if newspaper:
@@ -52,4 +56,15 @@ class Article(object):
             article_info['language'] = re.sub(r'\s|,', '', result[1].strip())
         except IndexError:
             pass
+
+        date_element = soup.select_one('.news_conent_title')
+        date_string = date_element.text.replace('✘', '').strip()
+        if 'Yesterday' in date_string:
+            time = re.search(r'\d\d:\d\d', date_string)
+            article_info['post_date'] = parser.parse(time.group(0)) - timedelta(days=1)
+        elif 'Today' in date_string:
+            time = re.search(r'\d\d:\d\d', date_string)
+            article_info['post_date'] = parser.parse(time.group(0))
+        else:
+            article_info['post_date'] = parser.parse(date_string)
         return article_info

+ 7 - 1
tests/test_rival_regions_wrapper.py

@@ -204,7 +204,7 @@ def article_keys():
     """Standard key fro article"""
     return ['article_id', 'article_title', 'newspaper_id', 'newspaper_name', \
         'author_name', 'author_id', 'region_name', 'region_id', 'content_text', 'content_html', \
-        'language']
+        'language', 'rating', 'comments', 'post_date']
 
 @pytest.mark.vcr()
 def test_article_info_one(api_wrapper, article_keys):
@@ -225,6 +225,9 @@ def test_article_info_one(api_wrapper, article_keys):
     assert isinstance(response['content_text'], str), "Content text should be a string"
     assert isinstance(response['content_html'], str), "Content html should be a string"
     assert isinstance(response['language'], str), "Language should be a string"
+    assert isinstance(response['rating'], int), "Rating should be an integer"
+    assert isinstance(response['comments'], int), "Comments should be an integer"
+    assert isinstance(response['post_date'], datetime), "Post date should be a datetime"
 
 @pytest.mark.vcr()
 def test_article_info_two(api_wrapper, article_keys):
@@ -245,3 +248,6 @@ def test_article_info_two(api_wrapper, article_keys):
     assert isinstance(response['content_text'], str), "Content text should be a string"
     assert isinstance(response['content_html'], str), "Content html should be a string"
     assert isinstance(response['language'], str), "Language should be a string"
+    assert isinstance(response['rating'], int), "Rating should be an integer"
+    assert isinstance(response['comments'], int), "Comments should be an integer"
+    assert isinstance(response['post_date'], datetime), "Post date should be a datetime"