diff --git a/app.py b/app.py
index 6f0e41a..4a90e70 100644
--- a/app.py
+++ b/app.py
@@ -1,12 +1,94 @@
-from flask import Flask, Response
-from parsers.nvidia import *
+from flask import Flask, Response, abort
+from parsers.nvidia import NvidiaParser
+import os, time, threading
+
+class ParserData:
+    # Each instance owns its state and its lock; a class-level
+    # Lock() would be shared by every ParserData instance.
+    def __init__(self, parser):
+        self.parser = parser
+        self.rss = None
+        self.time = None
+        self.lock = threading.Lock()
app = Flask(__name__)
-@app.route("/")
-def index():
- return "
Hello, World!
"
+parsers = [ParserData(NvidiaParser())]
+
+def getCachePath(parser):
+ path = './_cache/' + parser.__class__.__name__
+ return path
+
+def checkParserCache(parser):
+ path = getCachePath(parser.parser)
+
+ try:
+ os.path.getmtime(path)
+ with open(path, 'r') as f:
+ parser.rss = f.read()
+    except OSError:
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ open(path, 'w').close()
+ return True
+
+ filetime = os.path.getmtime(path)
+ currtime = time.time()
+ parser.time = filetime
+
+ return (currtime - filetime) > parser.parser.CACHE_TIMEOUT
+
+def updateParserAndCache(parser):
+ rss = parser.parser.getRss()
+
+ with parser.lock:
+ parser.rss = rss
+ parser.time = time.time()
+
+ with open(getCachePath(parser.parser), 'w') as f:
+ f.write(parser.rss.decode('utf-8'))
+
+def runParserWorker(parser):
+ if checkParserCache(parser):
+ updateParserAndCache(parser)
+
+ while True:
+ nextmark = parser.time + parser.parser.CACHE_TIMEOUT
+ sleepfor = nextmark - time.time()
+ if sleepfor > 0:
+ time.sleep(nextmark - time.time())
+ updateParserAndCache(parser)
+
+def runParserWorkers():
+ for parser in parsers:
+ threading.Thread(target=runParserWorker, args=(parser,)).start()
+
+indexPage = """
+
+
+RSS index page
+
+
+RSS index page
+Custom generators of RSS feed from different blogs
+
+"""
+
+for parser in parsers:
+ indexPage += f'{parser.parser.NAME}'
+indexPage += ''
+indexPage += ''
+
+@app.route('/')
+def index():
+ return indexPage
+
+runParserWorkers()
+
+for parser in parsers:
+    def query(parser=parser):
+        with parser.lock:
+            if parser.rss is None:
+                abort(404)
+            return Response(parser.rss, mimetype='text/xml')
+    app.add_url_rule(parser.parser.URL, endpoint=parser.parser.URL, view_func=query)
-@app.route("/nvidia")
-def rss_query():
- return Response(parseNvidia(), mimetype='text/xml')
diff --git a/parsers/nvidia.py b/parsers/nvidia.py
index 66d8131..97b1359 100644
--- a/parsers/nvidia.py
+++ b/parsers/nvidia.py
@@ -1,27 +1,52 @@
from pyquery import PyQuery as pq
from feedgen.feed import FeedGenerator
+from collections import namedtuple
+import multiprocessing
+from datetime import datetime
-def parseNvidia():
- root_url = 'https://research.nvidia.com'
- d = pq(root_url +'/publications')
+Entry = namedtuple('Entry', 'url fe')
- fg = FeedGenerator()
- fg.id(root_url)
- fg.title('NVidia Research')
- fg.link(href=root_url, rel='alternate')
- fg.logo(root_url + '/favicon.ico')
- fg.description('NVidia Research papers')
+class NvidiaParser:
+ NAME = 'NVidia Research'
+ URL = '/nvidia'
+ CACHE_TIMEOUT = 3600
- for elem in d('.views-field-title').items():
- link = elem.find('a')
- url = root_url + link.attr.href
- title = link.text()
+ def parseNvidiaDate(entry):
+ dom = pq(entry.url)
+ print(entry.url)
- fe = fg.add_entry()
- fe.id(url)
- fe.title(title)
- fe.link(href=url)
+ time = dom('.field--name-field-publication-date').find('time').attr.datetime
+ time = datetime.strptime(time, '%Y-%m-%dT%H:%M:%S%z')
+ entry.fe.pubDate(time)
- return fg.rss_str()
+ def getRss(self):
+ root_url = 'https://research.nvidia.com'
+ d = pq(root_url +'/publications')
+ fg = FeedGenerator()
+ fg.id(root_url)
+ fg.title('NVidia Research')
+ fg.link(href=root_url, rel='alternate')
+ fg.logo(root_url + '/themes/custom/nvidia/favicon.ico')
+ fg.description('NVidia Research papers')
+
+ entries = []
+ print('RSS GOT')
+
+ for elem in d('.views-field-title').items():
+ link = elem.find('a')
+ url = root_url + link.attr.href
+ title = link.text()
+
+ fe = fg.add_entry()
+ fe.id(url)
+ fe.title(title)
+ fe.link(href=url)
+
+ entries.append(Entry(url, fe))
+        from multiprocessing.pool import ThreadPool
+        with ThreadPool(8) as p:
+ p.map(NvidiaParser.parseNvidiaDate, entries)
+
+ return fg.rss_str()