caching, index page, threading
parent
804356a139
commit
23f06f73a2
98
app.py
98
app.py
|
|
@ -1,12 +1,94 @@
|
|||
from flask import Flask, Response
|
||||
from parsers.nvidia import *
|
||||
from flask import Flask, Response, abort
|
||||
from parsers.nvidia import NvidiaParser
|
||||
import os, time, threading
|
||||
|
||||
class ParserData:
    """Mutable holder pairing one feed parser with its cached RSS payload.

    Originally all state lived on the class, meaning every instance shared a
    single ``threading.Lock`` (and the cached ``rss``/``time`` defaults): with
    more than one parser registered, one parser's refresh would block all the
    others. State is now per-instance.
    """

    def __init__(self, parser):
        self.parser = parser          # parser implementation (e.g. NvidiaParser)
        self.rss = None               # cached RSS document; None until first fetch
        self.time = None              # timestamp of the cached document (epoch seconds)
        self.lock = threading.Lock()  # guards rss/time for this parser only
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
return "<p>Hello, World!</p>"
|
||||
parsers = [ParserData(NvidiaParser())]
|
||||
|
||||
def getCachePath(parser):
    """Return the on-disk cache file path for *parser*, keyed by its class name."""
    return f'./_cache/{type(parser).__name__}'
|
||||
|
||||
def checkParserCache(parser):
    """Load the on-disk cache for *parser* (a ParserData) if one exists.

    Returns True when a refresh is needed: either no cache file exists yet
    (an empty placeholder is created) or the cached copy is older than the
    parser's CACHE_TIMEOUT. Returns False when the cache is fresh; in that
    case ``parser.rss`` and ``parser.time`` are populated from disk.
    """
    path = getCachePath(parser.parser)

    try:
        # Single stat call; the original stat'd once here and again below.
        filetime = os.path.getmtime(path)
    except OSError:
        # No cache yet (or it is unreadable): create an empty placeholder so
        # the next stat succeeds, and ask the caller to refresh.
        # The original used a bare `except:` here, which also swallowed
        # unrelated errors and silently truncated an existing cache file.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        open(path, 'w').close()
        return True

    with open(path, 'r') as f:
        parser.rss = f.read()

    parser.time = filetime
    return (time.time() - filetime) > parser.parser.CACHE_TIMEOUT
|
||||
|
||||
def updateParserAndCache(parser):
    """Fetch a fresh RSS document and persist it to memory and the disk cache."""
    # Do the (slow) network fetch outside the lock; only publication is guarded.
    fresh = parser.parser.getRss()

    with parser.lock:
        parser.rss = fresh
        parser.time = time.time()

        with open(getCachePath(parser.parser), 'w') as cache:
            cache.write(parser.rss.decode('utf-8'))
|
||||
|
||||
def runParserWorker(parser):
    """Background worker: keep *parser*'s feed refreshed every CACHE_TIMEOUT.

    Never returns; intended to run on a dedicated thread. On startup it loads
    the disk cache and refreshes immediately if the cache is missing or stale.
    """
    if checkParserCache(parser):
        updateParserAndCache(parser)

    while True:
        nextmark = parser.time + parser.parser.CACHE_TIMEOUT
        sleepfor = nextmark - time.time()
        if sleepfor > 0:
            # Sleep the precomputed amount. The original recomputed
            # `nextmark - time.time()` here, which can have gone negative
            # since the check above and make time.sleep() raise ValueError.
            time.sleep(sleepfor)
        updateParserAndCache(parser)
|
||||
|
||||
def runParserWorkers():
    """Start one background refresh thread for every registered parser."""
    for entry in parsers:
        worker = threading.Thread(target=runParserWorker, args=(entry,))
        worker.start()
|
||||
|
||||
# Static index page: built once at import time from the registered parsers.
indexPage = """
<html>
<head>
<title>RSS index page</title>
</head>
<body>
<h1>RSS index page</h1>
<h3>Custom generators of RSS feed from different blogs</h3>
<br/>
"""

for parser in parsers:
    # Quote the attribute value: the original emitted `href={url}` unquoted,
    # which is invalid HTML and breaks for URLs with special characters.
    indexPage += f'<a href="{parser.parser.URL}">{parser.parser.NAME}</a>'
indexPage += '</body>'
indexPage += '</html>'
|
||||
|
||||
@app.route('/')
def index():
    """Serve the prebuilt HTML index listing all feed endpoints."""
    return indexPage


runParserWorkers()
|
||||
|
||||
def makeFeedEndpoint(parser):
    """Build a view serving *parser*'s cached RSS; 404 until the first fetch."""
    def serveFeed():
        with parser.lock:
            if parser.rss is None:
                abort(404)
            return Response(parser.rss, mimetype='text/xml')
    return serveFeed


for parser in parsers:
    # Register through a factory with an explicit, unique endpoint name.
    # The original defined one closure named `query` inside the loop, which
    # (a) binds the loop variable late, so every route would serve the LAST
    # parser, and (b) makes Flask raise a duplicate-endpoint AssertionError
    # on the second registration.
    app.add_url_rule(parser.parser.URL,
                     endpoint=parser.parser.URL,
                     view_func=makeFeedEndpoint(parser))
|
||||
|
||||
@app.route("/nvidia")
|
||||
def rss_query():
|
||||
return Response(parseNvidia(), mimetype='text/xml')
|
||||
|
|
|
|||
|
|
@ -1,7 +1,25 @@
|
|||
from pyquery import PyQuery as pq
|
||||
from feedgen.feed import FeedGenerator
|
||||
from collections import namedtuple
|
||||
import multiprocessing
|
||||
from datetime import datetime
|
||||
|
||||
def parseNvidia():
|
||||
Entry = namedtuple('Entry', 'url fe')
|
||||
|
||||
class NvidiaParser:
|
||||
NAME = 'NVidia Research'
|
||||
URL = '/nvidia'
|
||||
CACHE_TIMEOUT = 3600
|
||||
|
||||
@staticmethod
def parseNvidiaDate(entry):
    """Fetch the page at *entry.url* and set the feed entry's pubDate.

    Reads the ``datetime`` attribute of the <time> element inside the page's
    ``.field--name-field-publication-date`` node (ISO 8601 with UTC offset).
    Declared without ``self`` but defined inside the class; it only worked
    because callers access it via the class object (``NvidiaParser.parseNvidiaDate``
    in the multiprocessing pool) — ``@staticmethod`` makes that explicit and
    keeps instance access from mis-binding the first argument.
    """
    dom = pq(entry.url)
    print(entry.url)  # NOTE(review): looks like a progress/debug trace in the pool worker

    stamp = dom('.field--name-field-publication-date').find('time').attr.datetime
    stamp = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S%z')
    entry.fe.pubDate(stamp)
|
||||
|
||||
def getRss(self):
|
||||
root_url = 'https://research.nvidia.com'
|
||||
d = pq(root_url +'/publications')
|
||||
|
||||
|
|
@ -9,9 +27,12 @@ def parseNvidia():
|
|||
fg.id(root_url)
|
||||
fg.title('NVidia Research')
|
||||
fg.link(href=root_url, rel='alternate')
|
||||
fg.logo(root_url + '/favicon.ico')
|
||||
fg.logo(root_url + '/themes/custom/nvidia/favicon.ico')
|
||||
fg.description('NVidia Research papers')
|
||||
|
||||
entries = []
|
||||
print('RSS GOT')
|
||||
|
||||
for elem in d('.views-field-title').items():
|
||||
link = elem.find('a')
|
||||
url = root_url + link.attr.href
|
||||
|
|
@ -22,6 +43,10 @@ def parseNvidia():
|
|||
fe.title(title)
|
||||
fe.link(href=url)
|
||||
|
||||
entries.append(Entry(url, fe))
|
||||
|
||||
with multiprocessing.Pool(8) as p:
|
||||
p.map(NvidiaParser.parseNvidiaDate, entries)
|
||||
|
||||
return fg.rss_str()
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue