rss-parser/parsers/nvidia.py

53 lines
1.4 KiB
Python

from pyquery import PyQuery as pq
from feedgen.feed import FeedGenerator
from collections import namedtuple
import multiprocessing
from datetime import datetime
# Lightweight record pairing a publication page URL (``url``) with the feed
# entry object (``fe``) that was created for that publication.
Entry = namedtuple('Entry', ['url', 'fe'])
class NvidiaParser:
    """Build an RSS feed from the NVIDIA Research publications listing.

    ``getRss`` scrapes the listing page for publication titles and links,
    creates one feed entry per publication, and then visits each publication
    page to read its publication date.
    """

    # Human-readable name of this feed source.
    NAME = 'NVidia Research'
    # Presumably the route this parser is exposed under -- confirm against
    # the code that registers parsers.
    URL = '/nvidia'
    # Presumably a cache lifetime in seconds -- confirm against the consumer.
    CACHE_TIMEOUT = 3600

    @staticmethod
    def _parse_publication_date(raw):
        """Convert a ``datetime`` attribute value into a ``datetime`` object.

        Args:
            raw: Attribute text such as ``'2021-03-04T12:00:00+0000'``, or
                ``None``/empty when the page carries no publication date.

        Returns:
            A timezone-aware ``datetime``, or ``None`` when ``raw`` is empty.

        Raises:
            ValueError: If ``raw`` is non-empty but does not match the
                expected ``%Y-%m-%dT%H:%M:%S%z`` layout.
        """
        if not raw:
            return None
        return datetime.strptime(raw, '%Y-%m-%dT%H:%M:%S%z')

    @staticmethod
    def parseNvidiaDate(entry):
        """Fetch a publication page and set the entry's publication date.

        Args:
            entry: An ``Entry`` whose ``url`` is the publication page and
                whose ``fe`` is the feed entry to update in place.

        The feed entry is left without a date when the page has no
        ``<time datetime=...>`` inside the publication-date field.
        """
        dom = pq(entry.url)
        print(entry.url)
        raw = dom('.field--name-field-publication-date').find('time').attr.datetime
        published = NvidiaParser._parse_publication_date(raw)
        if published is not None:
            entry.fe.pubDate(published)

    def getRss(self):
        """Scrape the publications listing and return it as an RSS document.

        Returns:
            The value of ``FeedGenerator.rss_str()`` for the assembled feed.
        """
        # Local imports keep this block self-contained; both are stdlib.
        from multiprocessing.pool import ThreadPool
        from urllib.parse import urljoin

        root_url = 'https://research.nvidia.com'
        d = pq(root_url + '/publications')

        fg = FeedGenerator()
        fg.id(root_url)
        fg.title('NVidia Research')
        fg.link(href=root_url, rel='alternate')
        fg.logo(root_url + '/themes/custom/nvidia/favicon.ico')
        fg.description('NVidia Research papers')

        entries = []
        print('RSS GOT')
        for elem in d('.views-field-title').items():
            link = elem.find('a')
            href = link.attr.href
            if not href:
                # Title cell without an anchor: there is nothing to link to,
                # so do not create an empty feed entry for it.
                continue
            # urljoin copes with both site-relative and absolute hrefs.
            url = urljoin(root_url, href)
            fe = fg.add_entry()
            fe.id(url)
            fe.title(link.text())
            fe.link(href=url)
            entries.append(Entry(url, fe))

        if entries:
            # Threads, not processes: a process pool pickles every Entry into
            # the worker, so pubDate() would be applied to a copy and the
            # dates would never reach ``fg``. The work is network-bound, so
            # threads overlap the waits while mutating the real entries.
            with ThreadPool(8) as pool:
                pool.map(NvidiaParser.parseNvidiaDate, entries)

        return fg.rss_str()