rss-parser/parsers/nvidia.py

68 lines
1.7 KiB
Python

import logging
import multiprocessing
from collections import namedtuple
from datetime import datetime
from urllib.parse import urljoin

import requests
from feedgen.feed import FeedGenerator
from pyquery import PyQuery as pq
# Pairs a publication page URL (`url`) with the feed entry object built for
# it (`fe`), so the entry's publication date can be set once that page has
# been fetched.
Entry = namedtuple('Entry', ['url', 'fe'])
class NvidiaParser:
    """Build an RSS feed from the NVIDIA Research publications listing.

    ``getRss`` scrapes ``<root_url>/publications`` for publication titles and
    links, then fetches every publication page to read its publication date.
    """

    # Human-readable feed name; also used as the RSS channel title.
    NAME = 'NVidia Research'
    # NOTE(review): presumably the route this feed is served under and the
    # cache lifetime in seconds -- both are consumed outside this file.
    URL = '/nvidia'
    CACHE_TIMEOUT = 3600
    # Base URL that relative publication links are resolved against.
    root_url = 'https://research.nvidia.com'
    # HTTP response holding the favicon, or None until loadFavicon() succeeds.
    favicon = None

    # Seconds to wait for the favicon request before giving up.
    _REQUEST_TIMEOUT = 10
    # Format of the `datetime` attribute on a publication page's <time> tag.
    _DATE_FORMAT = '%Y-%m-%dT%H:%M:%S%z'
    _log = logging.getLogger(__name__)

    def loadFavicon(self):
        """Fetch the site favicon and store the HTTP response in ``self.favicon``.

        This is best-effort: a failed request is logged and ``self.favicon``
        keeps its previous value. Nothing in this class calls this method.
        """
        fav_url = self.root_url + '/themes/custom/nvidia/favicon.ico'
        try:
            # A timeout keeps a stalled server from blocking the caller forever.
            self.favicon = requests.get(fav_url, timeout=self._REQUEST_TIMEOUT)
        except requests.RequestException as err:
            self._log.warning('Could not load favicon from %s: %s', fav_url, err)

    @staticmethod
    def _parse_publication_date(raw):
        """Return the timezone-aware datetime for *raw*, or None if unusable."""
        if not raw:
            return None
        try:
            return datetime.strptime(raw, NvidiaParser._DATE_FORMAT)
        except ValueError:
            return None

    @classmethod
    def _absolute_url(cls, href):
        """Resolve *href* (relative or absolute) against ``root_url``."""
        return urljoin(cls.root_url, href)

    @staticmethod
    def parseNvidiaDate(entry):
        """Fetch the publication page of *entry* and set its feed pubDate.

        Args:
            entry: an ``Entry`` whose ``url`` is the publication page and
                whose ``fe`` is the feed entry to update.

        If the page has no parsable publication date the feed entry is left
        without one instead of aborting the whole feed. Errors raised while
        fetching the page still propagate to the caller.
        """
        NvidiaParser._log.debug('Fetching publication date from %s', entry.url)
        page = pq(url=entry.url)
        raw = page('.field--name-field-publication-date').find('time').attr.datetime
        published = NvidiaParser._parse_publication_date(raw)
        if published is None:
            NvidiaParser._log.debug(
                'No usable publication date on %s (got %r)', entry.url, raw)
            return
        entry.fe.pubDate(published)

    def getRss(self):
        """Scrape the publications listing and return the feed as RSS.

        Returns:
            The value of ``FeedGenerator.rss_str()`` for the generated feed.
        """
        listing = pq(url=self.root_url + '/publications')

        fg = FeedGenerator()
        fg.id(self.root_url)
        fg.title(self.NAME)
        fg.link(href=self.root_url, rel='alternate')
        fg.description('NVidia Research papers')

        entries = []
        for elem in listing('.views-field-title').items():
            link = elem.find('a')
            href = link.attr.href
            if not href:
                # A title cell without a link cannot become a feed entry.
                continue
            url = self._absolute_url(href)
            fe = fg.add_entry()
            fe.id(url)
            fe.title(link.text())
            fe.link(href=url)
            entries.append(Entry(url, fe))

        # One extra HTTP request per entry: the listing page is only used
        # for titles and links; dates are read from each publication page.
        for entry in entries:
            self.parseNvidiaDate(entry)

        return fg.rss_str()