"""Build an RSS feed from the NVidia Research publications listing."""

from collections import namedtuple
from datetime import datetime
import logging
import multiprocessing
from urllib.parse import urljoin

from feedgen.feed import FeedGenerator
from pyquery import PyQuery as pq
import requests

logger = logging.getLogger(__name__)

# Pairs a publication page URL (`url`) with the feed entry (`fe`) created for it.
Entry = namedtuple('Entry', 'url fe')


class NvidiaParser:
    """Scrape research.nvidia.com publications and render them as RSS."""

    # NOTE(review): NAME, URL and CACHE_TIMEOUT are not read inside this file
    # (except NAME, reused as the feed title); presumably they are consumed by
    # the hosting application. CACHE_TIMEOUT is presumably in seconds - confirm.
    NAME = 'NVidia Research'
    URL = '/nvidia'
    CACHE_TIMEOUT = 3600
    root_url = 'https://research.nvidia.com'
    # Holds the `requests` response for the favicon once `loadFavicon` succeeds.
    favicon = None

    def loadFavicon(self):
        """Fetch the site favicon and store the response in ``self.favicon``.

        Best-effort: on any network/HTTP-layer failure the error is logged
        and ``self.favicon`` is left unchanged.
        """
        favUrl = NvidiaParser.root_url + '/themes/custom/nvidia/favicon.ico'
        try:
            # A timeout keeps an unresponsive host from blocking forever.
            self.favicon = requests.get(favUrl, timeout=10)
        except requests.RequestException as err:
            logger.warning('Could not load favicon from %s: %s', favUrl, err)

    @staticmethod
    def parseNvidiaDate(entry):
        """Fetch a publication page and set the entry's publication date.

        Args:
            entry: An ``Entry``; ``entry.url`` is downloaded and
                ``entry.fe.pubDate`` is called with the parsed date.

        If the page has no ``datetime`` attribute on its publication-date
        ``<time>`` element, or the value does not match the expected format,
        a warning is logged and the entry is left without a date.
        """
        logger.debug('Fetching publication date from %s', entry.url)
        dom = pq(entry.url)
        date_field = dom('.field--name-field-publication-date')
        raw_date = date_field.find('time').attr.datetime
        if not raw_date:
            logger.warning('No publication date found on %s', entry.url)
            return
        try:
            # %z yields a timezone-aware datetime, which feedgen requires.
            published = datetime.strptime(raw_date, '%Y-%m-%dT%H:%M:%S%z')
        except ValueError:
            logger.warning(
                'Unrecognised publication date %r on %s', raw_date, entry.url
            )
            return
        entry.fe.pubDate(published)

    def getRss(self):
        """Return the RSS document for the current publications listing.

        Downloads ``<root_url>/publications``, creates one feed entry per
        linked title, then fetches each publication page to fill in its date.

        Returns:
            The result of ``FeedGenerator.rss_str()``.
        """
        listing = pq(self.root_url + '/publications')

        fg = FeedGenerator()
        fg.id(self.root_url)
        fg.title(self.NAME)
        fg.link(href=self.root_url, rel='alternate')
        fg.description('NVidia Research papers')

        entries = []
        for elem in listing('.views-field-title').items():
            link = elem.find('a')
            href = link.attr.href
            if not href:
                # Matched element carries no link (nothing to point the
                # entry at); skip it instead of failing on `None`.
                continue
            # urljoin handles both site-relative and absolute hrefs.
            url = urljoin(self.root_url, href)
            fe = fg.add_entry()
            fe.id(url)
            fe.title(link.text())
            fe.link(href=url)
            entries.append(Entry(url, fe))

        # NOTE: one HTTP request per entry, issued sequentially.
        for entry in entries:
            NvidiaParser.parseNvidiaDate(entry)

        return fg.rss_str()