diff --git a/.gitignore b/.gitignore
index d6b8423..f9fec48 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
_cache/
-*__pycache__/
+rssparser
diff --git a/README.md b/README.md
index e6ac167..997ac06 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,2 @@
# RSS Parsers
-## Required PiP modules
-- flask
-- pyquery
-- feedgen
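+
+Custom generators of RSS feeds from different blogs, written in Go. Build with
+`go build` and run the resulting `rssparser` binary; the server listens on `:8081`.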
diff --git a/app.go b/app.go
new file mode 100644
index 0000000..87c1806
--- /dev/null
+++ b/app.go
@@ -0,0 +1,166 @@
+package main
+
+import (
+ "fmt"
+ "log"
+ "net/http"
+ "os"
+ "path/filepath"
+ "sync"
+ "time"
+
+ "github.com/gorilla/feeds"
+)
+
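+// RssEntry is a single scraped article: its title, link, summary, and publication time.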
+type RssEntry struct {
+ title string
+ url string
+ description string
+ time time.Time
+}
+
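+// Parser is a site-specific scraper; Parse yields the entries and the remaining
+// methods supply feed metadata and the cache policy.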
+type Parser interface {
+ Parse() []RssEntry
+
+ Title() string
+ Description() string
+ RootUrl() string
+ ServerUrl() string
+ CacheName() string
+ CacheTimeout() int
+}
+
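+// ParserFeed pairs a Parser with its most recently generated RSS document; the
+// mutex guards rss, which the worker goroutine writes while HTTP handlers read it.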
+type ParserFeed struct {
+ parser Parser
+ rss string
+ mutex *sync.Mutex
+ time time.Time
+}
+
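+// CachePath is the on-disk location of this parser's cached RSS document.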
+func (this ParserFeed) CachePath() string {
+ return "./_cache/" + this.parser.CacheName() + ".rss"
+}
+
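+// ReadCache loads the cached feed from disk if one exists. It returns true when
+// the cache is missing or older than CacheTimeout, i.e. when a fresh parse is needed.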
+func (this *ParserFeed) ReadCache() bool {
+ path := this.CachePath()
+ stat, err := os.Stat(path)
+ if err != nil {
+ return true
+ }
+
+ data, err := os.ReadFile(this.CachePath())
+ if err != nil {
+ return true
+ }
+
+ this.SetFeed(string(data))
+ this.time = stat.ModTime()
+
+ return time.Since(this.time).Seconds() > float64(this.parser.CacheTimeout())
+}
+
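+// ParseAndUpdateCache runs the scraper, renders the feed as RSS, and stores the
+// result both on disk and in memory.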
+func (this *ParserFeed) ParseAndUpdateCache() {
+ feed := &feeds.Feed{
+ Title: this.parser.Title(),
+ Link: &feeds.Link{Href: this.parser.RootUrl()},
+ Description: this.parser.Description(),
+ Created: time.Now(),
+ }
+
+ entries := this.parser.Parse()
+
+ for _, re := range entries {
+ feed.Items = append(feed.Items, &feeds.Item{
+ Id: re.url,
+ Title: re.title,
+ Link: &feeds.Link{Href: re.url},
+ Description: re.description,
+ Created: re.time,
+ })
+ }
+
+ rssFeed, err := feed.ToRss()
+ if err != nil {
+ log.Println(err)
+ return
+ }
+
+ dir := filepath.Dir(this.CachePath())
+ if err := os.MkdirAll(dir, os.ModePerm); err != nil {
+ log.Println(err)
+ }
+ if err := os.WriteFile(this.CachePath(), []byte(rssFeed), 0644); err != nil {
+ log.Println(err)
+ }
+
+ this.mutex.Lock()
+ this.rss = rssFeed
+ this.time = time.Now()
+ this.mutex.Unlock()
+}
+
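+// RunWorker is meant to run as a goroutine: it reparses on start if the cache is
+// stale, then refreshes the feed every CacheTimeout seconds.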
+func (this *ParserFeed) RunWorker() {
+ if this.ReadCache() {
+ this.ParseAndUpdateCache()
+ }
+
+ for {
+ // sleep until one CacheTimeout has elapsed since the last update, then refresh
+ sleepfor := time.Until(this.time.Add(time.Second * time.Duration(this.parser.CacheTimeout())))
+ if sleepfor > 0 {
+ time.Sleep(sleepfor)
+ }
+ this.ParseAndUpdateCache()
+ log.Println("updated", this.parser.CacheName())
+ }
+}
+
+func (this *ParserFeed) SetFeed(feed string) {
+ this.mutex.Lock()
+ this.rss = feed
+ this.mutex.Unlock()
+}
+
+func (this *ParserFeed) GetFeed() string {
+ this.mutex.Lock()
+ rss := this.rss
+ this.mutex.Unlock()
+
+ return rss
+}
+
+func main() {
+ parsers := []*ParserFeed{
+ {parser: NvidiaParser{}, mutex: &sync.Mutex{}},
+ }
+
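+ // each feed is kept fresh by its own background worker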
+ for _, p := range parsers {
+ go p.RunWorker()
+ }
+
+ rootPage := `<html>
+ <head>
+ <title>RSS index page</title>
+ </head>
+ <body>
+ <h1>RSS index page</h1>
+ <p>Custom generators of RSS feed from different blogs</p>
+ <ul>
+ `
+
+ for _, p := range parsers {
+ rootPage += fmt.Sprintf("<li><a href=\"%v\">%v</a></li>\n", p.parser.ServerUrl(), p.parser.Title())
+ }
+
+ rootPage += "</ul>\n</body>\n</html>"
+
+ http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+ fmt.Fprint(w, rootPage)
+ })
+
+ for _, p := range parsers {
+ p := p // capture a fresh copy per iteration; otherwise every handler closure would serve the last parser's feed
+ http.HandleFunc(p.parser.ServerUrl(), func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/xml")
+ fmt.Fprint(w, p.GetFeed())
+ })
+ }
+
+ log.Fatal(http.ListenAndServe(":8081", nil))
+}
diff --git a/app.py b/app.py
deleted file mode 100644
index ca15f0c..0000000
--- a/app.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from flask import Flask, Response, abort, request
-from parsers.nvidia import NvidiaParser
-from urllib.parse import urlparse
-import os, time, threading
-
-class ParserData:
- parser = None
- rss = None
- time = None
- lock = threading.Lock()
-
- def __init__(self, parser):
- self.parser = parser
-
-app = Flask(__name__)
-
-parsers = [ParserData(NvidiaParser())]
-
-def getCachePath(parser):
- path = './_cache/' + parser.__class__.__name__
- return path
-
-def checkParserCache(parser):
- path = getCachePath(parser.parser)
-
- try:
- os.path.getmtime(path)
- with open(path, 'r') as f:
- parser.rss = f.read()
- except:
- os.makedirs(os.path.dirname(path), exist_ok=True)
- open(path, 'w').close()
- return True
-
- filetime = os.path.getmtime(path)
- currtime = time.time()
- parser.time = filetime
-
- return (currtime - filetime) > parser.parser.CACHE_TIMEOUT
-
-def updateParserAndCache(parser):
- rss = parser.parser.getRss()
-
- with parser.lock:
- parser.rss = rss
- parser.time = time.time()
-
- with open(getCachePath(parser.parser), 'w') as f:
- f.write(parser.rss.decode('utf-8'))
-
-def runParserWorker(parser):
- if checkParserCache(parser):
- updateParserAndCache(parser)
-
- while True:
- nextmark = parser.time + parser.parser.CACHE_TIMEOUT
- sleepfor = nextmark - time.time()
- if sleepfor > 0:
- time.sleep(nextmark - time.time())
- updateParserAndCache(parser)
-
-def runParserWorkers():
- for parser in parsers:
- threading.Thread(target=runParserWorker, args=(parser,)).start()
-
-indexPage = """
-<html>
-<head>
-<title>RSS index page</title>
-</head>
-<body>
-<h1>RSS index page</h1>
-<p>Custom generators of RSS feed from different blogs</p>
-<ul>
-"""
-
-for parser in parsers:
- indexPage += f'<li><a href="{parser.parser.URL}">{parser.parser.NAME}</a></li>'
-indexPage += '</ul>'
-indexPage += '</body></html>'
-
-@app.route('/')
-def index():
- return indexPage
-
-#@app.route('/favicon.ico')
-#def favicon():
-# referrer = request.referrer
-# if referrer != None:
-# u = urlparse(referrer)
-#
-# for parser in parsers:
-# if parser.parser.URL == u.path:
-# favi = parser.parser.favicon
-# return Response(favi.content, mimetype=favi.headers['Content-Type'])
-#
-# abort(404)
-
-runParserWorkers()
-
-for parser in parsers:
- @app.route(parser.parser.URL)
- def query():
- with parser.lock:
- if parser.rss == None:
- abort(404)
- return Response(parser.rss, mimetype='text/xml')
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..0357e52
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,10 @@
+module rssparser
+
+go 1.19
+
+require (
+ github.com/PuerkitoBio/goquery v1.8.0
+ github.com/andybalholm/cascadia v1.3.1 // indirect
+ github.com/gorilla/feeds v1.1.1
+ golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..a07a53d
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,13 @@
+github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
+github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/gorilla/feeds v1.1.1 h1:HwKXxqzcRNg9to+BbvJog4+f3s/xzvtZXICcQGutYfY=
+github.com/gorilla/feeds v1.1.1/go.mod h1:Nk0jZrvPFZX1OBe5NPiddPw7CfwF6Q9eqzaBbaightA=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/nvidia.go b/nvidia.go
new file mode 100644
index 0000000..3cd3cf9
--- /dev/null
+++ b/nvidia.go
@@ -0,0 +1,77 @@
+package main
+
+import (
+ "fmt"
+ "net/http"
+ "time"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
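+// NvidiaParser scrapes the research.nvidia.com publications listing into RSS entries.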
+type NvidiaParser struct {
+}
+
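+// Parse fetches the publications index, then visits each article page to fill in
+// its summary and publication date.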
+func (this NvidiaParser) Parse() []RssEntry {
+ var result []RssEntry
+
+ resp, err := http.Get(this.RootUrl() + "/publications")
+ if err != nil {
+ log.Println(err)
+ return nil
+ }
+ defer resp.Body.Close()
+
+ doc, err := goquery.NewDocumentFromReader(resp.Body)
+ if err != nil {
+ log.Println(err)
+ return nil
+ }
+
+ // parse articles and links
+ doc.Find(".views-field-title").Each(func(i int, s *goquery.Selection) {
+ link := s.Find("a")
+ href, exists := link.Attr("href")
+ if exists {
+ result = append(result, RssEntry{title: link.Text(), url: this.RootUrl() + href})
+ }
+ })
+
+ // fetch each article page for its summary and publication time; index into
+ // the slice so the updates persist (ranging by value would mutate a copy)
+ for i := range result {
+ re := &result[i]
+
+ resp, err := http.Get(re.url)
+ if err != nil {
+ log.Println(err)
+ continue
+ }
+
+ doc, err := goquery.NewDocumentFromReader(resp.Body)
+ resp.Body.Close()
+ if err != nil {
+ log.Println(err)
+ continue
+ }
+
+ re.description = doc.Find(".field--type-text-with-summary").Text()
+
+ date, exists := doc.Find(".field--name-field-publication-date").Find("time").Attr("datetime")
+ if exists {
+ t, err := time.Parse(time.RFC3339, date)
+ if err == nil {
+ re.time = t
+ }
+ }
+
+ log.Println(re.url)
+ }
+
+ return result
+}
+
+func (this NvidiaParser) Title() string {
+ return "NVidia Research"
+}
+
+func (this NvidiaParser) Description() string {
+ return "NVidia Research papers"
+}
+
+func (this NvidiaParser) RootUrl() string {
+ return "https://research.nvidia.com"
+}
+
+func (this NvidiaParser) ServerUrl() string {
+ return "/nvidia"
+}
+
+func (this NvidiaParser) CacheName() string {
+ return "NvidiaParser"
+}
+
+func (this NvidiaParser) CacheTimeout() int {
+ return 7200
+}
diff --git a/parsers/nvidia.py b/parsers/nvidia.py
deleted file mode 100644
index 556b996..0000000
--- a/parsers/nvidia.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from pyquery import PyQuery as pq
-from feedgen.feed import FeedGenerator
-from collections import namedtuple
-import multiprocessing
-from datetime import datetime
-import requests
-
-Entry = namedtuple('Entry', 'url fe')
-
-class NvidiaParser:
- NAME = 'NVidia Research'
- URL = '/nvidia'
- CACHE_TIMEOUT = 3600
- root_url = 'https://research.nvidia.com'
- favicon = None
-
- def loadFavicon(self):
- try:
- favUrl = NvidiaParser.root_url + '/themes/custom/nvidia/favicon.ico'
- self.favicon = requests.get(favUrl)
- except:
- pass
-
-# def __init__(self):
-# self.loadFavicon()
-
- def parseNvidiaDate(entry):
- dom = pq(entry.url)
- print(entry.url)
-
- time = dom('.field--name-field-publication-date').find('time').attr.datetime
- time = datetime.strptime(time, '%Y-%m-%dT%H:%M:%S%z')
- entry.fe.pubDate(time)
-
- def getRss(self):
- d = pq(self.root_url +'/publications')
-# self.loadFavicon()
-
- fg = FeedGenerator()
- fg.id(self.root_url)
- fg.title('NVidia Research')
- fg.link(href=self.root_url, rel='alternate')
- fg.description('NVidia Research papers')
-
- entries = []
-
- for elem in d('.views-field-title').items():
- link = elem.find('a')
- url = self.root_url + link.attr.href
- title = link.text()
-
- fe = fg.add_entry()
- fe.id(url)
- fe.title(title)
- fe.link(href=url)
-
- entries.append(Entry(url, fe))
-
- for entry in entries:
- NvidiaParser.parseNvidiaDate(entry)
- print(entry.url)
-
-# with multiprocessing.Pool(8) as p:
-# p.map(NvidiaParser.parseNvidiaDate, entries)
-
- return fg.rss_str()
-