Skip to content

Commit

Permalink
custom rss parser
Browse files Browse the repository at this point in the history
  • Loading branch information
guillaumewuip committed Aug 28, 2023
1 parent c34237e commit fdaf3b0
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 53 deletions.
13 changes: 3 additions & 10 deletions rss-to-tana/index.js
@@ -1,16 +1,9 @@
const RSSParser = require('rss-parser');
const cron = require('node-cron');

const Store = require('./store');
const Item = require('./item');
const Tana = require('./tana');

const parser = new RSSParser({
defaultRSS: 2.0,
xml2js: {
strict: true,
}
});
const RSS = require('./rss');

const schedules = {
twiceAtNight: '0 0 23,4 * * *', // 23:00 and 04:00 every day
Expand Down Expand Up @@ -108,9 +101,9 @@ function dateDiffInDays(a, b) {
async function extractItems(feed) {
console.log(feed.url, '- parsing')
try {
const parsedFeed = await parser.parseURL(feed.url);
const items = await RSS.parse(feed.url);

return parsedFeed.items.map(rssItem => Item.create(rssItem, feed))
return items.map(rssItem => Item.create(rssItem, feed))
} catch (error) {
console.error(feed.url, `parsing error`, error);

Expand Down
2 changes: 1 addition & 1 deletion rss-to-tana/item.js
Expand Up @@ -91,7 +91,7 @@ function website(feedUrl, item) {
/**
 * Builds the internal item record for one parsed feed entry.
 *
 * @param {{link: *, title: *, publishedAt: *}} rssItem - entry as returned by the RSS parser.
 * @param {{url: *, toTana: Function}} feed - feed descriptor the entry belongs to.
 * @returns {{id: *, title: *, publishedAt: *, tanaNode: *, feed: *}} the item; its `id` is
 *   the entry link and `tanaNode` is whatever `feed.toTana(feed.url, rssItem)` returns.
 */
const create = (rssItem, feed) => ({
  id: rssItem.link,
  title: rssItem.title,
  // The publication date is passed through as-is from the parsed entry; the
  // earlier duplicate `publishedAt` key built from `rssItem.isoDate` was always
  // overwritten by this one, so it has been dropped.
  publishedAt: rssItem.publishedAt,
  tanaNode: feed.toTana(feed.url, rssItem),
  feed,
})
Expand Down
129 changes: 89 additions & 40 deletions rss-to-tana/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions rss-to-tana/package.json
Expand Up @@ -2,9 +2,9 @@
"name": "rss-to-tana",
"main": "index.js",
"dependencies": {
"htmlparser2": "^9.0.0",
"node-cron": "^3.0.2",
"redis": "^4.6.7",
"rss-parser": "^3.9.0"
"redis": "^4.6.7"
},
"scripts": {
"start": "node index.js"
Expand Down
34 changes: 34 additions & 0 deletions rss-to-tana/rss.js
@@ -0,0 +1,34 @@
const htmlparser2 = require('htmlparser2')

/**
 * Turns a feed entry's publication date into a `Date` instance.
 *
 * @param {*} pubDateString - value handed straight to the `Date` constructor.
 * @returns {Date} the result of `new Date(pubDateString)`; this is an Invalid
 *   Date when the input is `undefined` or cannot be parsed.
 */
function parsePubDate(pubDateString) {
  const publishedAt = new Date(pubDateString)

  return publishedAt
}

/**
 * Downloads a feed and returns its entries in the minimal shape used by the
 * rest of the application.
 *
 * @param {string} feedUrl - URL of the feed to download.
 * @returns {Promise<Array<{title: *, link: *, publishedAt: Date}>>} one record
 *   per feed entry; an empty array when the feed has no entries.
 * @throws {Error} when the HTTP response is not OK, or when the response body
 *   does not contain a feed that htmlparser2 recognises.
 */
async function parse(feedUrl) {
  const response = await fetch(feedUrl)

  if (!response.ok) {
    throw new Error(`Error fetching ${feedUrl}: ${response.status} ${response.statusText}`)
  }

  const content = await response.text()

  const feed = htmlparser2.parseFeed(content)

  // parseFeed yields null when the document has no feed root (for example an
  // HTML error page served with a 200 status). Fail with a message naming the
  // feed instead of crashing on `null.items`.
  if (feed == null) {
    throw new Error(`Error parsing ${feedUrl}: no RSS or Atom feed found in the response`)
  }

  const items = feed.items || []

  try {
    return items.map((item) => ({
      title: item.title,
      link: item.link,
      publishedAt: parsePubDate(item.pubDate || item.date),
    }))
  } catch (err) {
    // Best effort: a malformed entry must not abort the caller's run.
    console.log(err)
    return []
  }
}

// Public API of this module: only `parse` is exported; `parsePubDate` stays internal.
module.exports = {
parse,
}

0 comments on commit fdaf3b0

Please sign in to comment.