-
Notifications
You must be signed in to change notification settings - Fork 6
Add author/dc:creator support to fulltext feed output using feedsmith #964
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
c55df2b
0fefe72
d152925
8d414eb
4ca6317
5aef183
0d10fee
376d0ac
6072ca5
a28a9bf
5b70a82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,7 @@ | ||
| import express from 'express' | ||
| import bodyParser from 'body-parser' | ||
| import Parser from 'rss-parser' | ||
| import { Feed } from 'feed' | ||
| import type { FeedOptions, Item } from 'feed/lib/typings' | ||
| import { generateRssFeed, generateJsonFeed } from 'feedsmith' | ||
| import * as Sentry from '@sentry/node' | ||
| import { RewriteFrames } from '@sentry/integrations' | ||
|
|
||
|
|
@@ -48,60 +47,103 @@ app.use(express.static(constants.publicPath)) | |
|
|
||
| app.use(bodyParser.urlencoded({ extended: false })) | ||
|
|
||
| async function getFullTextFeed(feedUrl: string, maxItemsPerFeed: number) { | ||
| interface FeedData { | ||
| title: string | ||
| description?: string | ||
| link: string | ||
| image?: string | ||
| items: Array<{ | ||
| title: string | ||
| link: string | ||
| date: Date | ||
| content?: string | ||
| description?: string | ||
| creator?: string | ||
| categories?: string[] | ||
| guid?: string | ||
| }> | ||
| } | ||
|
|
||
| async function getFullTextFeed(feedUrl: string, maxItemsPerFeed: number): Promise<FeedData> { | ||
| const parser = new Parser() | ||
| try { | ||
| const feed = await parser.parseURL(feedUrl) | ||
| const feedOptions: FeedOptions = { | ||
| ...feed, | ||
| title: feed.title!, | ||
| description: feed.description, | ||
| link: feedUrl, | ||
| id: feedUrl, | ||
| image: feed.image?.url, | ||
| copyright: '', | ||
| } | ||
| const outputFeed = new Feed(feedOptions) | ||
|
|
||
| const newItems = await Promise.all((feed.items || []).filter(item => !!item.link).slice(0, maxItemsPerFeed).map(async item => { | ||
| const newItem: Item = { | ||
| ...item, | ||
| title: item.title!, | ||
| link: item.link!, | ||
| date: new Date(item.pubDate!), | ||
| } | ||
| const items = await Promise.all((feed.items || []).filter(item => !!item.link).slice(0, maxItemsPerFeed).map(async item => { | ||
| let content: string | undefined = await cache.get(item.link!) | ||
| if (!content) { | ||
| content = (await parsePageUsingMercury(item.link!)).content | ||
| await cache.set(item.link!, content) | ||
| } | ||
| newItem.content = content | ||
| return newItem | ||
| return { | ||
| title: item.title!, | ||
| link: item.link!, | ||
| date: new Date(item.pubDate!), | ||
| content, | ||
| description: item.contentSnippet || item.content, | ||
| creator: item.creator, | ||
| categories: item.categories, | ||
| guid: item.guid, | ||
| } | ||
| })) | ||
| for (const newItem of newItems) { | ||
| outputFeed.addItem(newItem) | ||
|
|
||
| return { | ||
| title: feed.title!, | ||
| description: feed.description, | ||
| link: feedUrl, | ||
| image: feed.image?.url, | ||
| items, | ||
| } | ||
| return outputFeed | ||
| } catch (e) { | ||
| if (constants.sentryDsn) { | ||
| Sentry.captureException(e) | ||
| } | ||
| const outputFeed = new Feed({ | ||
| id: `${feedUrl}-failed`, | ||
| return { | ||
| title: `Failed to get fulltext rss for ${feedUrl}.`, | ||
| copyright: '', | ||
| }) | ||
| const errorItem: Item = { | ||
| title: `Failed to get fulltext rss for ${feedUrl}.`, | ||
| content: `Exception: ${e}`, | ||
| link: 'https://github.com/whtsky/fulltextrssplz/issues', | ||
| date: new Date(), | ||
| link: feedUrl, | ||
| items: [{ | ||
| title: `Failed to get fulltext rss for ${feedUrl}.`, | ||
| content: `Exception: ${e}`, | ||
| link: 'https://github.com/whtsky/fulltextrssplz/issues', | ||
| date: new Date(), | ||
| }], | ||
| } | ||
| outputFeed.addItem(errorItem) | ||
| return outputFeed | ||
| } | ||
| } | ||
|
|
||
| function feedToRss(data: FeedData): string { | ||
| return generateRssFeed({ | ||
| title: data.title, | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. same question,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in |
||
| description: data.description || '', | ||
| link: data.link, | ||
| items: data.items.map(item => ({ | ||
| title: item.title, | ||
| link: item.link, | ||
| pubDate: item.date, | ||
| description: item.description, | ||
| guid: item.guid ? { value: item.guid } : undefined, | ||
| dc: item.creator ? { creator: item.creator } : undefined, | ||
| content: item.content ? { encoded: item.content } : undefined, | ||
| })), | ||
| }, { lenient: true }) | ||
| } | ||
|
|
||
| function feedToJson(data: FeedData): string { | ||
| return JSON.stringify(generateJsonFeed({ | ||
| title: data.title, | ||
| home_page_url: data.link, | ||
| description: data.description, | ||
| items: data.items.map(item => ({ | ||
| id: item.guid || item.link, | ||
| url: item.link, | ||
| title: item.title, | ||
| date_published: item.date, | ||
| content_html: item.content, | ||
| authors: item.creator ? [{ name: item.creator }] : undefined, | ||
| })), | ||
| }, { lenient: true })) | ||
| } | ||
|
|
||
| app.get('/feed', async (req, res) => { | ||
| const feedUrl = req.query.url | ||
|
|
||
|
|
@@ -145,18 +187,18 @@ app.get('/feed', async (req, res) => { | |
| } | ||
| } | ||
|
|
||
| const outputFeed = await getFullTextFeed(feedUrl, maxItemsPerFeed) | ||
| const feedData = await getFullTextFeed(feedUrl, maxItemsPerFeed) | ||
|
|
||
| if (constants.cacheControlMaxAge > 0) { | ||
| res.set('Cache-control', `public, max-age=${constants.cacheControlMaxAge}`) | ||
| } | ||
|
|
||
| if (format == Format.RSS) { | ||
| res.set('Content-type', 'application/rss+xml;charset=UTF-8') | ||
| res.end(outputFeed.rss2()) | ||
| res.end(feedToRss(feedData)) | ||
| } else if (format == Format.JSON) { | ||
| res.set('Content-type', 'application/json;charset=UTF-8') | ||
| res.end(outputFeed.json1()) | ||
| res.end(feedToJson(feedData)) | ||
| } else { | ||
| res.end('unknown format:' + format) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,132 @@ | ||
| // @ts-nocheck | ||
| import { generateRssFeed, generateJsonFeed } from 'feedsmith' | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @copilot I don't think this file should still exist.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Deleted in |
||
|
|
||
| describe('RSS feed generation with dc:creator', () => { | ||
| test('includes dc:creator when creator is specified', () => { | ||
| const rss = generateRssFeed({ | ||
| title: 'Test Feed', | ||
| link: 'https://example.com', | ||
| description: 'A test feed', | ||
| items: [{ | ||
| title: 'Test Article', | ||
| link: 'https://example.com/article', | ||
| pubDate: new Date('2024-01-01'), | ||
| dc: { creator: 'John Doe' }, | ||
| content: { encoded: '<p>Full content</p>' }, | ||
| }], | ||
| }, { lenient: true }) | ||
|
|
||
| expect(rss).toContain('<dc:creator>John Doe</dc:creator>') | ||
| expect(rss).toContain('xmlns:dc="http://purl.org/dc/elements/1.1/"') | ||
| }) | ||
|
|
||
| test('omits dc:creator when no creator specified', () => { | ||
| const rss = generateRssFeed({ | ||
| title: 'Test Feed', | ||
| link: 'https://example.com', | ||
| description: 'A test feed', | ||
| items: [{ | ||
| title: 'Test Article', | ||
| link: 'https://example.com/article', | ||
| pubDate: new Date('2024-01-01'), | ||
| content: { encoded: '<p>Full content</p>' }, | ||
| }], | ||
| }, { lenient: true }) | ||
|
|
||
| expect(rss).not.toContain('<dc:creator>') | ||
| }) | ||
|
|
||
| test('handles mixed items with and without creators', () => { | ||
| const rss = generateRssFeed({ | ||
| title: 'Test Feed', | ||
| link: 'https://example.com', | ||
| description: 'A test feed', | ||
| items: [ | ||
| { | ||
| title: 'Article 1', | ||
| link: 'https://example.com/1', | ||
| dc: { creator: 'Author One' }, | ||
| }, | ||
| { | ||
| title: 'Article 2', | ||
| link: 'https://example.com/2', | ||
| }, | ||
| { | ||
| title: 'Article 3', | ||
| link: 'https://example.com/3', | ||
| dc: { creator: 'Author Three' }, | ||
| }, | ||
| ], | ||
| }, { lenient: true }) | ||
|
|
||
| expect(rss).toContain('<dc:creator>Author One</dc:creator>') | ||
| expect(rss).not.toContain('Author Two') | ||
| expect(rss).toContain('<dc:creator>Author Three</dc:creator>') | ||
| }) | ||
|
|
||
| test('properly handles special characters in creator names', () => { | ||
| const rss = generateRssFeed({ | ||
| title: 'Test Feed', | ||
| link: 'https://example.com', | ||
| description: 'A test feed', | ||
| items: [{ | ||
| title: 'Test Article', | ||
| link: 'https://example.com/article', | ||
| dc: { creator: 'Name with <special> & "chars"' }, | ||
| }], | ||
| }, { lenient: true }) | ||
|
|
||
| expect(rss).toContain('<dc:creator>') | ||
| // Special chars should be safely contained (CDATA or entity-escaped) | ||
| expect(rss).toContain('Name with') | ||
| }) | ||
|
|
||
| test('includes content:encoded with full text', () => { | ||
| const rss = generateRssFeed({ | ||
| title: 'Test Feed', | ||
| link: 'https://example.com', | ||
| description: 'A test feed', | ||
| items: [{ | ||
| title: 'Test Article', | ||
| link: 'https://example.com/article', | ||
| content: { encoded: '<p>Full article content</p>' }, | ||
| dc: { creator: 'John Doe' }, | ||
| }], | ||
| }, { lenient: true }) | ||
|
|
||
| expect(rss).toContain('<content:encoded>') | ||
| expect(rss).toContain('Full article content') | ||
| expect(rss).toContain('xmlns:content="http://purl.org/rss/1.0/modules/content/"') | ||
| }) | ||
| }) | ||
|
|
||
| describe('JSON feed generation with authors', () => { | ||
| test('includes author when creator is specified', () => { | ||
| const json = generateJsonFeed({ | ||
| title: 'Test Feed', | ||
| items: [{ | ||
| id: 'https://example.com/article', | ||
| url: 'https://example.com/article', | ||
| title: 'Test Article', | ||
| content_html: '<p>Full content</p>', | ||
| authors: [{ name: 'John Doe' }], | ||
| }], | ||
| }, { lenient: true }) | ||
|
|
||
| expect(json.items[0].authors).toEqual([{ name: 'John Doe' }]) | ||
| }) | ||
|
|
||
| test('omits authors when no creator specified', () => { | ||
| const json = generateJsonFeed({ | ||
| title: 'Test Feed', | ||
| items: [{ | ||
| id: 'https://example.com/article', | ||
| url: 'https://example.com/article', | ||
| title: 'Test Article', | ||
| content_html: '<p>Full content</p>', | ||
| }], | ||
| }, { lenient: true }) | ||
|
|
||
| expect(json.items[0].authors).toBeUndefined() | ||
| }) | ||
| }) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,7 +10,8 @@ | |
| "inlineSources": true, | ||
| "target": "ES6", | ||
| "esModuleInterop": true, | ||
| "allowSyntheticDefaultImports": true | ||
| "allowSyntheticDefaultImports": true, | ||
| "skipLibCheck": true | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is unacceptable, but you're welcome to upgrade the Node version to 24 if @types/node is getting in your way. @copilot
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed |
||
| }, | ||
| "include": ["src/**/*", "api/*"], | ||
| "exclude": ["node_modules", "**/*.spec.ts"] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we use ...item here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in
0d10fee — now uses the `...item` spread with overrides for the transformed fields (`date`, `content`, `description`).