Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import * as constants from './constants'
import { parsePageUsingMercury } from './parser'
import { verify } from './sign'
import { getCache } from './cache'
import { parseCreator, addDcCreators } from './author'

const app = express()
const cache = getCache({
Expand Down Expand Up @@ -70,6 +71,9 @@ async function getFullTextFeed(feedUrl: string, maxItemsPerFeed: number) {
link: item.link!,
date: new Date(item.pubDate!),
}
if (item.creator) {
newItem.author = [parseCreator(item.creator)]
}
let content: string | undefined = await cache.get(item.link!)
if (!content) {
content = (await parsePageUsingMercury(item.link!)).content
Expand Down Expand Up @@ -153,7 +157,7 @@ app.get('/feed', async (req, res) => {

if (format == Format.RSS) {
res.set('Content-type', 'application/rss+xml;charset=UTF-8')
res.end(outputFeed.rss2())
res.end(addDcCreators(outputFeed.rss2(), outputFeed.items))
} else if (format == Format.JSON) {
res.set('Content-type', 'application/json;charset=UTF-8')
res.end(outputFeed.json1())
Expand Down
190 changes: 190 additions & 0 deletions src/author.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// @ts-nocheck
import { Feed } from 'feed'
import type { FeedOptions, Item } from 'feed/lib/typings'
import { parseCreator, addDcCreators } from './author'

// Table-driven checks for parseCreator: a creator string is either a bare
// name or the RSS "email (name)" form; anything else is kept verbatim as name.
describe('parseCreator', () => {
  // Each row: test title, raw creator string, expected parsed author.
  const cases = [
    ['parses name-only creator', 'John Doe', { name: 'John Doe' }],
    [
      'parses email (name) format',
      'john@example.com (John Doe)',
      { email: 'john@example.com', name: 'John Doe' },
    ],
    ['handles creator with just a username', 'admin', { name: 'admin' }],
    [
      'does not parse invalid email format',
      'not-an-email (Name)',
      { name: 'not-an-email (Name)' },
    ],
  ]

  for (const [title, input, expected] of cases) {
    test(title, () => {
      expect(parseCreator(input)).toEqual(expected)
    })
  }
})

// Exercises addDcCreators against RSS 2.0 XML rendered by the `feed` library.
describe('addDcCreators', () => {
  // Renders the given items through the feed library and returns both the
  // RSS XML and the item list exactly as the feed instance holds it.
  function buildFeedXml(items: Item[]): { xml: string; items: Item[] } {
    const feedOptions: FeedOptions = {
      title: 'Test Feed',
      id: 'https://example.com/feed',
      link: 'https://example.com',
      copyright: '',
    }
    const feed = new Feed(feedOptions)
    for (const item of items) {
      feed.addItem(item)
    }
    return { xml: feed.rss2(), items: feed.items }
  }

  // Default fixture item (with content); individual tests override fields.
  function makeItem(overrides: Partial<Item> = {}): Item {
    return {
      title: 'Test Article',
      link: 'https://example.com/article',
      date: new Date('2024-01-01'),
      content: 'Test content',
      ...overrides,
    }
  }

  test('adds dc:creator for item with name-only author', () => {
    const { xml, items: feedItems } = buildFeedXml([
      makeItem({ author: [{ name: 'John Doe' }] }),
    ])
    const result = addDcCreators(xml, feedItems)

    expect(result).toContain('<dc:creator><![CDATA[John Doe]]></dc:creator>')
    expect(result).toContain('xmlns:dc=')
  })

  test('adds dc:creator for item with email and name author', () => {
    const { xml, items: feedItems } = buildFeedXml([
      makeItem({ author: [{ name: 'John Doe', email: 'john@example.com' }] }),
    ])
    const result = addDcCreators(xml, feedItems)

    expect(result).toContain('<dc:creator><![CDATA[John Doe]]></dc:creator>')
    // Should also have <author> element from feed library
    expect(result).toContain('<author>john@example.com (John Doe)</author>')
  })

  test('skips dc:creator for items without author', () => {
    const { xml, items: feedItems } = buildFeedXml([makeItem()])
    const result = addDcCreators(xml, feedItems)

    expect(result).not.toContain('<dc:creator>')
  })

  test('handles mixed items with and without authors', () => {
    const { xml, items: feedItems } = buildFeedXml([
      makeItem({
        title: 'Article 1',
        link: 'https://example.com/1',
        content: 'Content 1',
        author: [{ name: 'Author One' }],
      }),
      makeItem({
        title: 'Article 2',
        link: 'https://example.com/2',
        date: new Date('2024-01-02'),
        content: 'Content 2',
      }),
      makeItem({
        title: 'Article 3',
        link: 'https://example.com/3',
        date: new Date('2024-01-03'),
        content: 'Content 3',
        author: [{ name: 'Author Three' }],
      }),
    ])
    const result = addDcCreators(xml, feedItems)

    expect(result).toContain('<dc:creator><![CDATA[Author One]]></dc:creator>')
    expect(result).toContain('<dc:creator><![CDATA[Author Three]]></dc:creator>')

    // Only the two authored items may receive a creator element.
    expect(result.match(/<dc:creator>/g)).toHaveLength(2)

    // The author-less item must stay clean; this catches a creator being
    // attached to the wrong <item> when the insertion offsets drift.
    const articleTwoBlock = result
      .split('</item>')
      .find((chunk) => chunk.includes('Article 2'))
    expect(articleTwoBlock).toBeDefined()
    expect(articleTwoBlock).not.toContain('<dc:creator>')
  })

  test('adds xmlns:dc namespace if not present', () => {
    // Deliberately no `content` field on this item.
    const { xml, items: feedItems } = buildFeedXml([
      {
        title: 'Test Article',
        link: 'https://example.com/article',
        date: new Date('2024-01-01'),
        author: [{ name: 'John Doe' }],
      },
    ])
    // Without content, the feed library won't add xmlns:dc
    expect(xml).not.toContain('xmlns:dc=')

    const result = addDcCreators(xml, feedItems)
    expect(result).toContain('xmlns:dc="http://purl.org/dc/elements/1.1/"')
    expect(result).toContain('<dc:creator><![CDATA[John Doe]]></dc:creator>')
  })

  test('produces valid XML structure', () => {
    const { xml, items: feedItems } = buildFeedXml([
      makeItem({ author: [{ name: 'John Doe' }] }),
    ])
    const result = addDcCreators(xml, feedItems)

    // dc:creator should be inside <item> block
    const itemStart = result.indexOf('<item>')
    const itemEnd = result.indexOf('</item>')
    const dcCreatorIdx = result.indexOf('<dc:creator>')
    expect(dcCreatorIdx).toBeGreaterThan(itemStart)
    expect(dcCreatorIdx).toBeLessThan(itemEnd)
  })

  test('escapes ]]> in author names to prevent CDATA injection', () => {
    const { xml, items: feedItems } = buildFeedXml([
      makeItem({ author: [{ name: 'Evil]]>Author' }] }),
    ])
    const result = addDcCreators(xml, feedItems)

    // The ]]> sequence should be escaped by splitting the CDATA section
    expect(result).not.toContain('<dc:creator><![CDATA[Evil]]>Author]]></dc:creator>')
    expect(result).toContain('<dc:creator><![CDATA[Evil]]]]><![CDATA[>Author]]></dc:creator>')
  })
})
40 changes: 40 additions & 0 deletions src/author.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import type { Item, Author } from 'feed/lib/typings'

/**
 * Converts a feed item's creator string into an `Author` object.
 *
 * Two shapes are recognised:
 * - `email (name)`, e.g. `john@example.com (John Doe)`, yields both
 *   `email` and `name`;
 * - anything else is returned unchanged (apart from trimming) as `name`.
 *
 * @param creator - Raw creator/author text taken from a feed item.
 * @returns The parsed author; `email` is present only for the `email (name)` form.
 */
export function parseCreator(creator: string): Author {
  // The pattern below is anchored at both ends, so stray whitespace around the
  // value (e.g. a trailing newline) would make a valid "email (name)" string
  // fall through to the name-only branch and leak into the name.
  const trimmed = creator.trim()
  const match = /^(\S+@\S+)\s+\((.+)\)$/.exec(trimmed)
  if (match) {
    return { email: match[1], name: match[2] }
  }
  return { name: trimmed }
}

export function addDcCreators(rssXml: string, items: Item[]): string {
let result = rssXml
let searchFrom = 0

for (const item of items) {
const closeIdx = result.indexOf('</item>', searchFrom)
if (closeIdx === -1) break

if (item.author?.length && item.author[0].name) {
const name = item.author[0].name.replace(/]]>/g, ']]]]><![CDATA[>')
const newlinePos = result.lastIndexOf('\n', closeIdx)
const itemIndent = result.substring(newlinePos + 1, closeIdx)
const childIndent = itemIndent + ' '
const dcLine = '\n' + childIndent + `<dc:creator><![CDATA[${name}]]></dc:creator>`

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot this implementation feels too hacky. If our current library doesn't support this, could you explore and find a library with good support for the author & creator fields?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced the feed library with feedsmith which has native support for Dublin Core namespace elements including dc:creator. The hacky XML post-processing in src/author.ts is now completely removed — feedsmith handles dc:creator natively via dc: { creator: '...' } on items, and automatically manages namespace declarations. See commit 4ca6317.

result = result.slice(0, newlinePos) + dcLine + result.slice(newlinePos)
searchFrom = closeIdx + dcLine.length + '</item>'.length
} else {
searchFrom = closeIdx + '</item>'.length
}
}

if (!result.includes('xmlns:dc=')) {
result = result.replace(
'version="2.0"',
'version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/"'
)
}

return result
}