Skip to content
Draft
5 changes: 3 additions & 2 deletions modules/node_modules/@frogpond/ccc-google-calendar/index.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import {get} from '@frogpond/ccc-lib'
import {get, fastGetTrimmedText} from '@frogpond/ccc-lib'
import moment from 'moment'
import getUrls from 'get-urls'
import _jsdom from 'jsdom'
const {JSDOM} = _jsdom

function convertGoogleEvents(data, now = moment()) {
let events = data.map((event) => {
const title = fastGetTrimmedText(event.summary || '')
const startTime = moment(event.start.date || event.start.dateTime)
const endTime = moment(event.end.date || event.end.dateTime)
let description = (event.description || '').replace('<br>', '\n')
Expand All @@ -15,7 +16,7 @@ function convertGoogleEvents(data, now = moment()) {
dataSource: 'google',
startTime,
endTime,
title: event.summary || '',
title,
description: description,
location: event.location || '',
isOngoing: startTime.isBefore(now, 'day'),
Expand Down
28 changes: 28 additions & 0 deletions modules/node_modules/@frogpond/ccc-lib/html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import {parseDocument} from 'htmlparser2'
import {textContent} from 'domutils'
import {toLaxTitleCase} from '@frogpond/titlecase'

export {encode, decode} from 'html-entities'
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did we handle these here before adding this module? Does JSDOM handle this for us automatically when we call textContent?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great point. Looks like JSDOM handles decoding the entities properly for us. I've created a Repl to show the differences between fastGetTrimmedText and JSDOM's textContent.


// Html

/**
 * Parse a string of markup by handing it to `parseDocument`.
 *
 * The parser is always invoked with the same fixed options:
 * `normalizeWhitespace: false`, `xmlMode: false`, `decodeEntities: true`.
 *
 * @param {string} string - the markup to parse
 * @returns whatever `parseDocument` returns for that input
 */
export function parseHtml(string) {
	const parserOptions = {
		normalizeWhitespace: false,
		xmlMode: false,
		decodeEntities: true,
	}

	return parseDocument(string, parserOptions)
}

/**
 * Get the text of a parsed node with whitespace normalized.
 *
 * The text is read via `textContent`, every run of whitespace is collapsed
 * to a single space, and leading/trailing whitespace is removed.
 *
 * @param elem - the node (or nodes) passed through to `textContent`
 * @returns {string} the single-spaced, trimmed text
 */
export function innerTextWithSpaces(elem) {
	const rawText = textContent(elem)
	const pieces = rawText.split(/\s+/u)
	return pieces.join(' ').trim()
}

/**
 * Replace every `<...>` span in a string with a single space.
 *
 * This is a purely textual substitution: anything from a `<` up to the next
 * `>` is treated as a tag. Whitespace is not collapsed or trimmed here.
 *
 * @param {string} str - the text to strip
 * @returns {string} the text with each matched span replaced by one space
 */
export function removeHtmlWithRegex(str) {
	const tagPattern = /<[^>]*>/gu
	return str.replace(tagPattern, ' ')
}

/**
 * Strip markup from a string and normalize its whitespace.
 *
 * The input is passed through removeHtmlWithRegex, then every run of
 * whitespace is collapsed to a single space and the result is trimmed.
 *
 * @param {string} str - the text to clean
 * @returns {string} the stripped, single-spaced, trimmed text
 */
export function fastGetTrimmedText(str) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we have JSDOM here and aren't resource constrained, I'd like to remove this fn in favor of a JSDOM-based solution

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for mentioning this. We will go with the textContent solution.

return removeHtmlWithRegex(str).replace(/\s+/gu, ' ').trim()
}

1 change: 1 addition & 0 deletions modules/node_modules/@frogpond/ccc-lib/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export {get} from './http'
export * from './cache'
export * from './url'
export * from './html'
15 changes: 10 additions & 5 deletions modules/node_modules/@frogpond/ccc-presence/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {get, ONE_HOUR} from '@frogpond/ccc-lib'
import {get, ONE_HOUR, decode} from '@frogpond/ccc-lib'
import mem from 'mem'
import lodash from 'lodash'
import _jsdom from 'jsdom'
Expand Down Expand Up @@ -45,10 +45,15 @@ export function cleanOrg(org) {
// )

let category = org.categories.join(', ')
let meetings =
(org.regularMeetingLocation || '').trim() +
(org.regularMeetingTime || '').trim()
let description = JSDOM.fragment(org.description).textContent.trim()

let meetings = decode(
(org.regularMeetingTime + ' ' + org.regularMeetingLocation).trim()
)

let description = decode(
JSDOM.fragment(org.description).textContent.trim()
)

let website = (org.website || '').trim()
if (website && !/^https?:\/\//.test(website)) {
website = `http://${website}`
Expand Down
5 changes: 3 additions & 2 deletions modules/node_modules/@frogpond/ccc-reason-calendar/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* eslint-disable camelcase */

import {get} from '@frogpond/ccc-lib'
import {get, fastGetTrimmedText} from '@frogpond/ccc-lib'
import moment from 'moment-timezone'
import dropWhile from 'lodash/dropWhile'
import dropRightWhile from 'lodash/dropRightWhile'
Expand Down Expand Up @@ -113,6 +113,7 @@ function convertReasonEvent(event, now = moment()) {
moment(event.startTime).isBefore(now, 'day') &&
moment(event.endTime).isSameOrAfter(now)

let title = fastGetTrimmedText(event.name || '')
let description = (event.description || '').replace('<br>', '\n')
description = JSDOM.fragment(description).textContent.trim()

Expand All @@ -122,7 +123,7 @@ function convertReasonEvent(event, now = moment()) {
dataSource: 'reason',
startTime: event.startTime,
endTime: event.endTime,
title: event.name || '',
title: title,
description: description,
location: event.location || '',
links: links,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import {get, ONE_DAY} from '@frogpond/ccc-lib'
import {get, ONE_DAY, fastGetTrimmedText, decode} from '@frogpond/ccc-lib'
import mem from 'mem'
import _jsdom from 'jsdom'
import getUrls from 'get-urls'

const {JSDOM} = _jsdom

export function cleanJob(job) {
const title = fastGetTrimmedText(job.title)
const office = fastGetTrimmedText(job.office)
const hoursPerWeek = fastGetTrimmedText(job.hoursPerWeek)

// these all need to retain their newlines
const description = cleanTextBlock(
JSDOM.fragment(job.description).textContent,
const description = decode(
cleanTextBlock(
JSDOM.fragment(job.description).textContent,
)
)
const comments = cleanTextBlock(JSDOM.fragment(job.comments).textContent)
const skills = cleanTextBlock(JSDOM.fragment(job.skills).textContent)
Expand All @@ -34,12 +40,12 @@ export function cleanJob(job) {
contactPhone: contactPhone,
description: description,
goodForIncomingStudents: job.goodForIncomingStudents,
hoursPerWeek: job.hoursPerWeek,
hoursPerWeek: hoursPerWeek,
howToApply: howToApply,
id: job.id,
lastModified: job.lastModified,
links: links,
office: job.office,
office: office,
openPositions: job.openPositions,
skills: skills,
timeline: timeline,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {get, ONE_HOUR} from '@frogpond/ccc-lib'
import {get, ONE_HOUR, innerTextWithSpaces, parseHtml} from '@frogpond/ccc-lib'
import mem from 'mem'
import moment from 'moment-timezone'

Expand All @@ -19,9 +19,17 @@ export async function getStreams({streamClass, sort, dateFrom, dateTo}) {
(resp) => resp.body,
)
const processed = data.results.map((stream) => {
let {starttime} = stream
let {starttime, title, subtitle, performer} = stream

let streamTitle = innerTextWithSpaces(parseHtml(title))
let detail = innerTextWithSpaces(
parseHtml(subtitle || performer || ''),
)

return {
...stream,
title: streamTitle,
subtitle: detail,
starttime: moment
.tz(starttime, 'YYYY-MM-DD HH:mm', 'America/Chicago')
.toISOString(),
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@
"test": "./scripts/smoke-test.sh"
},
"dependencies": {
"@frogpond/titlecase": "^1.0.0",
"dotenv": "10.0.0",
"esm": "3.2.25",
"get-urls": "10.0.1",
"got": "9.6.0",
"html-entities": "2.3.2",
"htmlparser2": "^7.1.2",
"is-absolute-url": "3.0.3",
"jsdom": "16.6.0",
"koa": "2.13.4",
Expand Down
55 changes: 55 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@
minimatch "^3.0.4"
strip-json-comments "^3.1.1"

"@frogpond/titlecase@^1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@frogpond/titlecase/-/titlecase-1.0.0.tgz#993e5371c31b58a839e76fdc531053c55b1a2736"
integrity sha512-C1qKm/J+B+cXo+7+ZHbRnt2iApx/IrxMEXwOxe+ZkeTvSC1nZ2XRWf0xzFuAanpBNDhFcgrxdnR8FMKNHo1scQ==

"@gar/promisify@^1.0.1":
version "1.1.2"
resolved "https://registry.yarnpkg.com/@gar/promisify/-/promisify-1.1.2.tgz#30aa825f11d438671d585bd44e7fd564535fc210"
Expand Down Expand Up @@ -674,18 +679,48 @@ doctrine@^3.0.0:
dependencies:
esutils "^2.0.2"

dom-serializer@^1.0.1:
version "1.3.2"
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-1.3.2.tgz#6206437d32ceefaec7161803230c7a20bc1b4d91"
integrity sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.2.0"
entities "^2.0.0"

domelementtype@^2.0.1, domelementtype@^2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.2.0.tgz#9a0b6c2782ed6a1c7323d42267183df9bd8b1d57"
integrity sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==

domexception@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/domexception/-/domexception-2.0.1.tgz#fb44aefba793e1574b0af6aed2801d057529f304"
integrity sha512-yxJ2mFy/sibVQlu5qHjOkf9J3K6zgmCxgJ94u2EdvDOV09H+32LtRswEcUsmUWN72pVLOEnTSRaIVVzVQgS0dg==
dependencies:
webidl-conversions "^5.0.0"

domhandler@^4.2.0, domhandler@^4.2.2:
version "4.2.2"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.2.2.tgz#e825d721d19a86b8c201a35264e226c678ee755f"
integrity sha512-PzE9aBMsdZO8TK4BnuJwH0QT41wgMbRzuZrHUcpYncEjmQazq8QEaBWgLG7ZyC/DAZKEgglpIA6j4Qn/HmxS3w==
dependencies:
domelementtype "^2.2.0"

domino@^2.1.6:
version "2.1.6"
resolved "https://registry.yarnpkg.com/domino/-/domino-2.1.6.tgz#fe4ace4310526e5e7b9d12c7de01b7f485a57ffe"
integrity sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ==

domutils@^2.8.0:
version "2.8.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.8.0.tgz#4437def5db6e2d1f5d6ee859bd95ca7d02048135"
integrity sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==
dependencies:
dom-serializer "^1.0.1"
domelementtype "^2.2.0"
domhandler "^4.2.0"

dotenv@10.0.0:
version "10.0.0"
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-10.0.0.tgz#3d4227b8fb95f81096cdd2b66653fb2c7085ba81"
Expand Down Expand Up @@ -732,6 +767,16 @@ enquirer@^2.3.5:
dependencies:
ansi-colors "^4.1.1"

entities@^2.0.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==

entities@^3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/entities/-/entities-3.0.1.tgz#2b887ca62585e96db3903482d336c1006c3001d4"
integrity sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q==

env-paths@^2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-2.2.0.tgz#cdca557dc009152917d6166e2febe1f039685e43"
Expand Down Expand Up @@ -1135,6 +1180,16 @@ html-entities@2.3.2:
resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.3.2.tgz#760b404685cb1d794e4f4b744332e3b00dcfe488"
integrity sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ==

htmlparser2@^7.1.2:
version "7.1.2"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-7.1.2.tgz#587923d38f03bc89e03076e00cba2c7473f37f7c"
integrity sha512-d6cqsbJba2nRdg8WW2okyD4ceonFHn9jLFxhwlNcLhQWcFPdxXeJulgOLjLKtAK9T6ahd+GQNZwG9fjmGW7lyg==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.2.2"
domutils "^2.8.0"
entities "^3.0.1"

http-assert@^1.3.0:
version "1.4.1"
resolved "https://registry.yarnpkg.com/http-assert/-/http-assert-1.4.1.tgz#c5f725d677aa7e873ef736199b89686cceb37878"
Expand Down