diff --git a/src/controllers/sites.js b/src/controllers/sites.js index b50226206..d297539a9 100755 --- a/src/controllers/sites.js +++ b/src/controllers/sites.js @@ -45,6 +45,7 @@ import { wwwUrlResolver, resolveWwwUrl, getIsSummitPlgEnabled, CUSTOMER_VISIBLE_TIERS, } from '../support/utils.js'; import AccessControlUtil from '../support/access-control-util.js'; +import { auditTargetURLsPatchGuard } from '../support/audit-target-urls-validation.js'; import { triggerBrandProfileAgent } from '../support/brand-profile-trigger.js'; /** @@ -639,6 +640,16 @@ function SitesController(ctx, log, env) { ? Config.toDynamoItem(siteConfig) || {} : {}; const merged = { ...existingConfig, ...requestBody.config }; + const auditTargetURLsResult = auditTargetURLsPatchGuard( + merged, + site.getBaseURL(), + requestBody.config, + badRequest, + ); + if (auditTargetURLsResult?.error) return auditTargetURLsResult.error; + if (auditTargetURLsResult?.normalized !== undefined) { + merged.auditTargetURLs = auditTargetURLsResult.normalized; + } site.setConfig(merged); updates = true; } diff --git a/src/support/audit-target-urls-validation.js b/src/support/audit-target-urls-validation.js new file mode 100644 index 000000000..2f8683f11 --- /dev/null +++ b/src/support/audit-target-urls-validation.js @@ -0,0 +1,153 @@ +/* + * Copyright 2026 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/** + * API validation for audit target URLs: parseable absolute URL, HTTPS only, and hostname + * must match the site base URL hostname when that can be derived. + */ + +/** Upper bound for manual audit target URLs per site (align with ASO UI). */ +export const MAX_MANUAL_AUDIT_TARGET_URLS = 500; + +/** + * Compare hostnames as equal if they match after lowercasing and stripping one leading `www.`. + * + * @param {string} hostname + * @returns {string} + */ +export function normalizeHostnameForAuditTargetMatch(hostname) { + const lower = hostname.toLowerCase(); + return lower.startsWith('www.') ? lower.slice(4) : lower; +} + +/** + * @param {string} baseURL + * @returns {string|null} + */ +export function siteHostnameFromBaseURL(baseURL) { + if (!baseURL || typeof baseURL !== 'string') { + return null; + } + try { + return new URL(baseURL).hostname; + } catch { + return null; + } +} + +/** + * @param {string} trimmed + * @param {string|null} siteHostname + * @returns {{ ok: true } | { ok: false, error: string }} + */ +export function validateAuditTargetUrlString(trimmed, siteHostname) { + let url; + try { + url = new URL(trimmed); + } catch { + return { ok: false, error: 'Invalid URL' }; + } + if (url.protocol !== 'https:') { + return { ok: false, error: 'URL must use HTTPS' }; + } + if ( + siteHostname + && normalizeHostnameForAuditTargetMatch(url.hostname) + !== normalizeHostnameForAuditTargetMatch(siteHostname) + ) { + return { + ok: false, + error: `URL hostname must match the site domain (${siteHostname}, with or without www.)`, + }; + } + return { ok: true }; +} + +/** + * Validates config.auditTargetURLs after merge (manual list only). + * + * @param {unknown} auditTargetURLs + * @param {string} siteBaseURL + * @returns {{ ok: true, normalized?: object } | { ok: false, error: string }} + */ +export function validateAuditTargetURLsConfig(auditTargetURLs, siteBaseURL) { + if (auditTargetURLs === undefined) { + return { ok: true }; + } + if (auditTargetURLs === null || typeof auditTargetURLs !== 'object' || Array.isArray(auditTargetURLs)) { + return { ok: false, error: 'config.auditTargetURLs must be an object when provided' }; + } + + const siteHostname = siteHostnameFromBaseURL(siteBaseURL); + const { manual, ...rest } = auditTargetURLs; + + if (manual === undefined) { + return { ok: true }; + } + if (!Array.isArray(manual)) { + return { ok: false, error: 'config.auditTargetURLs.manual must be an array' }; + } + if (manual.length > MAX_MANUAL_AUDIT_TARGET_URLS) { + return { + ok: false, + error: `config.auditTargetURLs.manual cannot contain more than ${MAX_MANUAL_AUDIT_TARGET_URLS} URLs`, + }; + } + + const normalizedManual = []; + for (let i = 0; i < manual.length; i += 1) { + const entry = manual[i]; + if (!entry || typeof entry !== 'object' || typeof entry.url !== 'string') { + return { + ok: false, + error: `config.auditTargetURLs.manual[${i}] must be an object with a string "url" property`, + }; + } + const trimmed = entry.url.trim(); + const result = validateAuditTargetUrlString(trimmed, siteHostname); + if (!result.ok) { + return { ok: false, error: `Invalid audit target URL at index ${i}: ${result.error}` }; + } + normalizedManual.push({ url: trimmed }); + } + + return { + ok: true, + normalized: { ...rest, manual: normalizedManual }, + }; +} + +/** + * When `configPatch` includes `auditTargetURLs`, validates `merged.auditTargetURLs` (HTTPS + + * hostname vs site) and writes normalized `manual` entries back onto `merged`. + * + * We must consult `configPatch`, not only `merged`: after `{ ...existingConfig, ...patch }`, + * `merged.auditTargetURLs` is still present if it existed on the site and the client only + * changed other keys (e.g. `slack`). In that case we must not re-validate or reject legacy + * data until the client explicitly sends `auditTargetURLs` again. + * + * @param {Record} merged + * @param {string} siteBaseURL + * @param {Record} configPatch + * @param {(message: string) => unknown} badRequestFn + * @returns {null | { error: unknown } | { normalized?: object }} + * `null` if the patch did not include `auditTargetURLs`; `{ error }` to return from the + * controller; `{ normalized }` or `{}` on success (caller assigns `normalized` to merged). + */ +export function auditTargetURLsPatchGuard(merged, siteBaseURL, configPatch, badRequestFn) { + if (!Object.prototype.hasOwnProperty.call(configPatch, 'auditTargetURLs')) { + return null; + } + const v = validateAuditTargetURLsConfig(merged.auditTargetURLs, siteBaseURL); + if (!v.ok) return { error: badRequestFn(v.error) }; + return v.normalized !== undefined ? { normalized: v.normalized } : {}; +} diff --git a/test/controllers/sites.test.js b/test/controllers/sites.test.js index 54ea30223..5ad176e4c 100644 --- a/test/controllers/sites.test.js +++ b/test/controllers/sites.test.js @@ -10,8 +10,6 @@ * governing permissions and limitations under the License. */ -/* eslint-env mocha */ - import { Organization, Site } from '@adobe/spacecat-shared-data-access'; import { Config } from '@adobe/spacecat-shared-data-access/src/models/site/config.js'; import OrganizationSchema from '@adobe/spacecat-shared-data-access/src/models/organization/organization.schema.js'; @@ -3437,6 +3435,79 @@ describe('Sites Controller', () => { expect(mergedConfig.handlers).to.deep.equal({ 'meta-tags': { excludedURLs: [] } }); }); + describe('auditTargetURLs validation', () => { + it('returns bad request when manual URL hostname does not match site base URL', async () => { + const site = sites[0]; + site.getConfig = sandbox.stub().returns(Config({ slack: { channel: '#x' } })); + site.setConfig = sandbox.stub(); + site.save = sandbox.stub(); + + const response = await sitesController.updateSite({ + params: { siteId: SITE_IDS[0] }, + data: { + config: { + auditTargetURLs: { + manual: [{ url: 'https://example.com/path1' }], + }, + }, + }, + ...defaultAuthAttributes, + }); + + expect(response.status).to.equal(400); + const err = await response.json(); + expect(err.message).to.include('Invalid audit target URL at index 0:'); + expect(err.message).to.include('site domain (site1.com, with or without www.)'); + expect(site.setConfig).to.have.not.been.called; + }); + + it('accepts manual URLs on the site hostname', async () => { + const site = sites[0]; + site.getConfig = sandbox.stub().returns(Config({})); + site.setConfig = sandbox.stub(); + site.save = sandbox.stub().resolves(site); + + const response = await sitesController.updateSite({ + params: { siteId: SITE_IDS[0] }, + data: { + config: { + auditTargetURLs: { + manual: [{ url: 'https://site1.com/path1' }, { url: 'https://site1.com/path2' }], + }, + }, + }, + ...defaultAuthAttributes, + }); + + expect(response.status).to.equal(200); + const merged = site.setConfig.firstCall.args[0]; + expect(merged.auditTargetURLs.manual).to.deep.equal([ + { url: 'https://site1.com/path1' }, + { url: 'https://site1.com/path2' }, + ]); + }); + + it('does not validate auditTargetURLs when key is omitted from config patch', async () => { + const site = sites[0]; + const existingConfig = Config({ + auditTargetURLs: { manual: [{ url: 'https://wrong.example/' }] }, + }); + site.getConfig = sandbox.stub().returns(existingConfig); + site.setConfig = sandbox.stub(); + site.save = sandbox.stub().resolves(site); + + const response = await sitesController.updateSite({ + params: { siteId: SITE_IDS[0] }, + data: { config: { slack: { channel: '#only' } } }, + ...defaultAuthAttributes, + }); + + expect(response.status).to.equal(200); + const merged = site.setConfig.firstCall.args[0]; + expect(merged.auditTargetURLs.manual[0].url).to.equal('https://wrong.example/'); + }); + }); + it('allows removing a config key by explicitly setting it to null', async () => { const site = sites[0]; const existingConfig = Config({ diff --git a/test/support/audit-target-urls-validation.test.js b/test/support/audit-target-urls-validation.test.js new file mode 100644 index 000000000..39b17101b --- /dev/null +++ b/test/support/audit-target-urls-validation.test.js @@ -0,0 +1,205 @@ +/* + * Copyright 2026 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import { expect } from 'chai'; + +import sinon from 'sinon'; + +import { + auditTargetURLsPatchGuard, + MAX_MANUAL_AUDIT_TARGET_URLS, + normalizeHostnameForAuditTargetMatch, + siteHostnameFromBaseURL, + validateAuditTargetUrlString, + validateAuditTargetURLsConfig, +} from '../../src/support/audit-target-urls-validation.js'; + +describe('audit-target-urls-validation', () => { + describe('normalizeHostnameForAuditTargetMatch', () => { + it('lowercases and strips one leading www.', () => { + expect(normalizeHostnameForAuditTargetMatch('WWW.Example.COM')).to.equal('example.com'); + }); + + it('does not strip www2 or nested www', () => { + expect(normalizeHostnameForAuditTargetMatch('www2.example.com')).to.equal('www2.example.com'); + expect(normalizeHostnameForAuditTargetMatch('www.www.example.com')).to.equal('www.example.com'); + }); + }); + + describe('siteHostnameFromBaseURL', () => { + it('returns hostname for valid base URL', () => { + expect(siteHostnameFromBaseURL('https://main--foo--bar.aem.page')).to.equal('main--foo--bar.aem.page'); + }); + + it('returns null for invalid or empty input', () => { + expect(siteHostnameFromBaseURL('')).to.equal(null); + expect(siteHostnameFromBaseURL(null)).to.equal(null); + expect(siteHostnameFromBaseURL(undefined)).to.equal(null); + expect(siteHostnameFromBaseURL(123)).to.equal(null); + expect(siteHostnameFromBaseURL('not-a-url')).to.equal(null); + }); + }); + + describe('validateAuditTargetUrlString', () => { + it('accepts HTTPS URL matching site hostname', () => { + expect(validateAuditTargetUrlString('https://site1.com/path', 'site1.com')).to.deep.equal({ ok: true }); + }); + + it('accepts apex URL when site hostname is www variant', () => { + expect(validateAuditTargetUrlString('https://site1.com/path', 'www.site1.com')).to.deep.equal({ ok: true }); + }); + + it('accepts www URL when site hostname is apex', () => { + expect(validateAuditTargetUrlString('https://www.site1.com/path', 'site1.com')).to.deep.equal({ ok: true }); + }); + + it('skips hostname check when site hostname is null', () => { + expect(validateAuditTargetUrlString('https://any.example/foo', null).ok).to.equal(true); + }); + + it('rejects non-HTTPS', () => { + const r = validateAuditTargetUrlString('http://site1.com/', 'site1.com'); + expect(r.ok).to.equal(false); + expect(r.error).to.equal('URL must use HTTPS'); + }); + + it('rejects hostname mismatch', () => { + const r = validateAuditTargetUrlString('https://other.com/', 'site1.com'); + expect(r.ok).to.equal(false); + expect(r.error).to.equal( + 'URL hostname must match the site domain (site1.com, with or without www.)', + ); + }); + + it('rejects invalid URL', () => { + const r = validateAuditTargetUrlString(':::bad', 'site1.com'); + expect(r.ok).to.equal(false); + expect(r.error).to.equal('Invalid URL'); + }); + }); + + describe('validateAuditTargetURLsConfig', () => { + it('returns ok for undefined', () => { + expect(validateAuditTargetURLsConfig(undefined, 'https://site1.com')).to.deep.equal({ ok: true }); + }); + + it('rejects null and non-objects', () => { + expect(validateAuditTargetURLsConfig(null, 'https://x.com').ok).to.equal(false); + expect(validateAuditTargetURLsConfig([], 'https://x.com').ok).to.equal(false); + }); + + it('returns ok for empty object without manual', () => { + expect(validateAuditTargetURLsConfig({}, 'https://site1.com')).to.deep.equal({ ok: true }); + }); + + it('rejects non-array manual', () => { + const r = validateAuditTargetURLsConfig({ manual: {} }, 'https://site1.com'); + expect(r.ok).to.equal(false); + expect(r.error).to.include('manual must be an array'); + }); + + it('rejects manual list longer than MAX_MANUAL_AUDIT_TARGET_URLS', () => { + const manual = Array.from({ length: MAX_MANUAL_AUDIT_TARGET_URLS + 1 }, (_, i) => ({ + url: `https://site1.com/u${i}`, + })); + const r = validateAuditTargetURLsConfig({ manual }, 'https://site1.com'); + expect(r.ok).to.equal(false); + expect(r.error).to.equal( + `config.auditTargetURLs.manual cannot contain more than ${MAX_MANUAL_AUDIT_TARGET_URLS} URLs`, + ); + }); + + it('normalizes trimmed manual URLs', () => { + const r = validateAuditTargetURLsConfig( + { manual: [{ url: ' https://site1.com/x ' }] }, + 'https://site1.com', + ); + expect(r.ok).to.equal(true); + expect(r.normalized).to.deep.equal({ manual: [{ url: 'https://site1.com/x' }] }); + }); + + it('accepts apex manual URL when site base URL uses www hostname', () => { + const r = validateAuditTargetURLsConfig( + { manual: [{ url: 'https://site1.com/y' }] }, + 'https://www.site1.com', + ); + expect(r.ok).to.equal(true); + }); + + it('rejects bad entry shape', () => { + const r = validateAuditTargetURLsConfig({ manual: [{ url: 1 }] }, 'https://site1.com'); + expect(r.ok).to.equal(false); + expect(r.error).to.include('must be an object with a string'); + }); + + it('returns index-scoped error for valid shape but failing URL rules', () => { + const r = validateAuditTargetURLsConfig( + { manual: [{ url: 'https://evil.com/' }] }, + 'https://site1.com', + ); + expect(r.ok).to.equal(false); + expect(r.error).to.match(/index 0/); + }); + + it('preserves unknown keys alongside manual', () => { + const r = validateAuditTargetURLsConfig( + { manual: [{ url: 'https://site1.com/' }], future: true }, + 'https://site1.com', + ); + expect(r.ok).to.equal(true); + expect(r.normalized.future).to.equal(true); + }); + }); + + describe('auditTargetURLsPatchGuard', () => { + const badRequest = sinon.stub().callsFake((msg) => ({ status: 400, message: msg })); + + afterEach(() => badRequest.resetHistory()); + + it('returns null when patch does not set auditTargetURLs', () => { + const merged = { slack: {} }; + expect(auditTargetURLsPatchGuard(merged, 'https://site1.com', { slack: {} }, badRequest)).to.equal(null); + expect(badRequest.called).to.equal(false); + }); + + it('returns error payload and does not mutate merged when invalid', () => { + const merged = { auditTargetURLs: { manual: [{ url: 'https://evil.com/' }] } }; + const patch = { auditTargetURLs: merged.auditTargetURLs }; + const r = auditTargetURLsPatchGuard(merged, 'https://site1.com', patch, badRequest); + expect(r).to.have.property('error'); + expect(r.error).to.include({ status: 400 }); + expect(r.error.message).to.be.a('string').and.to.match(/site domain/); + expect(badRequest.callCount).to.equal(1); + expect(merged.auditTargetURLs.manual[0].url).to.equal('https://evil.com/'); + }); + + it('returns empty object when patch is valid but nothing to normalize', () => { + const merged = { auditTargetURLs: {} }; + const r = auditTargetURLsPatchGuard( + merged, + 'https://site1.com', + { auditTargetURLs: {} }, + badRequest, + ); + expect(r).to.deep.equal({}); + expect(badRequest.called).to.equal(false); + }); + + it('returns normalized auditTargetURLs for caller to apply', () => { + const merged = { auditTargetURLs: { manual: [{ url: ' https://site1.com/x ' }] } }; + const patch = { auditTargetURLs: merged.auditTargetURLs }; + const r = auditTargetURLsPatchGuard(merged, 'https://site1.com', patch, badRequest); + expect(r).to.deep.equal({ normalized: { manual: [{ url: 'https://site1.com/x' }] } }); + expect(merged.auditTargetURLs.manual[0].url).to.equal(' https://site1.com/x '); + }); + }); +});