Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/internal-links/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ export class InternalLinksConfigResolver {
}

isLinkCheckerEnabled() {
return this.handlerConfig.isLinkcheckerEnabled ?? false;
return this.handlerConfig.isLinkCheckerEnabled
?? this.handlerConfig.isLinkcheckerEnabled
?? false;
}

getLinkCheckerProgramId() {
Expand Down
3 changes: 2 additions & 1 deletion src/internal-links/crawl-detection.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ function getSourceItemType(parentTag) {
function getAssetTypeFromUrl(url, pageUrl = 'https://example.com') {
try {
const pathname = new URL(url, pageUrl).pathname.toLowerCase();
if (/\.(svg|png|jpe?g|gif|webp|avif)$/.test(pathname)) return 'image';
if (/\.svg$/.test(pathname)) return 'svg';
if (/\.(png|jpe?g|gif|webp|avif)$/.test(pathname)) return 'image';
/* c8 ignore start - Asset type branches covered by integration tests at extraction level */
if (/\.css$/.test(pathname)) return 'css';
if (/\.js$/.test(pathname)) return 'js';
Expand Down
19 changes: 18 additions & 1 deletion src/internal-links/finalization.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ function normalizeLinkCheckerValidity(validity) {
return String(validity || 'UNKNOWN').trim().toUpperCase();
}

/**
 * Item types that `requiresExplicitBrokenStatus` treats as needing an explicit
 * broken HTTP status. Matching is exact and case-sensitive.
 */
const LINKCHECKER_VALIDITY_ONLY_ASSET_TYPES = new Set(
  ['image', 'svg', 'css', 'js', 'iframe', 'video', 'audio', 'media'],
);

/**
 * Tells whether the given item type is one of the asset types listed in
 * LINKCHECKER_VALIDITY_ONLY_ASSET_TYPES.
 *
 * @param {string} itemType - Item type to look up.
 * @returns {boolean} `true` when the type is in the set, otherwise `false`.
 */
function requiresExplicitBrokenStatus(itemType) {
  const isListedAssetType = LINKCHECKER_VALIDITY_ONLY_ASSET_TYPES.has(itemType);
  return isListedAssetType;
}

function disableEventLoopWait(context) {
if (context && 'callbackWaitsForEmptyEventLoop' in context) {
context.callbackWaitsForEmptyEventLoop = false;
Expand Down Expand Up @@ -200,13 +215,15 @@ export function createFinalizeCrawlDetection({
const statusBucket = classifyStatusBucket(httpStatus);
const isExplicitBrokenValidity = validity === 'INVALID';
const hasBrokenStatus = Boolean(statusBucket);
const requireBrokenStatus = requiresExplicitBrokenStatus(itemType);

if (!lc.urlFrom || !lc.urlTo) {
skippedLinkCheckerRows += 1;
return null;
}

if (!hasBrokenStatus && !isExplicitBrokenValidity) {
if ((requireBrokenStatus && !hasBrokenStatus)
|| (!hasBrokenStatus && !isExplicitBrokenValidity)) {
skippedLinkCheckerRows += 1;
return null;
}
Expand Down
10 changes: 6 additions & 4 deletions src/internal-links/linkchecker-splunk.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,19 @@ export function buildLinkCheckerQuery({
// 1. Base index and time range
// 2. Filter by program and environment (automatically logged by AEM)
// 3. Filter for LinkChecker internal link removal events (FT_SITES-39847)
// 4. Parse JSON structure
// 5. Extract fields and limit results
// 4. Parse the top-level event JSON, then extract the embedded JSON payload from msg
// 5. Parse embedded LinkChecker payload, extract fields, and limit results
return [
'search',
'index=dx_aem_engineering',
`earliest=-${lookbackMinutes}m@m`,
'latest=@m',
`aem_program_id="${escapeSplunkString(programId)}"`,
`aem_envId="${escapeSplunkString(environmentId)}"`,
'"linkchecker.removed_internal_link"', // FT_SITES-39847 ensures this field exists
'| spath', // Parse JSON structure
'msg="*linkchecker.removed_internal_link*"',
'| spath', // Parse top-level Skyline event JSON
'| rex field=msg "LinkCheckerTransformer (?<linkchecker_json>\\{.*\\})$"',
'| spath input=linkchecker_json', // Parse embedded LinkChecker payload from msg
'| rename linkchecker.removed_internal_link.urlFrom as urlFrom',
'| rename linkchecker.removed_internal_link.urlTo as urlTo',
'| rename linkchecker.removed_internal_link.validity as validity',
Expand Down
8 changes: 8 additions & 0 deletions test/audits/internal-links/config.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,14 @@ describe('internal-links config resolver', () => {
expect(resolver.getLinkCheckerEnvironmentId()).to.equal('env-456');
});

// A site config that sets only the camelCase `isLinkCheckerEnabled` key must
// be reported as enabled by the resolver.
it('supports the camelCase site-config flag for enabling LinkChecker', () => {
  const site = createSite({ isLinkCheckerEnabled: true });
  const configResolver = new InternalLinksConfigResolver(site, {});

  const enabled = configResolver.isLinkCheckerEnabled();

  expect(enabled).to.equal(true);
});

it('prefers deliveryConfig program and environment IDs over handler config', () => {
const resolver = new InternalLinksConfigResolver(createSite({
aemProgramId: 'handler-program',
Expand Down
43 changes: 43 additions & 0 deletions test/audits/internal-links/crawl-detection.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1589,6 +1589,49 @@ describe('Crawl Detection Module', () => {
expect(result.results.some((entry) => entry.urlTo === 'https://example.com/content/dam/apcolourcatalogue/asset/hero.webp' && entry.itemType === 'image')).to.equal(true);
});

// An SVG referenced through a CSS url() in an inline <style> block should be
// reported with itemType 'svg'.
it('should classify SVG assets referenced from CSS url() as svg', async () => {
  const pageUrl = 'https://example.com/page1';
  const svgUrl = 'https://example.com/webassets/icons/search.svg';

  const rawBody = `
<html>
<head>
<style>
.icon {
background-image: url('/webassets/icons/search.svg');
}
</style>
</head>
<body></body>
</html>
`;

  // Serve the page above as the stored scrape result.
  getObjectFromKeyStub.resolves({
    scrapeResult: { rawBody },
    finalUrl: pageUrl,
  });

  // Stub the validation response for the SVG URL (404 / '4xx').
  const svgValidation = createValidationResponse(true, 404, '4xx', { contentType: 'image/svg+xml' });
  isLinkInaccessibleStub.withArgs(svgUrl).resolves(svgValidation);

  const batchInput = {
    scrapeResultPaths: new Map([[pageUrl, 'scrapes/page1.json']]),
    batchStartIndex: 0,
    batchSize: 1,
    initialBrokenUrls: [],
    initialWorkingUrls: [],
  };
  const result = await detectBrokenLinksFromCrawlBatch(batchInput, mockContext);

  expect(result.results).to.have.lengthOf(1);
  const [onlyEntry] = result.results;
  expect(onlyEntry).to.deep.include({
    urlTo: svgUrl,
    itemType: 'svg',
    anchorText: '[style url()]',
  });
});

it('should ignore CSS URLs with invalid escaped code points', async () => {
const scrapeResultPaths = new Map([
['https://example.com/page1', 'scrapes/page1.json'],
Expand Down
52 changes: 52 additions & 0 deletions test/audits/internal-links/finalization.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,58 @@ describe('internal-links finalization', () => {
expect(linkCheckerLinks[0].statusBucket).to.equal('not_found_404');
});

// A LinkChecker row for an SVG that carries only `validity: 'INVALID'` (no
// httpStatus) must not appear among the reported LinkChecker links.
it('should drop LinkChecker SVG assets without an explicit broken HTTP status', async () => {
  const auditResultStub = sinon.stub().resolves({});
  const runFinalize = buildFinalize({ updateAuditResult: auditResultStub });

  const svgRowWithoutStatus = {
    urlFrom: 'https://example.com/page',
    urlTo: 'https://example.com/webassets/icons/search.svg',
    itemType: 'svg',
    validity: 'INVALID',
  };
  const context = buildContext([svgRowWithoutStatus]);

  await runFinalize(context, { skipCrawlDetection: false });

  expect(mockIsLinkInaccessible).to.not.have.been.called;

  // The reported links are the third argument of the first updateAuditResult call.
  const [, , reportedLinks] = auditResultStub.firstCall.args;
  const fromLinkChecker = reportedLinks.filter(
    (link) => link.detectionSource === 'linkchecker',
  );
  expect(fromLinkChecker).to.have.lengthOf(0);
});

// A LinkChecker row for an SVG that carries `httpStatus: 404` must be kept and
// reported under the 'not_found_404' bucket.
it('should keep LinkChecker SVG assets when they have an explicit broken HTTP status', async () => {
  // The re-validation stub reports an inconclusive result with no HTTP status.
  const inconclusiveValidation = {
    isBroken: false,
    inconclusive: true,
    httpStatus: null,
    statusBucket: null,
  };
  mockIsLinkInaccessible.resolves(inconclusiveValidation);

  const auditResultStub = sinon.stub().resolves({});
  // Factory returns a fresh resolver stub on every call, as in the inline form.
  const createConfigResolver = () => ({
    getIncludedStatusBuckets: () => ['not_found_404', 'masked_by_linkchecker'],
    getIncludedItemTypes: () => ['link', 'svg'],
  });
  const runFinalize = buildFinalize({
    updateAuditResult: auditResultStub,
    createConfigResolver,
  });

  const svgRowWith404 = {
    urlFrom: 'https://example.com/page',
    urlTo: 'https://example.com/webassets/icons/search.svg',
    itemType: 'svg',
    httpStatus: 404,
    validity: 'INVALID',
  };
  const context = buildContext([svgRowWith404]);

  await runFinalize(context, { skipCrawlDetection: false });

  // The reported links are the third argument of the first updateAuditResult call.
  const [, , reportedLinks] = auditResultStub.firstCall.args;
  const fromLinkChecker = reportedLinks.filter(
    (link) => link.detectionSource === 'linkchecker',
  );
  expect(fromLinkChecker).to.have.lengthOf(1);
  expect(fromLinkChecker[0].statusBucket).to.equal('not_found_404');
});

it('should skip re-validation when insufficient time remains', async () => {
const updateAuditResult = sinon.stub().resolves({});
const getTimeoutStatus = sinon.stub().returns({
Expand Down
4 changes: 3 additions & 1 deletion test/audits/internal-links/linkchecker-splunk.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,10 @@ describe('linkchecker-splunk', () => {
expect(query).to.include('latest=@m');
expect(query).to.include('aem_program_id="program123"');
expect(query).to.include('aem_envId="env456"');
expect(query).to.include('"linkchecker.removed_internal_link"');
expect(query).to.include('msg="*linkchecker.removed_internal_link*"');
expect(query).to.include('| spath');
expect(query).to.include('| rex field=msg "LinkCheckerTransformer (?<linkchecker_json>\\{.*\\})$"');
expect(query).to.include('| spath input=linkchecker_json');
expect(query).to.include('| rename linkchecker.removed_internal_link.urlFrom as urlFrom');
expect(query).to.include('| where isnotnull(urlFrom) AND isnotnull(urlTo)');
expect(query).to.include('| head 10000');
Expand Down
Loading