Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 83 additions & 67 deletions src/prerender/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,21 @@ async function getRecentlyProcessedPathnames(context, siteId) {
}
}

/**
 * Returns true when the URL's pathname is NOT in the set of recently processed pathnames.
 * URLs that cannot be parsed are treated as not recent (included by default).
 * @param {string} url
 * @param {Set<string>} recentPathnames
 * @returns {boolean}
 */
function isNotRecentUrl(url, recentPathnames) {
  let pathname;
  try {
    ({ pathname } = new URL(url));
  } catch {
    // Unparseable URL: never treat as "recent", so it stays eligible for processing.
    return true;
  }
  return !recentPathnames.has(pathname);
}

function normalizePathname(url) {
try {
const { pathname } = new URL(url);
Expand Down Expand Up @@ -728,21 +743,6 @@ export async function importTopPages(context) {
};
}

/**
 * Returns true when the URL's pathname is NOT in the set of recently processed pathnames.
 * URLs that cannot be parsed are treated as not recent (included by default).
 * @param {string} url
 * @param {Set<string>} recentPathnames
 * @returns {boolean}
 */
function isNotRecentUrl(url, recentPathnames) {
try {
// Match on pathname only so www/non-www and query-string variants of the
// same page compare equal against the recently-processed set.
return !recentPathnames.has(new URL(url).pathname);
} catch {
// new URL() threw: unparseable URLs are kept eligible (treated as not recent).
return true;
}
}

/**
* Step 2: Submit URLs for scraping OR skip if in ai-only mode
* @param {Object} context - Audit context with site and dataAccess
Expand Down Expand Up @@ -793,65 +793,80 @@ export async function submitForScraping(context) {
}

const topPagesUrls = await getTopOrganicUrlsFromSeo(context);
// getTopAgenticUrls internally handles errors and returns [] on failure
const agenticUrls = await getTopAgenticUrls(site, context);

const preferredBase = getPreferredBaseUrl(site, context);
const rebasedTopPagesUrls = topPagesUrls.map((url) => rebaseUrl(url, preferredBase, log));
const rebasedIncludedURLs = ((await site?.getConfig?.()?.getIncludedURLs?.(AUDIT_TYPE)) || [])
.map((url) => rebaseUrl(url, preferredBase, log));

// Daily batching: filter URLs recently processed within the rolling recent window
const recentPathnames = await getRecentlyProcessedPathnames(context, siteId);

const filteredOrganicUrls = rebasedTopPagesUrls
.filter((url) => isNotRecentUrl(url, recentPathnames));
const filteredIncludedURLs = rebasedIncludedURLs
.filter((url) => isNotRecentUrl(url, recentPathnames));
const filteredAgenticUrls = agenticUrls.filter((url) => isNotRecentUrl(url, recentPathnames));

const hasRecentOrganic = filteredOrganicUrls.length !== topPagesUrls.length;
const isFirstRunOfCycle = !hasRecentOrganic;

// Build a single ordered queue across all URL sources and slice the next daily batch
// after removing anything processed within the recent window.
const orderedCandidateUrls = [
...filteredOrganicUrls,
...filteredIncludedURLs,
...filteredAgenticUrls,
];
const batchedUrls = orderedCandidateUrls.slice(0, DAILY_BATCH_SIZE);

const organicUrlSet = new Set(filteredOrganicUrls);
const includedUrlSet = new Set(filteredIncludedURLs);
const batchedOrganicUrls = batchedUrls.filter((url) => organicUrlSet.has(url));
const batchedIncludedURLs = batchedUrls.filter((url) => includedUrlSet.has(url));
const batchedAgenticUrls = batchedUrls.filter(
(url) => !organicUrlSet.has(url) && !includedUrlSet.has(url),
);

// Merge URLs ensuring uniqueness while handling www vs non-www differences
// Also filters out non-HTML URLs (PDFs, images, etc.) in a single pass
const { urls: finalUrls, filteredCount } = mergeAndGetUniqueHtmlUrls(batchedUrls);
// When triggered from Slack, skip agentic sources and daily batching
const isSlackTriggered = !!(auditContext?.slackContext?.channelId);

let finalUrls;
let filteredCount;
let agenticUrlsCount = 0;
let currentAgentic = 0;
let currentOrganic;
let currentIncludedUrls;
let isFirstRunOfCycle;
let agenticNewThisCycle = 0;

if (isSlackTriggered) {
({ urls: finalUrls, filteredCount } = mergeAndGetUniqueHtmlUrls([
...rebasedTopPagesUrls,
...rebasedIncludedURLs,
]));
currentOrganic = rebasedTopPagesUrls.length;
currentIncludedUrls = rebasedIncludedURLs.length;
isFirstRunOfCycle = true;
} else {
// getTopAgenticUrls internally handles errors and returns [] on failure
const agenticUrls = await getTopAgenticUrls(site, context);
agenticUrlsCount = agenticUrls.length;

// Daily batching: filter URLs recently processed within the rolling recent window
const recentPathnames = await getRecentlyProcessedPathnames(context, siteId);

const filteredOrganicUrls = rebasedTopPagesUrls
.filter((url) => isNotRecentUrl(url, recentPathnames));
const filteredIncludedURLs = rebasedIncludedURLs
.filter((url) => isNotRecentUrl(url, recentPathnames));
const filteredAgenticUrls = agenticUrls.filter((url) => isNotRecentUrl(url, recentPathnames));

const hasRecentOrganic = filteredOrganicUrls.length !== topPagesUrls.length;
isFirstRunOfCycle = !hasRecentOrganic;
agenticNewThisCycle = filteredAgenticUrls.length;

const orderedCandidateUrls = [
...filteredOrganicUrls,
...filteredIncludedURLs,
...filteredAgenticUrls,
];
const batchedUrls = orderedCandidateUrls.slice(0, DAILY_BATCH_SIZE);

const organicUrlSet = new Set(filteredOrganicUrls);
const includedUrlSet = new Set(filteredIncludedURLs);
currentOrganic = batchedUrls.filter((url) => organicUrlSet.has(url)).length;
currentIncludedUrls = batchedUrls.filter((url) => includedUrlSet.has(url)).length;
currentAgentic = batchedUrls.filter(
(url) => !organicUrlSet.has(url) && !includedUrlSet.has(url),
).length;

const currentAgentic = batchedAgenticUrls.length;
const currentOrganic = batchedOrganicUrls.length;
const currentIncludedUrls = batchedIncludedURLs.length;
({ urls: finalUrls, filteredCount } = mergeAndGetUniqueHtmlUrls(batchedUrls));
}

log.info(`${LOG_PREFIX}
prerender_submit_scraping_metrics:
log.info(`${LOG_PREFIX} prerender_submit_scraping_metrics:
submittedUrls=${finalUrls.length},
agenticUrls=${agenticUrls.length},
agenticUrls=${agenticUrlsCount},
topPagesUrls=${topPagesUrls.length},
includedURLs=${rebasedIncludedURLs.length},
filteredOutUrls=${filteredCount},
currentAgentic=${currentAgentic},
currentOrganic=${currentOrganic},
currentIncludedUrls=${currentIncludedUrls},
isFirstRunOfCycle=${isFirstRunOfCycle},
agenticNewThisCycle=${filteredAgenticUrls.length},
agenticNewThisCycle=${agenticNewThisCycle},
baseUrl=${site.getBaseURL()},
siteId=${siteId},`);
siteId=${siteId}`);

if (finalUrls.length === 0) {
// Fallback to base URL if no URLs found
Expand All @@ -862,7 +877,7 @@ export async function submitForScraping(context) {

return {
urls: finalUrls.map((url) => ({ url })),
siteId: site.getId(),
siteId,
processingType: AUDIT_TYPE,
maxScrapeAge: 0,
options: {
Expand Down Expand Up @@ -1422,7 +1437,7 @@ export async function getScrapeJobStats(
*/
export async function processContentAndGenerateOpportunities(context) {
const {
site, audit, log, scrapeResultPaths, data, dataAccess,
site, audit, log, scrapeResultPaths, data, dataAccess, auditContext,
} = context;

// Check for AI-only mode - skip processing step (step 1 already triggered Mystique)
Expand All @@ -1434,6 +1449,7 @@ export async function processContentAndGenerateOpportunities(context) {

const siteId = site.getId();
const startTime = process.hrtime();
const isSlackTriggered = !!(auditContext?.slackContext?.channelId);

// Check if this is a paid LLMO customer early so we can use it in all logs
const isPaid = await isPaidLLMOCustomer(context);
Expand All @@ -1451,11 +1467,13 @@ export async function processContentAndGenerateOpportunities(context) {
log.info(`${LOG_PREFIX} Found ${urlsToCheck.length} URLs from scrape results`);
} else {
/* c8 ignore start */
// Fetch agentic URLs only for URL list fallback
try {
agenticUrls = await getTopAgenticUrls(site, context);
} catch (e) {
log.warn(`${LOG_PREFIX} Failed to fetch agentic URLs for fallback: ${e.message}. baseUrl=${site.getBaseURL()}`);
// Fetch agentic URLs for URL list fallback (skipped for Slack-triggered runs)
if (!isSlackTriggered) {
try {
agenticUrls = await getTopAgenticUrls(site, context);
} catch (e) {
log.warn(`${LOG_PREFIX} Failed to fetch agentic URLs for fallback: ${e.message}. baseUrl=${site.getBaseURL()}`);
}
}

// Load top organic pages cache for fallback merging
Expand Down Expand Up @@ -1503,7 +1521,6 @@ export async function processContentAndGenerateOpportunities(context) {

log.info(`${LOG_PREFIX} Found ${urlsNeedingPrerender.length}/${successfulComparisons.length} URLs needing prerender from total ${urlsToCheck.length} URLs scraped. isPaidLLMOCustomer=${isPaid}`);

const { auditContext } = context;
const { scrapeJobId } = auditContext || {};
// getScrapeJobStats combines 403s from COMPLETE-status URLs (already in comparisonResults)
// and FAILED-status URLs (absent from comparisonResults, fetched from ScrapeUrl table).
Expand Down Expand Up @@ -1653,7 +1670,6 @@ export async function processContentAndGenerateOpportunities(context) {
};

// Upload status.json on error so UI can show audit status via S3 fallback
const { auditContext } = context;
await uploadStatusSummaryToS3(site.getBaseURL(), {
siteId,
auditId: audit.getId(),
Expand Down
145 changes: 145 additions & 0 deletions test/audits/prerender/handler.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,106 @@ describe('Prerender Audit', () => {

});

// Slack-triggered runs (auditContext.slackContext.channelId present) must bypass
// the daily recency filter: all organic URLs are submitted, none dropped as "recent".
it('should include organic URLs even when all are in the recency window when triggered from Slack', async () => {
// Athena returns no agentic URLs; stubbed so the handler's import is controlled.
const athenaStub = sandbox.stub().resolves([]);
const mockHandler = await esmock('../../../src/prerender/handler.js', {
'../../../src/utils/agentic-urls.js': {
getTopAgenticUrlsFromAthena: athenaStub,
},
});

const context = {
site: {
getId: () => 'site-1',
getBaseURL: () => 'https://example.com',
getConfig: () => ({ getIncludedURLs: () => [] }),
},
// channelId marks this run as Slack-triggered.
auditContext: { slackContext: { channelId: 'C123', threadTs: '1.0' } },
dataAccess: {
SiteTopPage: {
allBySiteIdAndSourceAndGeo: sandbox.stub().resolves([
{ getUrl: () => 'https://example.com/organic-page-1' },
{ getUrl: () => 'https://example.com/organic-page-2' },
]),
},
},
log: { info: sandbox.stub(), warn: sandbox.stub(), debug: sandbox.stub() },
env: {},
};

const result = await mockHandler.submitForScraping(context);

// Both URLs must be present even though they would be "recent" in a scheduled run
expect(result.urls).to.deep.equal([
{ url: 'https://example.com/organic-page-1' },
{ url: 'https://example.com/organic-page-2' },
]);
});

// Slack-triggered runs skip the agentic-URL source entirely: the Athena stub
// must never be invoked, and only organic URLs are returned.
it('should not fetch agentic URLs when triggered from Slack', async () => {
const athenaStub = sandbox.stub().resolves(['https://example.com/agentic-1']);
const mockHandler = await esmock('../../../src/prerender/handler.js', {
'../../../src/utils/agentic-urls.js': {
getTopAgenticUrlsFromAthena: athenaStub,
},
});

const context = {
site: {
getId: () => 'site-1',
getBaseURL: () => 'https://example.com',
getConfig: () => ({ getIncludedURLs: () => [] }),
},
// channelId marks this run as Slack-triggered.
auditContext: { slackContext: { channelId: 'C123', threadTs: '1.0' } },
dataAccess: {
SiteTopPage: {
allBySiteIdAndSourceAndGeo: sandbox.stub().resolves([
{ getUrl: () => 'https://example.com/organic-page-1' },
{ getUrl: () => 'https://example.com/organic-page-2' },
]),
},
},
log: { info: sandbox.stub(), warn: sandbox.stub(), debug: sandbox.stub() },
env: {},
};

const result = await mockHandler.submitForScraping(context);

// Agentic source untouched; result contains only the organic URLs.
expect(athenaStub).to.not.have.been.called;
expect(result.urls).to.deep.equal([
{ url: 'https://example.com/organic-page-1' },
{ url: 'https://example.com/organic-page-2' },
]);
});

// Counterpart to the Slack test: with no slackContext in auditContext, the
// scheduled path still queries Athena for agentic URLs.
it('should still fetch agentic URLs for scheduled (non-Slack) runs', async () => {
const athenaStub = sandbox.stub().resolves(['https://example.com/agentic-1']);
const mockHandler = await esmock('../../../src/prerender/handler.js', {
'../../../src/utils/agentic-urls.js': {
getTopAgenticUrlsFromAthena: athenaStub,
},
});

// Note: no auditContext here, so the handler treats this as a scheduled run.
const context = {
site: {
getId: () => 'site-1',
getBaseURL: () => 'https://example.com',
getConfig: () => ({ getIncludedURLs: () => [] }),
},
dataAccess: {
SiteTopPage: {
allBySiteIdAndSourceAndGeo: sandbox.stub().resolves([]),
},
PageCitability: { allByIndexKeys: sandbox.stub().resolves([]) },
},
log: { info: sandbox.stub(), warn: sandbox.stub(), debug: sandbox.stub() },
env: {},
};

await mockHandler.submitForScraping(context);

expect(athenaStub).to.have.been.called;
});

});

Expand Down Expand Up @@ -1237,6 +1337,51 @@ describe('Prerender Audit', () => {
expect(context.log.info).to.have.been.calledWith('Prerender - No URLs found for comparison. baseUrl=https://example.com, siteId=test-site-id');
});

// processContentAndGenerateOpportunities falls back to building a URL list when
// scrapeResultPaths is empty; for Slack-triggered runs that fallback must not
// fetch agentic URLs from Athena.
it('should not fetch agentic URLs in fallback path when triggered from Slack', async () => {
const athenaStub = sandbox.stub().resolves(['https://example.com/agentic-1']);
const mockHandler = await esmock('../../../src/prerender/handler.js', {
'../../../src/utils/agentic-urls.js': {
getTopAgenticUrlsFromAthena: athenaStub,
getPreferredBaseUrl: () => 'https://example.com',
},
});

const context = {
site: {
getId: () => 'test-site-id',
getBaseURL: () => 'https://example.com',
getConfig: () => ({ getIncludedURLs: () => [] }),
},
audit: { getId: () => 'audit-id' },
dataAccess: {
SiteTopPage: {
allBySiteIdAndSourceAndGeo: sandbox.stub().resolves([
{ getUrl: () => 'https://example.com/organic-1', getTraffic: () => 100 },
]),
},
Opportunity: { allBySiteIdAndStatus: sandbox.stub().resolves([]) },
LatestAudit: { updateByKeys: sandbox.stub().resolves() },
},
log: {
info: sandbox.stub(),
debug: sandbox.stub(),
warn: sandbox.stub(),
error: sandbox.stub(),
},
scrapeResultPaths: new Map(), // No scrape results → triggers fallback path
// S3 cache miss forces the fallback to rely solely on its URL sources.
s3Client: { send: sandbox.stub().rejects(new Error('No S3 data')) },
env: { S3_SCRAPER_BUCKET_NAME: 'test-bucket' },
auditContext: {
scrapeJobId: 'test-job-id',
// channelId marks this run as Slack-triggered.
slackContext: { channelId: 'C123', threadTs: '1.0' },
},
};

await mockHandler.processContentAndGenerateOpportunities(context);

expect(athenaStub).to.not.have.been.called;
});

it('should trigger opportunity processing path when prerender is detected', async () => {
// This test covers line 341 by ensuring the full opportunity processing flow executes
const mockOpportunity = {
Expand Down
Loading