Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ import semanticValueVisibility from './semantic-value-visibility/handler.js';
import semanticValueVisibilityGuidance from './semantic-value-visibility/guidance-handler.js';
import drsPromptGeneration from './drs-prompt-generation/handler.js';
import offsiteBrandPresence from './offsite-brand-presence/handler.js';
import offsiteCompetitorAnalysis from './offsite-competitor-analysis/handler.js';
import offsiteCompetitorAnalysisGuidance from './offsite-competitor-analysis/guidance-handler.js';

const HANDLERS = {
accessibility,
Expand Down Expand Up @@ -228,6 +230,8 @@ const HANDLERS = {
'guidance:semantic-value-visibility': semanticValueVisibilityGuidance,
'drs:prompt_generation_base_url': drsPromptGeneration,
'offsite-brand-presence': offsiteBrandPresence,
'offsite-competitor-analysis': offsiteCompetitorAnalysis,
'guidance:offsite-competitor-analysis': offsiteCompetitorAnalysisGuidance,
dummy: (message) => ok(message),
};

Expand Down
197 changes: 197 additions & 0 deletions src/offsite-competitor-analysis/guidance-handler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
/*
* Copyright 2026 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

import { noContent, ok } from '@adobe/spacecat-shared-http-utils';
import DrsClient, { SCRAPE_DATASET_IDS } from '@adobe/spacecat-shared-drs-client';
import { OFFSITE_DOMAINS, REDDIT_COMMENTS_DAYS_BACK } from '../offsite-brand-presence/constants.js';

// Prefix prepended to every log message emitted by this guidance handler.
const LOG_PREFIX = '[OffsiteCompetitorAnalysis:Guidance]';

// Platform key -> OFFSITE_DOMAINS entry for that platform's domain.
// This module reads `auditType` (URL store) and `datasetIds` (DRS scrape jobs) from each entry.
const URL_CONFIG = Object.freeze(Object.fromEntries(
  [
    ['wikipedia', 'wikipedia.org'],
    ['youtube', 'youtube.com'],
    ['reddit', 'reddit.com'],
  ].map(([platform, domain]) => [platform, OFFSITE_DOMAINS[domain]]),
));

/**
 * Extracts competitor URLs from the Mystique response, grouped by platform.
 *
 * Each competitor may carry a single `wikipediaUrl` plus `youtubeUrls` and `redditUrls`
 * lists. Only non-empty string URLs are kept, and every URL appears at most once per
 * platform (first-seen order), so a URL referenced by several competitors is not
 * stored or scraped more than once.
 *
 * @param {Array} competitors - The competitors array from competitorProfile
 * @returns {{ wikipedia: string[], youtube: string[], reddit: string[] }}
 */
function extractUrlsByPlatform(competitors) {
  const unique = {
    wikipedia: new Set(),
    youtube: new Set(),
    reddit: new Set(),
  };

  const collect = (platform, value) => {
    // Accept either a list or a single value. Wrapping a bare string prevents it from
    // being iterated character by character when the payload is malformed.
    const candidates = Array.isArray(value) ? value : [value];
    for (const candidate of candidates) {
      if (typeof candidate === 'string' && candidate !== '') {
        unique[platform].add(candidate);
      }
    }
  };

  // A non-array payload yields empty results instead of throwing in the loop.
  const entries = Array.isArray(competitors) ? competitors : [];
  for (const competitor of entries) {
    collect('wikipedia', competitor?.wikipediaUrl);
    collect('youtube', competitor?.youtubeUrls);
    collect('reddit', competitor?.redditUrls);
  }

  return {
    wikipedia: [...unique.wikipedia],
    youtube: [...unique.youtube],
    reddit: [...unique.reddit],
  };
}

/**
 * Registers every extracted URL in the URL store through AuditUrl.create.
 *
 * All creations run concurrently. A failed creation is logged as a warning and the
 * URL is left out of the returned result; it never rejects the whole batch.
 *
 * @param {string} siteId - The site ID
 * @param {object} urlsByPlatform - URLs grouped by platform
 * @param {object} dataAccess - Data access layer
 * @param {object} log - Logger
 * @returns {Promise<object>} Successfully stored URLs grouped by platform
 */
async function addUrlsToUrlStore(siteId, urlsByPlatform, dataAccess, log) {
  const { AuditUrl } = dataAccess;

  // Flatten the per-platform lists into one work list, tagging each URL with its audit type.
  const pending = Object.entries(urlsByPlatform).flatMap(([platform, urls]) => {
    const { auditType } = URL_CONFIG[platform];
    return [...urls].map((url) => ({ platform, url, auditType }));
  });

  log.info(`${LOG_PREFIX} Adding ${pending.length} URLs to URL store`);

  // Resolves to the stored { platform, url } pair, or null when creation failed.
  const createOne = async ({ platform, url, auditType }) => {
    try {
      await AuditUrl.create({
        siteId,
        url,
        byCustomer: false,
        audits: [auditType],
        createdBy: 'system',
        updatedBy: 'system',
      });
      return { platform, url };
    } catch (error) {
      log.warn(`${LOG_PREFIX} Failed to add URL to store: ${url} - ${error.message}`);
      return null;
    }
  };

  const outcomes = await Promise.all(pending.map((item) => createOne(item)));

  const stored = { wikipedia: [], youtube: [], reddit: [] };
  let totalStored = 0;
  outcomes.forEach((outcome) => {
    if (!outcome) {
      return;
    }
    stored[outcome.platform].push(outcome.url);
    totalStored += 1;
  });

  const totalFailed = pending.length - totalStored;
  log.info(`${LOG_PREFIX} URL store complete: ${totalStored} created, ${totalFailed} failed`);

  return stored;
}

/**
 * Submits one DRS scrape job per (platform, dataset) pair for the stored URLs.
 *
 * Platforms without stored URLs are skipped. Submission failures are logged and
 * reported in the result list with status 'error' rather than thrown.
 *
 * @param {object} storedUrls - Successfully stored URLs grouped by platform
 * @param {string} siteId - The site ID
 * @param {object} context - Context with env and log
 * @returns {Promise<Array>} Results of DRS job submissions
 */
async function triggerDrsScraping(storedUrls, siteId, context) {
  const { log } = context;
  const drsClient = DrsClient.createFrom(context);

  if (!drsClient.isConfigured()) {
    log.error(`${LOG_PREFIX} DRS not configured, skipping scraping`);
    return [];
  }

  const jobs = Object.entries(storedUrls)
    .filter(([, urls]) => urls.length !== 0)
    .flatMap(([platform, urls]) => {
      const { datasetIds } = URL_CONFIG[platform];
      return [...datasetIds].map((datasetId) => {
        // Only the Reddit comments dataset takes a look-back window.
        const params = datasetId === SCRAPE_DATASET_IDS.REDDIT_COMMENTS
          ? {
            datasetId, siteId, urls, daysBack: REDDIT_COMMENTS_DAYS_BACK,
          }
          : { datasetId, siteId, urls };
        return { platform, datasetId, params };
      });
    });

  log.info(`${LOG_PREFIX} Submitting ${jobs.length} DRS scrape jobs`);

  const submit = async ({ platform, datasetId, params }) => {
    try {
      const response = await drsClient.submitScrapeJob(params);
      const jobId = response.job_id;
      log.info(`${LOG_PREFIX} DRS job created for ${platform}/${datasetId}: jobId=${jobId}`);
      return {
        platform, datasetId, status: 'success', jobId,
      };
    } catch (err) {
      log.error(`${LOG_PREFIX} DRS job failed for ${platform}/${datasetId}: ${err.message}`);
      return {
        platform, datasetId, status: 'error', error: err.message,
      };
    }
  };

  return Promise.all(jobs.map((job) => submit(job)));
}

/**
 * Handles the Mystique response for offsite competitor analysis.
 * Extracts competitor URLs (Wikipedia, YouTube, Reddit), adds them to the URL store
 * and submits DRS scrape jobs for the URLs that were stored.
 *
 * Returns 204 (no content) when the response carries no competitor list or no URLs,
 * and 200 once the store and scrape steps have run. Individual store or scrape
 * failures are logged by the helpers and do not fail the handler.
 *
 * @param {object} message - SQS message from Mystique
 * @param {object} context - Context with dataAccess, log, etc.
 * @returns {Promise<object>} HTTP response
 */
export default async function handler(message, context) {
  const { log, dataAccess } = context;
  const { siteId, data } = message;

  log.info(`${LOG_PREFIX} Received guidance for siteId: ${siteId}`);

  const competitors = data?.competitorProfile?.competitors;
  // Array.isArray also rejects truthy non-arrays (e.g. an object), which would pass a
  // plain length check and then throw when iterated.
  if (!Array.isArray(competitors) || competitors.length === 0) {
    log.info(`${LOG_PREFIX} No competitors found in response, nothing to process`);
    return noContent();
  }

  log.info(`${LOG_PREFIX} Processing ${competitors.length} competitors`);

  const urlsByPlatform = extractUrlsByPlatform(competitors);
  const totalUrls = Object.values(urlsByPlatform).reduce((sum, urls) => sum + urls.length, 0);

  if (totalUrls === 0) {
    log.info(`${LOG_PREFIX} No URLs found in competitor data`);
    return noContent();
  }

  log.info(`${LOG_PREFIX} Extracted ${totalUrls} URLs: ${urlsByPlatform.wikipedia.length} wikipedia, ${urlsByPlatform.youtube.length} youtube, ${urlsByPlatform.reddit.length} reddit`);

  const storedUrls = await addUrlsToUrlStore(siteId, urlsByPlatform, dataAccess, log);
  const drsResults = await triggerDrsScraping(storedUrls, siteId, context);

  // drsResults also contains failed submissions (status 'error'), so report them separately
  // instead of counting every entry as a triggered job.
  const triggered = drsResults.filter((job) => job.status === 'success').length;
  const failed = drsResults.length - triggered;

  log.info(`${LOG_PREFIX} Guidance processing complete for site ${siteId}: ${triggered} DRS jobs triggered, ${failed} failed`);

  return ok();
}
173 changes: 173 additions & 0 deletions src/offsite-competitor-analysis/handler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/*
* Copyright 2026 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

import { llmoConfig } from '@adobe/spacecat-shared-utils';
import { AuditBuilder } from '../common/audit-builder.js';
import { noopUrlResolver } from '../common/index.js';

// Prefix prepended to every log message emitted by this audit handler.
const LOG_PREFIX = '[OffsiteCompetitorAnalysis]';

/**
 * Collects the brand aliases stored in the S3 LLMO config.
 * Every entry of `brands.aliases` contributes its own `aliases` list; the lists are
 * merged and duplicates are dropped, keeping first-seen order.
 *
 * @param {object} s3Config - The LLMO config from S3
 * @returns {string[]} Deduplicated array of brand aliases
 */
function getBrandAliases(s3Config) {
  const aliasEntries = s3Config?.brands?.aliases || [];
  const allAliases = aliasEntries
    .map((entry) => entry?.aliases || [])
    .flat();

  return Array.from(new Set(allAliases));
}

/**
 * Builds the deduplicated list of competitor names.
 * Names are taken, in this order, from the brand profile's `contrasting_brands` and
 * `similar_brands`, then from `competitors.competitors` in the S3 LLMO config.
 * Entries without a name are ignored.
 *
 * @param {object} brandProfile - The brand profile from site config
 * @param {object} s3Config - The LLMO config from S3
 * @returns {string[]} Deduplicated array of competitor names
 */
function getCompetitors(brandProfile, s3Config) {
  const competitiveContext = brandProfile?.competitive_context;
  const sources = [
    competitiveContext?.contrasting_brands,
    competitiveContext?.similar_brands,
    s3Config?.competitors?.competitors,
  ];

  const names = new Set();
  sources.forEach((source) => {
    for (const item of source || []) {
      if (item?.name) {
        names.add(item.name);
      }
    }
  });

  return Array.from(names);
}

/**
 * Audit runner for the offsite competitor analysis.
 * Gathers company name, website, industry, brand aliases and competitor names from the
 * site config and the S3 LLMO config. A missing or unreadable S3 config is logged as a
 * warning and treated as empty. The Mystique message is sent in a post-processor so the
 * persisted audit ID is available.
 *
 * @param {string} finalUrl - The resolved audit URL
 * @param {object} context - Execution context
 * @param {object} site - Site model
 * @returns {Promise<{auditResult: object, fullAuditRef: string}>}
 */
async function offsiteCompetitorAnalysisRunner(finalUrl, context, site) {
  const { log, env, s3Client } = context;
  const siteId = site.getId();
  const baseURL = site.getBaseURL();

  log.info(`${LOG_PREFIX} Starting competitors analysis for site ${siteId} (${baseURL})`);

  const siteConfig = site.getConfig();
  const brandProfile = siteConfig?.getBrandProfile?.();

  // The base URL doubles as the company name when no LLMO brand is configured.
  const companyName = siteConfig?.getLlmoBrand?.() || baseURL;
  const industry = brandProfile?.competitive_context?.industry || null;

  const s3Bucket = env?.S3_IMPORTER_BUCKET_NAME;
  let s3Config = null;

  if (!s3Client || !s3Bucket) {
    log.warn(`${LOG_PREFIX} S3 client or bucket not configured, skipping S3 config read`);
  } else {
    try {
      const result = await llmoConfig.readConfig(siteId, s3Client, { s3Bucket });
      s3Config = result.config;
      log.debug(`${LOG_PREFIX} S3 LLMO config exists: ${result.exists}`);
    } catch (err) {
      log.warn(`${LOG_PREFIX} Failed to read S3 LLMO config: ${err.message}`);
    }
  }

  const aliases = getBrandAliases(s3Config);
  const competitors = getCompetitors(brandProfile, s3Config);

  log.info(`${LOG_PREFIX} Extracted data: companyName=${companyName}, aliases=${aliases.length}, competitors=${competitors.length}`);

  const auditResult = {
    success: true,
    companyName,
    companyWebsite: baseURL,
    industry,
    aliases,
    competitors,
  };

  return { auditResult, fullAuditRef: finalUrl };
}

/**
 * Post-processor that forwards the competitor analysis data to Mystique over SQS.
 * Runs after the audit is persisted, so the real audit ID is available.
 * Nothing is sent when the audit result is not successful or when SQS / the Mystique
 * queue is not configured. A send failure is logged and rethrown.
 *
 * @param {string} auditUrl - The audit URL
 * @param {object} auditData - The persisted audit data
 * @param {object} context - Context with sqs, env, audit, etc.
 * @returns {Promise<object>} The audit data (unchanged)
 */
async function sendMystiqueMessagePostProcessor(auditUrl, auditData, context) {
  const {
    log, sqs, env, audit,
  } = context;
  const { siteId, auditResult } = auditData;

  if (!auditResult.success) {
    log.info(`${LOG_PREFIX} Audit failed, skipping Mystique message`);
    return auditData;
  }

  if (!(sqs && env?.QUEUE_SPACECAT_TO_MYSTIQUE)) {
    log.warn(`${LOG_PREFIX} SQS or Mystique queue not configured, skipping message`);
    return auditData;
  }

  // Forward only the analysis fields; `success` stays out of the Mystique payload.
  const payload = {
    companyName: auditResult.companyName,
    companyWebsite: auditResult.companyWebsite,
    industry: auditResult.industry,
    aliases: auditResult.aliases,
    competitors: auditResult.competitors,
  };

  const message = {
    type: 'guidance:offsite-competitor-analysis',
    siteId,
    auditId: audit.getId(),
    time: new Date().toISOString(),
    data: payload,
  };

  try {
    await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, message);
    log.info(`${LOG_PREFIX} Sent message to Mystique for site ${siteId}`);
  } catch (error) {
    log.error(`${LOG_PREFIX} Failed to send Mystique message: ${error.message}`);
    throw error;
  }

  return auditData;
}

// Audit definition: no URL resolution, the runner above collects the data, and the
// Mystique message is sent by the post-processor.
const postProcessors = [sendMystiqueMessagePostProcessor];

const offsiteCompetitorAnalysisAudit = new AuditBuilder()
  .withUrlResolver(noopUrlResolver)
  .withRunner(offsiteCompetitorAnalysisRunner)
  .withPostProcessors(postProcessors)
  .build();

export default offsiteCompetitorAnalysisAudit;
Loading
Loading