diff --git a/package-lock.json b/package-lock.json index a921cb3..4610fda 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@brightdata/mcp", - "version": "2.9.4", + "version": "2.9.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@brightdata/mcp", - "version": "2.9.4", + "version": "2.9.5", "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "1.21.2", diff --git a/package.json b/package.json index 4dd4f6d..1e96cfc 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@brightdata/mcp", - "version": "2.9.4", + "version": "2.9.5", "description": "An MCP interface into the Bright Data toolset", "type": "module", "main": "./server.js", @@ -38,6 +38,7 @@ }, "files": [ "server.js", + "search_utils.js", "browser_tools.js", "browser_session.js", "aria_snapshot_filter.js", diff --git a/search_utils.js b/search_utils.js new file mode 100644 index 0000000..7891dc0 --- /dev/null +++ b/search_utils.js @@ -0,0 +1,41 @@ +'use strict'; /*jslint node:true es9:true*/ + +function truncate_response(response_text, max_length = 300){ + if (typeof response_text != 'string') + return ''; + const trimmed = response_text.trim(); + if (trimmed.length <= max_length) + return trimmed; + return `${trimmed.slice(0, max_length)}...`; +} + +export function clean_google_search_payload(raw_data){ + const data = raw_data && typeof raw_data=='object' ? raw_data : {}; + const organic = Array.isArray(data.organic) ? data.organic : []; + const organic_clean = organic + .map(entry=>{ + if (!entry || typeof entry!='object') + return null; + const link = typeof entry.link=='string' ? entry.link.trim() : ''; + const title = typeof entry.title=='string' + ? entry.title.trim() : ''; + const description = typeof entry.description=='string' + ? 
entry.description.trim() : ''; + if (!link || !title) + return null; + return {link, title, description}; + }) + .filter(Boolean); + return {organic: organic_clean}; +} + +export function parse_google_search_response(response_text, tool_name){ + try { + return clean_google_search_payload(JSON.parse(response_text)); + } catch(e){ + const snippet = truncate_response(response_text); + const details = snippet ? ` Response snippet: ${snippet}` : ''; + throw new Error(`Unexpected non-JSON response from Bright Data` + +` for ${tool_name}.${details}`, {cause: e}); + } +} diff --git a/server.js b/server.js index dd00e57..ff9e086 100644 --- a/server.js +++ b/server.js @@ -6,6 +6,7 @@ import axios from 'axios'; import {tools as browser_tools} from './browser_tools.js'; import prompts from './prompts.js'; import {GROUPS} from './tool_groups.js'; +import {parse_google_search_response} from './search_utils.js'; import {createRequire} from 'node:module'; import {remark} from 'remark'; import strip from 'strip-markdown'; @@ -198,7 +199,7 @@ const addTool = (tool) => { addTool({ name: 'search_engine', description: 'Scrape search results from Google, Bing or Yandex. 
Returns ' - +'SERP results in JSON or Markdown (URL, title, description), Ideal for' + +'SERP results in JSON or Markdown (URL, title, description). Ideal for ' +'gathering current information, news, and detailed search results.', annotations: { title: 'Search Engine', @@ -238,15 +239,8 @@ addTool({ }); if (!is_google) return response.data; - try { - const search_data = JSON.parse(response.data); - return JSON.stringify( - clean_google_search_payload(search_data), null, 2); - } catch(e){ - return JSON.stringify({ - organic: [] - }, null, 2); - } + return JSON.stringify(parse_google_search_response(response.data, + 'search_engine'), null, 2); }), }); @@ -310,48 +304,51 @@ addTool({ execute: tool_fn('search_engine_batch', async({queries}, ctx)=>{ const search_promises = queries.map(({query, engine, cursor, geo_location})=>{ - const is_google = (engine || 'google') === 'google'; - const url = search_url(engine || 'google', query, cursor, + const normalized_engine = engine || 'google'; + const is_google = normalized_engine === 'google'; + const url = search_url(normalized_engine, query, cursor, geo_location); - - return base_request({ - url: 'https://api.brightdata.com/request', - method: 'POST', - data: { - url: is_google ? `${url}&brd_json=1` : url, - zone: unlocker_zone, - format: 'raw', - data_format: is_google ? 'parsed_light' : 'markdown', - }, - headers: api_headers(ctx.clientName, 'search_engine_batch'), - responseType: 'text', - }).then(response=>{ - if (is_google) - { - try { - const search_data = JSON.parse(response.data); - return { - query, - engine: engine || 'google', - result: clean_google_search_payload(search_data), - }; - } catch(e){ + return (async()=>{ + try { + const response = await base_request({ + url: 'https://api.brightdata.com/request', + method: 'POST', + data: { + url: is_google ? `${url}&brd_json=1` : url, + zone: unlocker_zone, + format: 'raw', + data_format: is_google ? 
'parsed_light' + : 'markdown', + }, + headers: api_headers(ctx.clientName, + 'search_engine_batch'), + responseType: 'text', + }); + if (is_google) + { return { query, - engine: engine || 'google', - result: clean_google_search_payload(null), + engine: normalized_engine, + result: parse_google_search_response(response.data, + 'search_engine_batch'), }; } + return { + query, + engine: normalized_engine, + result: response.data, + }; + } catch(e){ + return { + query, + engine: normalized_engine, + error: e instanceof Error ? e.message : String(e), + }; } - return { - query, - engine: engine || 'google', - result: response.data - }; - }); + })(); }); - const results = await Promise.allSettled(search_promises); + const results = await Promise.all(search_promises); return JSON.stringify(results, null, 2); }), }); @@ -1256,28 +1253,6 @@ function tool_fn(name, fn){ }; } -function clean_google_search_payload(raw_data){ - const data = raw_data && typeof raw_data=='object' ? raw_data : {}; - const organic = Array.isArray(data.organic) ? data.organic : []; - - const organic_clean = organic - .map(entry=>{ - if (!entry || typeof entry!='object') - return null; - const link = typeof entry.link=='string' ? entry.link.trim() : ''; - const title = typeof entry.title=='string' - ? entry.title.trim() : ''; - const description = typeof entry.description=='string' - ? entry.description.trim() : ''; - if (!link || !title) - return null; - return {link, title, description}; - }) - .filter(Boolean); - - return {organic: organic_clean}; -} - function search_url(engine, query, cursor, geo_location){ let q = encodeURIComponent(query); let page = cursor ? 
parseInt(cursor) : 0; diff --git a/test/search-utils.test.js b/test/search-utils.test.js new file mode 100644 index 0000000..6ba79da --- /dev/null +++ b/test/search-utils.test.js @@ -0,0 +1,37 @@ +'use strict'; /*jslint node:true es9:true*/ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import {clean_google_search_payload, parse_google_search_response} + from '../search_utils.js'; + +test('clean_google_search_payload keeps valid organic results', ()=>{ + const payload = clean_google_search_payload({ + organic: [ + { + link: ' https://example.com ', + title: ' Example ', + description: ' Sample ', + }, + { + link: '', + title: 'Missing link', + description: 'Ignored', + }, + ], + }); + + assert.deepEqual(payload, { + organic: [{ + link: 'https://example.com', + title: 'Example', + description: 'Sample', + }], + }); +}); + +test('parse_google_search_response throws on invalid JSON body', ()=>{ + assert.throws( + ()=>parse_google_search_response('blocked', + 'search_engine'), + /Unexpected non-JSON response from Bright Data for search_engine\./); +});