From d42b7c3195d1630b73ea82ae7399af2c9a476e59 Mon Sep 17 00:00:00 2001 From: matt-greathouse Date: Thu, 16 Apr 2026 11:47:07 -0400 Subject: [PATCH 1/4] Surface Google SERP parse errors instead of returning empty results --- search_utils.js | 49 +++++++++++++++++ server.js | 110 ++++++++++++++++---------------------- test/search-utils.test.js | 37 +++++++++++++ 3 files changed, 131 insertions(+), 65 deletions(-) create mode 100644 search_utils.js create mode 100644 test/search-utils.test.js diff --git a/search_utils.js b/search_utils.js new file mode 100644 index 0000000..cbc4633 --- /dev/null +++ b/search_utils.js @@ -0,0 +1,49 @@ +'use strict'; /*jslint node:true es9:true*/ + +function truncate_response(response_text, max_length = 300){ + if (typeof response_text != 'string') + return ''; + const trimmed = response_text.trim(); + // We only need enough of the body to identify whether this was HTML, + // an auth error, or some other upstream response shape mismatch. + if (trimmed.length <= max_length) + return trimmed; + return `${trimmed.slice(0, max_length)}...`; +} + +export function clean_google_search_payload(raw_data){ + const data = raw_data && typeof raw_data=='object' ? raw_data : {}; + const organic = Array.isArray(data.organic) ? data.organic : []; + + const organic_clean = organic + .map(entry=>{ + if (!entry || typeof entry!='object') + return null; + const link = typeof entry.link=='string' ? entry.link.trim() : ''; + const title = typeof entry.title=='string' + ? entry.title.trim() : ''; + const description = typeof entry.description=='string' + ? entry.description.trim() : ''; + // Dropping incomplete rows keeps downstream callers from treating + // malformed upstream entries as real search hits. + if (!link || !title) + return null; + return {link, title, description}; + }) + .filter(Boolean); + + return {organic: organic_clean}; +} + +export function parse_google_search_response(response_text, tool_name){ + try { + return clean_google_search_payload(JSON.parse(response_text)); + } catch (e){ + // A short body snippet gives enough evidence to debug auth/HTML/error + // responses without flooding logs or tool output with full pages. + const snippet = truncate_response(response_text); + const details = snippet ? ` Response snippet: ${snippet}` : ''; + throw new Error(`Unexpected non-JSON response from Bright Data ` + +`for ${tool_name}.${details}`, {cause: e}); + } +} diff --git a/server.js b/server.js index dd00e57..35f0fc9 100644 --- a/server.js +++ b/server.js @@ -6,6 +6,7 @@ import axios from 'axios'; import {tools as browser_tools} from './browser_tools.js'; import prompts from './prompts.js'; import {GROUPS} from './tool_groups.js'; +import {parse_google_search_response} from './search_utils.js'; import {createRequire} from 'node:module'; import {remark} from 'remark'; import strip from 'strip-markdown'; @@ -238,15 +239,10 @@ addTool({ }); if (!is_google) return response.data; - try { - const search_data = JSON.parse(response.data); - return JSON.stringify( - clean_google_search_payload(search_data), null, 2); - } catch(e){ - return JSON.stringify({ - organic: [] - }, null, 2); - } + // An empty organic list looks like a legitimate search miss, so we + // fail here when Bright Data returns something other than Google JSON. + return JSON.stringify(parse_google_search_response(response.data, + 'search_engine'), null, 2); }), }); @@ -309,49 +305,55 @@ addTool({ }), execute: tool_fn('search_engine_batch', async({queries}, ctx)=>{ const search_promises = queries.map(({query, engine, cursor, - geo_location})=>{ - const is_google = (engine || 'google') === 'google'; - const url = search_url(engine || 'google', query, cursor, + geo_location})=> { + const normalized_engine = engine || 'google'; + const is_google = normalized_engine === 'google'; + const url = search_url(normalized_engine, query, cursor, geo_location); - return base_request({ - url: 'https://api.brightdata.com/request', - method: 'POST', - data: { - url: is_google ? `${url}&brd_json=1` : url, - zone: unlocker_zone, - format: 'raw', - data_format: is_google ? 'parsed_light' : 'markdown', - }, - headers: api_headers(ctx.clientName, 'search_engine_batch'), - responseType: 'text', - }).then(response=>{ - if (is_google) - { - try { - const search_data = JSON.parse(response.data); - return { - query, - engine: engine || 'google', - result: clean_google_search_payload(search_data), - }; - } catch(e){ + return (async()=>{ + try { + const response = await base_request({ + url: 'https://api.brightdata.com/request', + method: 'POST', + data: { + url: is_google ? `${url}&brd_json=1` : url, + zone: unlocker_zone, + format: 'raw', + data_format: is_google ? 'parsed_light' + : 'markdown', + }, + headers: api_headers(ctx.clientName, + 'search_engine_batch'), + responseType: 'text', + }); + if (is_google) + { return { query, - engine: engine || 'google', - result: clean_google_search_payload(null), + engine: normalized_engine, + result: parse_google_search_response(response.data, + 'search_engine_batch'), }; } + return { + query, + engine: normalized_engine, + result: response.data, + }; + } catch (e){ + // Batch callers still need partial successes, so each item + // carries its own error instead of hiding it in allSettled. + return { + query, + engine: normalized_engine, + error: e instanceof Error ? e.message : String(e), + }; } - return { - query, - engine: engine || 'google', - result: response.data - }; - }); + })(); }); - const results = await Promise.allSettled(search_promises); + const results = await Promise.all(search_promises); return JSON.stringify(results, null, 2); }), }); @@ -1256,28 +1258,6 @@ function tool_fn(name, fn){ }; } -function clean_google_search_payload(raw_data){ - const data = raw_data && typeof raw_data=='object' ? raw_data : {}; - const organic = Array.isArray(data.organic) ? data.organic : []; - - const organic_clean = organic - .map(entry=>{ - if (!entry || typeof entry!='object') - return null; - const link = typeof entry.link=='string' ? entry.link.trim() : ''; - const title = typeof entry.title=='string' - ? entry.title.trim() : ''; - const description = typeof entry.description=='string' - ? entry.description.trim() : ''; - if (!link || !title) - return null; - return {link, title, description}; - }) - .filter(Boolean); - - return {organic: organic_clean}; -} - function search_url(engine, query, cursor, geo_location){ let q = encodeURIComponent(query); let page = cursor ? parseInt(cursor) : 0; diff --git a/test/search-utils.test.js b/test/search-utils.test.js new file mode 100644 index 0000000..6ba79da --- /dev/null +++ b/test/search-utils.test.js @@ -0,0 +1,37 @@ +'use strict'; /*jslint node:true es9:true*/ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import {clean_google_search_payload, parse_google_search_response} + from '../search_utils.js'; + +test('clean_google_search_payload keeps valid organic results', ()=>{ + const payload = clean_google_search_payload({ + organic: [ + { + link: ' https://example.com ', + title: ' Example ', + description: ' Sample ', + }, + { + link: '', + title: 'Missing link', + description: 'Ignored', + }, + ], + }); + + assert.deepEqual(payload, { + organic: [{ + link: 'https://example.com', + title: 'Example', + description: 'Sample', + }], + }); +}); + +test('parse_google_search_response throws on invalid JSON body', ()=>{ + assert.throws( + ()=>parse_google_search_response('blocked', + 'search_engine'), + /Unexpected non-JSON response from Bright Data for search_engine\./); +}); From 0b6017953a630adfc1f8a63668190c3bc3a6aa35 Mon Sep 17 00:00:00 2001 From: matt-greathouse Date: Thu, 16 Apr 2026 11:59:36 -0400 Subject: [PATCH 2/4] fix packaging error --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 4dd4f6d..ccefa39 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,7 @@ }, "files": [ "server.js", + "search_utils.js", "browser_tools.js", "browser_session.js", "aria_snapshot_filter.js", From 8361b67baad87d3e65b1534128848d10f18e5007 Mon Sep 17 00:00:00 2001 From: meirk-brd Date: Mon, 20 Apr 2026 08:00:03 +0300 Subject: [PATCH 3/4] style: align PR #133 with coding conventions --- search_utils.js | 14 +++----------- server.js | 11 +++-------- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/search_utils.js b/search_utils.js index cbc4633..7891dc0 100644 --- a/search_utils.js +++ b/search_utils.js @@ -4,8 +4,6 @@ function truncate_response(response_text, max_length = 300){ if (typeof response_text != 'string') return ''; const trimmed = response_text.trim(); - // We only need enough of the body to identify whether this was HTML, - // an auth error, or some other upstream response shape mismatch. if (trimmed.length <= max_length) return trimmed; return `${trimmed.slice(0, max_length)}...`; @@ -14,7 +12,6 @@ function truncate_response(response_text, max_length = 300){ export function clean_google_search_payload(raw_data){ const data = raw_data && typeof raw_data=='object' ? raw_data : {}; const organic = Array.isArray(data.organic) ? data.organic : []; - const organic_clean = organic .map(entry=>{ if (!entry || typeof entry!='object') @@ -24,26 +21,21 @@ export function clean_google_search_payload(raw_data){ ? entry.title.trim() : ''; const description = typeof entry.description=='string' ? entry.description.trim() : ''; - // Dropping incomplete rows keeps downstream callers from treating - // malformed upstream entries as real search hits. if (!link || !title) return null; return {link, title, description}; }) .filter(Boolean); - return {organic: organic_clean}; } export function parse_google_search_response(response_text, tool_name){ try { return clean_google_search_payload(JSON.parse(response_text)); - } catch (e){ - // A short body snippet gives enough evidence to debug auth/HTML/error - // responses without flooding logs or tool output with full pages. + } catch(e){ const snippet = truncate_response(response_text); const details = snippet ? ` Response snippet: ${snippet}` : ''; - throw new Error(`Unexpected non-JSON response from Bright Data ` - +`for ${tool_name}.${details}`, {cause: e}); + throw new Error(`Unexpected non-JSON response from Bright Data` + +` for ${tool_name}.${details}`, {cause: e}); } } diff --git a/server.js b/server.js index 35f0fc9..ff9e086 100644 --- a/server.js +++ b/server.js @@ -199,7 +199,7 @@ const addTool = (tool) => { addTool({ name: 'search_engine', description: 'Scrape search results from Google, Bing or Yandex. Returns ' - +'SERP results in JSON or Markdown (URL, title, description), Ideal for' + +'SERP results in JSON or Markdown (URL, title, description),Ideal for' +'gathering current information, news, and detailed search results.', annotations: { title: 'Search Engine', @@ -239,8 +239,6 @@ addTool({ }); if (!is_google) return response.data; - // An empty organic list looks like a legitimate search miss, so we - // fail here when Bright Data returns something other than Google JSON. return JSON.stringify(parse_google_search_response(response.data, 'search_engine'), null, 2); }), @@ -305,12 +303,11 @@ addTool({ }), execute: tool_fn('search_engine_batch', async({queries}, ctx)=>{ const search_promises = queries.map(({query, engine, cursor, - geo_location})=> { + geo_location})=>{ const normalized_engine = engine || 'google'; const is_google = normalized_engine === 'google'; const url = search_url(normalized_engine, query, cursor, geo_location); - return (async()=>{ try { const response = await base_request({ @@ -341,9 +338,7 @@ addTool({ engine: normalized_engine, result: response.data, }; - } catch (e){ - // Batch callers still need partial successes, so each item - // carries its own error instead of hiding it in allSettled. + } catch(e){ return { query, engine: normalized_engine, From 43eff160c73dba66116e8387a871b8ab1dc1667a Mon Sep 17 00:00:00 2001 From: meirk-brd Date: Mon, 20 Apr 2026 10:04:15 +0300 Subject: [PATCH 4/4] chore: bump version --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index a921cb3..4610fda 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@brightdata/mcp", - "version": "2.9.4", + "version": "2.9.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@brightdata/mcp", - "version": "2.9.4", + "version": "2.9.5", "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "1.21.2", diff --git a/package.json b/package.json index ccefa39..1e96cfc 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@brightdata/mcp", - "version": "2.9.4", + "version": "2.9.5", "description": "An MCP interface into the Bright Data toolset", "type": "module", "main": "./server.js",