From d42b7c3195d1630b73ea82ae7399af2c9a476e59 Mon Sep 17 00:00:00 2001
From: matt-greathouse
Date: Thu, 16 Apr 2026 11:47:07 -0400
Subject: [PATCH 1/2] Surface Google SERP parse errors instead of returning empty results

---
Review notes (free text between the "---" marker and the first diff header;
not part of the commit message):

* NOTE(review): this copy of the series had its line breaks collapsed. Line
  breaks, blank lines and indentation below were reconstructed so that every
  hunk matches its "@@" line counts. Confirm context-line whitespace against
  the tree before applying.
* NOTE(review): the From: header carries no e-mail address in this copy;
  presumably it was stripped together with other markup-like text. Verify
  against the original series.
* parse_google_search_response() throws only when JSON.parse() throws. A body
  that parses to a non-object value, or to an object without an "organic"
  array, is still returned as {organic: []} by clean_google_search_payload().
* search_engine_batch now serializes the result of Promise.all() instead of
  Promise.allSettled(), so each array element is the bare
  {query, engine, result} or {query, engine, error} object rather than a
  {status, value} wrapper.

 search_utils.js           |  49 +++++++++++++++++
 server.js                 | 110 ++++++++++++++++----------------------
 test/search-utils.test.js |  37 +++++++++++++
 3 files changed, 131 insertions(+), 65 deletions(-)
 create mode 100644 search_utils.js
 create mode 100644 test/search-utils.test.js

diff --git a/search_utils.js b/search_utils.js
new file mode 100644
index 0000000..cbc4633
--- /dev/null
+++ b/search_utils.js
@@ -0,0 +1,49 @@
+'use strict'; /*jslint node:true es9:true*/
+
+function truncate_response(response_text, max_length = 300){
+    if (typeof response_text != 'string')
+        return '';
+    const trimmed = response_text.trim();
+    // We only need enough of the body to identify whether this was HTML,
+    // an auth error, or some other upstream response shape mismatch.
+    if (trimmed.length <= max_length)
+        return trimmed;
+    return `${trimmed.slice(0, max_length)}...`;
+}
+
+export function clean_google_search_payload(raw_data){
+    const data = raw_data && typeof raw_data=='object' ? raw_data : {};
+    const organic = Array.isArray(data.organic) ? data.organic : [];
+
+    const organic_clean = organic
+        .map(entry=>{
+            if (!entry || typeof entry!='object')
+                return null;
+            const link = typeof entry.link=='string' ? entry.link.trim() : '';
+            const title = typeof entry.title=='string'
+                ? entry.title.trim() : '';
+            const description = typeof entry.description=='string'
+                ? entry.description.trim() : '';
+            // Dropping incomplete rows keeps downstream callers from treating
+            // malformed upstream entries as real search hits.
+            if (!link || !title)
+                return null;
+            return {link, title, description};
+        })
+        .filter(Boolean);
+
+    return {organic: organic_clean};
+}
+
+export function parse_google_search_response(response_text, tool_name){
+    try {
+        return clean_google_search_payload(JSON.parse(response_text));
+    } catch (e){
+        // A short body snippet gives enough evidence to debug auth/HTML/error
+        // responses without flooding logs or tool output with full pages.
+        const snippet = truncate_response(response_text);
+        const details = snippet ? ` Response snippet: ${snippet}` : '';
+        throw new Error(`Unexpected non-JSON response from Bright Data `
+            +`for ${tool_name}.${details}`, {cause: e});
+    }
+}
diff --git a/server.js b/server.js
index dd00e57..35f0fc9 100644
--- a/server.js
+++ b/server.js
@@ -6,6 +6,7 @@ import axios from 'axios';
 import {tools as browser_tools} from './browser_tools.js';
 import prompts from './prompts.js';
 import {GROUPS} from './tool_groups.js';
+import {parse_google_search_response} from './search_utils.js';
 import {createRequire} from 'node:module';
 import {remark} from 'remark';
 import strip from 'strip-markdown';
@@ -238,15 +239,10 @@ addTool({
         });
         if (!is_google)
             return response.data;
-        try {
-            const search_data = JSON.parse(response.data);
-            return JSON.stringify(
-                clean_google_search_payload(search_data), null, 2);
-        } catch(e){
-            return JSON.stringify({
-                organic: []
-            }, null, 2);
-        }
+        // An empty organic list looks like a legitimate search miss, so we
+        // fail here when Bright Data returns something other than Google JSON.
+        return JSON.stringify(parse_google_search_response(response.data,
+            'search_engine'), null, 2);
     }),
 });
 
@@ -309,49 +305,55 @@ addTool({
     }),
     execute: tool_fn('search_engine_batch', async({queries}, ctx)=>{
         const search_promises = queries.map(({query, engine, cursor,
-            geo_location})=>{
-            const is_google = (engine || 'google') === 'google';
-            const url = search_url(engine || 'google', query, cursor,
+            geo_location})=> {
+            const normalized_engine = engine || 'google';
+            const is_google = normalized_engine === 'google';
+            const url = search_url(normalized_engine, query, cursor,
                 geo_location);
 
-            return base_request({
-                url: 'https://api.brightdata.com/request',
-                method: 'POST',
-                data: {
-                    url: is_google ? `${url}&brd_json=1` : url,
-                    zone: unlocker_zone,
-                    format: 'raw',
-                    data_format: is_google ? 'parsed_light' : 'markdown',
-                },
-                headers: api_headers(ctx.clientName, 'search_engine_batch'),
-                responseType: 'text',
-            }).then(response=>{
-                if (is_google)
-                {
-                    try {
-                        const search_data = JSON.parse(response.data);
-                        return {
-                            query,
-                            engine: engine || 'google',
-                            result: clean_google_search_payload(search_data),
-                        };
-                    } catch(e){
+            return (async()=>{
+                try {
+                    const response = await base_request({
+                        url: 'https://api.brightdata.com/request',
+                        method: 'POST',
+                        data: {
+                            url: is_google ? `${url}&brd_json=1` : url,
+                            zone: unlocker_zone,
+                            format: 'raw',
+                            data_format: is_google ? 'parsed_light'
+                                : 'markdown',
+                        },
+                        headers: api_headers(ctx.clientName,
+                            'search_engine_batch'),
+                        responseType: 'text',
+                    });
+                    if (is_google)
+                    {
                         return {
                             query,
-                            engine: engine || 'google',
-                            result: clean_google_search_payload(null),
+                            engine: normalized_engine,
+                            result: parse_google_search_response(response.data,
+                                'search_engine_batch'),
                         };
                     }
+                    return {
+                        query,
+                        engine: normalized_engine,
+                        result: response.data,
+                    };
+                } catch (e){
+                    // Batch callers still need partial successes, so each item
+                    // carries its own error instead of hiding it in allSettled.
+                    return {
+                        query,
+                        engine: normalized_engine,
+                        error: e instanceof Error ? e.message : String(e),
+                    };
                 }
-                return {
-                    query,
-                    engine: engine || 'google',
-                    result: response.data
-                };
-            });
+            })();
         });
 
-        const results = await Promise.allSettled(search_promises);
+        const results = await Promise.all(search_promises);
         return JSON.stringify(results, null, 2);
     }),
 });
@@ -1256,28 +1258,6 @@ function tool_fn(name, fn){
     };
 }
 
-function clean_google_search_payload(raw_data){
-    const data = raw_data && typeof raw_data=='object' ? raw_data : {};
-    const organic = Array.isArray(data.organic) ? data.organic : [];
-
-    const organic_clean = organic
-        .map(entry=>{
-            if (!entry || typeof entry!='object')
-                return null;
-            const link = typeof entry.link=='string' ? entry.link.trim() : '';
-            const title = typeof entry.title=='string'
-                ? entry.title.trim() : '';
-            const description = typeof entry.description=='string'
-                ? entry.description.trim() : '';
-            if (!link || !title)
-                return null;
-            return {link, title, description};
-        })
-        .filter(Boolean);
-
-    return {organic: organic_clean};
-}
-
 function search_url(engine, query, cursor, geo_location){
     let q = encodeURIComponent(query);
     let page = cursor ? parseInt(cursor) : 0;
diff --git a/test/search-utils.test.js b/test/search-utils.test.js
new file mode 100644
index 0000000..6ba79da
--- /dev/null
+++ b/test/search-utils.test.js
@@ -0,0 +1,37 @@
+'use strict'; /*jslint node:true es9:true*/
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import {clean_google_search_payload, parse_google_search_response}
+    from '../search_utils.js';
+
+test('clean_google_search_payload keeps valid organic results', ()=>{
+    const payload = clean_google_search_payload({
+        organic: [
+            {
+                link: ' https://example.com ',
+                title: ' Example ',
+                description: ' Sample ',
+            },
+            {
+                link: '',
+                title: 'Missing link',
+                description: 'Ignored',
+            },
+        ],
+    });
+
+    assert.deepEqual(payload, {
+        organic: [{
+            link: 'https://example.com',
+            title: 'Example',
+            description: 'Sample',
+        }],
+    });
+});
+
+test('parse_google_search_response throws on invalid JSON body', ()=>{
+    assert.throws(
+        ()=>parse_google_search_response('blocked',
+            'search_engine'),
+        /Unexpected non-JSON response from Bright Data for search_engine\./);
+});

From 0b6017953a630adfc1f8a63668190c3bc3a6aa35 Mon Sep 17 00:00:00 2001
From: matt-greathouse
Date: Thu, 16 Apr 2026 11:59:36 -0400
Subject: [PATCH 2/2] fix packaging error

---
 package.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/package.json b/package.json
index 4dd4f6d..ccefa39 100644
--- a/package.json
+++ b/package.json
@@ -38,6 +38,7 @@
     },
     "files": [
         "server.js",
+        "search_utils.js",
         "browser_tools.js",
         "browser_session.js",
         "aria_snapshot_filter.js",