Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@brightdata/mcp",
"version": "2.9.4",
"version": "2.9.5",
"description": "An MCP interface into the Bright Data toolset",
"type": "module",
"main": "./server.js",
Expand Down Expand Up @@ -38,6 +38,7 @@
},
"files": [
"server.js",
"search_utils.js",
"browser_tools.js",
"browser_session.js",
"aria_snapshot_filter.js",
Expand Down
41 changes: 41 additions & 0 deletions search_utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
'use strict'; /*jslint node:true es9:true*/

/**
 * Trim a response body and cap its length so it can be quoted safely inside
 * an error message.
 *
 * @param {*} response_text - Raw response body; anything that is not a string
 *     yields an empty string.
 * @param {number} [max_length=300] - Maximum number of UTF-16 code units kept
 *     from the trimmed text before the `...` marker is appended.
 * @returns {string} The trimmed text when it fits, otherwise its leading part
 *     followed by `...`.
 */
function truncate_response(response_text, max_length = 300){
    if (typeof response_text != 'string')
        return '';
    const trimmed = response_text.trim();
    if (trimmed.length <= max_length)
        return trimmed;
    let end = max_length;
    // Never cut between the two halves of a surrogate pair: a dangling high
    // surrogate would put malformed text into the error message.
    const last_kept = trimmed.charCodeAt(end-1);
    const first_dropped = trimmed.charCodeAt(end);
    if (last_kept >= 0xD800 && last_kept <= 0xDBFF
        && first_dropped >= 0xDC00 && first_dropped <= 0xDFFF)
    {
        end--;
    }
    return `${trimmed.slice(0, end)}...`;
}

/**
 * Reduce a parsed Google SERP payload to its organic results.
 *
 * Each kept result carries only `link`, `title` and `description`, all
 * trimmed. Entries that are not objects, or whose link or title is missing,
 * not a string, or blank, are dropped. A non-string description becomes ''.
 *
 * @param {*} raw_data - Parsed payload; non-objects and payloads without an
 *     `organic` array produce an empty result list.
 * @returns {{organic: Array<{link: string, title: string,
 *     description: string}>}}
 */
export function clean_google_search_payload(raw_data){
    const candidate = raw_data && typeof raw_data=='object'
        ? raw_data.organic : undefined;
    const entries = Array.isArray(candidate) ? candidate : [];
    // Non-string fields are treated as empty text.
    const as_text = value=>typeof value=='string' ? value.trim() : '';
    const kept = [];
    for (const entry of entries)
    {
        if (!entry || typeof entry!='object')
            continue;
        const link = as_text(entry.link);
        const title = as_text(entry.title);
        const description = as_text(entry.description);
        if (link && title)
            kept.push({link, title, description});
    }
    return {organic: kept};
}

/**
 * Parse the raw body of a Google search request and return its cleaned
 * organic results.
 *
 * @param {string} response_text - Raw response body, expected to be JSON.
 * @param {string} tool_name - Tool name interpolated into the error message.
 * @returns {{organic: Array}} Result of `clean_google_search_payload`.
 * @throws {Error} When parsing or cleaning fails; the message includes a
 *     truncated snippet of the body (if any) and the original error is
 *     attached as `cause`.
 */
export function parse_google_search_response(response_text, tool_name){
    try {
        const payload = JSON.parse(response_text);
        return clean_google_search_payload(payload);
    } catch(e){
        const snippet = truncate_response(response_text);
        let message = `Unexpected non-JSON response from Bright Data`
            +` for ${tool_name}.`;
        // Only mention the body when there is something to show.
        if (snippet)
            message += ` Response snippet: ${snippet}`;
        throw new Error(message, {cause: e});
    }
}
107 changes: 41 additions & 66 deletions server.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import axios from 'axios';
import {tools as browser_tools} from './browser_tools.js';
import prompts from './prompts.js';
import {GROUPS} from './tool_groups.js';
import {parse_google_search_response} from './search_utils.js';
import {createRequire} from 'node:module';
import {remark} from 'remark';
import strip from 'strip-markdown';
Expand Down Expand Up @@ -198,7 +199,7 @@ const addTool = (tool) => {
addTool({
name: 'search_engine',
description: 'Scrape search results from Google, Bing or Yandex. Returns '
+'SERP results in JSON or Markdown (URL, title, description), Ideal for'
+'SERP results in JSON or Markdown (URL, title, description),Ideal for'
+'gathering current information, news, and detailed search results.',
annotations: {
title: 'Search Engine',
Expand Down Expand Up @@ -238,15 +239,8 @@ addTool({
});
if (!is_google)
return response.data;
try {
const search_data = JSON.parse(response.data);
return JSON.stringify(
clean_google_search_payload(search_data), null, 2);
} catch(e){
return JSON.stringify({
organic: []
}, null, 2);
}
return JSON.stringify(parse_google_search_response(response.data,
'search_engine'), null, 2);
}),
});

Expand Down Expand Up @@ -310,48 +304,51 @@ addTool({
execute: tool_fn('search_engine_batch', async({queries}, ctx)=>{
const search_promises = queries.map(({query, engine, cursor,
geo_location})=>{
const is_google = (engine || 'google') === 'google';
const url = search_url(engine || 'google', query, cursor,
const normalized_engine = engine || 'google';
const is_google = normalized_engine === 'google';
const url = search_url(normalized_engine, query, cursor,
geo_location);

return base_request({
url: 'https://api.brightdata.com/request',
method: 'POST',
data: {
url: is_google ? `${url}&brd_json=1` : url,
zone: unlocker_zone,
format: 'raw',
data_format: is_google ? 'parsed_light' : 'markdown',
},
headers: api_headers(ctx.clientName, 'search_engine_batch'),
responseType: 'text',
}).then(response=>{
if (is_google)
{
try {
const search_data = JSON.parse(response.data);
return {
query,
engine: engine || 'google',
result: clean_google_search_payload(search_data),
};
} catch(e){
return (async()=>{
try {
const response = await base_request({
url: 'https://api.brightdata.com/request',
method: 'POST',
data: {
url: is_google ? `${url}&brd_json=1` : url,
zone: unlocker_zone,
format: 'raw',
data_format: is_google ? 'parsed_light'
: 'markdown',
},
headers: api_headers(ctx.clientName,
'search_engine_batch'),
responseType: 'text',
});
if (is_google)
{
return {
query,
engine: engine || 'google',
result: clean_google_search_payload(null),
engine: normalized_engine,
result: parse_google_search_response(response.data,
'search_engine_batch'),
};
}
return {
query,
engine: normalized_engine,
result: response.data,
};
} catch(e){
return {
query,
engine: normalized_engine,
error: e instanceof Error ? e.message : String(e),
};
}
return {
query,
engine: engine || 'google',
result: response.data
};
});
})();
});

const results = await Promise.allSettled(search_promises);
const results = await Promise.all(search_promises);
return JSON.stringify(results, null, 2);
}),
});
Expand Down Expand Up @@ -1256,28 +1253,6 @@ function tool_fn(name, fn){
};
}

// Reduce a parsed Google SERP payload to {organic: [{link, title, description}]}.
// A non-object payload, or one whose `organic` is not an array, yields an
// empty list.
// NOTE(review): search_utils.js exports a function with the same name and the
// same body; this in-file copy looks redundant - confirm before keeping it.
function clean_google_search_payload(raw_data){
const data = raw_data && typeof raw_data=='object' ? raw_data : {};
const organic = Array.isArray(data.organic) ? data.organic : [];

const organic_clean = organic
.map(entry=>{
// Skip null and primitive entries.
if (!entry || typeof entry!='object')
return null;
// Non-string fields are treated as empty strings; strings are trimmed.
const link = typeof entry.link=='string' ? entry.link.trim() : '';
const title = typeof entry.title=='string'
? entry.title.trim() : '';
const description = typeof entry.description=='string'
? entry.description.trim() : '';
// A result needs both a link and a title; description is optional.
if (!link || !title)
return null;
return {link, title, description};
})
.filter(Boolean);

return {organic: organic_clean};
}

function search_url(engine, query, cursor, geo_location){
let q = encodeURIComponent(query);
let page = cursor ? parseInt(cursor) : 0;
Expand Down
37 changes: 37 additions & 0 deletions test/search-utils.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
'use strict'; /*jslint node:true es9:true*/
import test from 'node:test';
import assert from 'node:assert/strict';
import {clean_google_search_payload, parse_google_search_response}
from '../search_utils.js';

// Entries without a link are dropped and surviving fields are trimmed.
test('clean_google_search_payload keeps valid organic results', ()=>{
    const padded_entry = {
        link: ' https://example.com ',
        title: ' Example ',
        description: ' Sample ',
    };
    const entry_without_link = {
        link: '',
        title: 'Missing link',
        description: 'Ignored',
    };
    const expected = {
        organic: [{
            link: 'https://example.com',
            title: 'Example',
            description: 'Sample',
        }],
    };

    const actual = clean_google_search_payload({
        organic: [padded_entry, entry_without_link],
    });

    assert.deepEqual(actual, expected);
});

// An HTML body must surface as an error that names the calling tool.
test('parse_google_search_response throws on invalid JSON body', ()=>{
    const html_body = '<html>blocked</html>';
    const expected_message =
        /Unexpected non-JSON response from Bright Data for search_engine\./;
    const parse_html = ()=>parse_google_search_response(html_body,
        'search_engine');

    assert.throws(parse_html, expected_message);
});
Loading