From 1d8f9a1d6345c899ab01328750a2c5f1b704ff8f Mon Sep 17 00:00:00 2001 From: RinZ27 <222222878+RinZ27@users.noreply.github.com> Date: Tue, 3 Mar 2026 22:44:16 +0700 Subject: [PATCH] Security: fix potential SQL injection and internal info leakage --- backend/modules/vector_db/singlestore.py | 15 ++++++++++++--- backend/server/routers/data_source.py | 5 +++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/backend/modules/vector_db/singlestore.py b/backend/modules/vector_db/singlestore.py index 2b95c3b4..05e21e96 100644 --- a/backend/modules/vector_db/singlestore.py +++ b/backend/modules/vector_db/singlestore.py @@ -235,9 +235,11 @@ def list_data_point_vectors( try: curr = conn.cursor() - # Remove all data point vectors with the same data_source_fqn + # Using parameterized query to prevent SQL injection + query = f"SELECT id, content, vector, metadata FROM {collection_name} WHERE JSON_EXTRACT_JSON(metadata, %s) LIKE %s LIMIT %s" curr.execute( - f"SELECT * FROM {collection_name} WHERE JSON_EXTRACT_JSON(metadata, '{DATA_POINT_FQN_METADATA_KEY}') LIKE '%{data_source_fqn}%' LIMIT {MAX_SCROLL_LIMIT}" + query, + (DATA_POINT_FQN_METADATA_KEY, f"%{data_source_fqn}%", MAX_SCROLL_LIMIT), ) for record in curr: @@ -286,8 +288,15 @@ def delete_data_point_vectors( vectors_to_be_deleted_count = len(data_point_vectors) curr = conn.cursor() + # Using parameterized query for multiple IDs + placeholders = ", ".join(["%s"] * len(data_point_vectors)) + query = f"DELETE FROM {collection_name} WHERE id in ({placeholders})" curr.execute( - f"DELETE FROM {collection_name} WHERE id in ({', '.join(data_point_vector.data_point_vector_id for data_point_vector in data_point_vectors)})" + query, + tuple( + data_point_vector.data_point_vector_id + for data_point_vector in data_point_vectors + ), ) logger.debug( f"[SingleStore] Deleted {vectors_to_be_deleted_count} data point vectors" diff --git a/backend/server/routers/data_source.py b/backend/server/routers/data_source.py index 233bb5d7..68cf0189 100644 --- a/backend/server/routers/data_source.py +++ b/backend/server/routers/data_source.py @@ -45,10 +45,11 @@ async def add_data_source(data_source: CreateDataSource): tfy_client = get_tfy_client() # TODO: Currently, if a TFY data directory does not exist, an exception is thrown. # We need to raise a 404 error instead of failing generically. - data_dir = tfy_client.get_data_directory_by_fqn(data_source.uri) + tfy_client.get_data_directory_by_fqn(data_source.uri) except Exception as e: + logger.error(f"Failed to validate TrueFoundry DataSource URI: {e}") return JSONResponse( - content={"error": f"Invalid DataSource URI: {e}"}, status_code=400 + content={"error": "Invalid DataSource URI or failed to connect to provider"}, status_code=400 ) # Create the data source record created_data_source = await metadata_store_client.acreate_data_source(