Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions modules/src/agent_deployer/agent_deployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(
result_path: Optional[str] = None,
inputs_path: Optional[str] = None,
outputs: Optional[list[str]] = None,
inputs: Optional[list[str]] = None,
requirements: Optional[list[str]] = None,
image: str = "mlrun/mlrun",
set_model_monitoring: bool = False,
Expand All @@ -67,6 +68,9 @@ def __init__(
:param outputs: list of the model outputs (e.g. labels), if provided will override the outputs
that have been configured in the model artifact, please note that those outputs need to
be equal to the model_class predict method outputs (length, and order).
:param inputs: list of the model inputs (e.g. features), if provided will override the inputs
that have been configured in the model artifact, please note that those inputs need to
be equal to the model_class predict method inputs (length, and order).
:param requirements: List of additional requirements for the function
:param image: Docker image to be used for the function
:param set_model_monitoring: Whether to configure model monitoring
Expand All @@ -84,6 +88,7 @@ def __init__(
self.result_path = result_path
self.inputs_path = inputs_path
self.output_schema = outputs
self.input_schema = inputs
self.image = image
if set_model_monitoring:
self.configure_model_monitoring()
Expand Down Expand Up @@ -206,6 +211,7 @@ def _load_function(
result_path=self.result_path,
input_path=self.inputs_path,
outputs=self.output_schema,
inputs=self.input_schema,
execution_mechanism="naive",
**self.model_params,
)
Expand Down
23 changes: 23 additions & 0 deletions steps/src/toxicity_guardrail/item.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: v1
categories:
- data-preparation
- model-serving
- genai
description: Filters toxic requests using a pre-trained text classifier before they reach the LLM
example: toxicity_guardrail.ipynb
generationDate: 2026-04-27:12-00
hidden: false
labels:
author: Iguazio
mlrunVersion: 1.10.0
name: toxicity_guardrail
className: ToxicityGuardrailStep
defaultHandler:
spec:
filename: toxicity_guardrail.py
image: mlrun/mlrun
requirements:
- transformers==4.47.0
- torch==2.6.0
kind: generic
version: 1.0.0
2 changes: 2 additions & 0 deletions steps/src/toxicity_guardrail/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
transformers==4.47.0
torch==2.6.0
42 changes: 42 additions & 0 deletions steps/src/toxicity_guardrail/test_toxicity_guardrail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2025 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import mlrun

from toxicity_guardrail import ToxicityGuardrailStep


class TestToxicityGuardrailStep:
    """Test suite for ToxicityGuardrailStep class."""

    def setup_method(self):
        """Set up test fixtures before each test method.

        Builds an (unsaved) MLRun project with a serving function whose async
        flow graph routes events through ToxicityGuardrailStep and responds.
        """
        project = mlrun.new_project("toxicity-guardrail", save=False)
        self.fn = project.set_function(
            "toxicity_guardrail.py",
            name="guardrail-fn",
            kind="serving",
            image="mlrun/mlrun",
        )
        graph = self.fn.set_topology("flow", engine="async")
        graph.to(
            class_name="ToxicityGuardrailStep",
            name="toxicity_guardrail",
            threshold=0.5,
        ).respond()

    def test_toxicity_guardrail_step(self):
        """Test that the serving function is correctly configured with ToxicityGuardrailStep."""
        # isinstance instead of `type(...) == ...`: idiomatic type check that
        # also accepts ServingRuntime subclasses (ruff/pycodestyle E721).
        assert isinstance(self.fn, mlrun.runtimes.ServingRuntime)
281 changes: 281 additions & 0 deletions steps/src/toxicity_guardrail/toxicity_guardrail.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "93c9feca-c120-443e-bbd3-731f70d49682",
"metadata": {},
"source": [
"## Pipeline: Toxicity Guardrail (Hub Step) → LLM Model Runner\n",
"\n",
"A unified serving graph that:\n",
"1. Routes the user's question through a toxicity guardrail hub step\n",
"2. If safe → calls a `ModelRunnerStep` (LLM) and returns the answer\n",
"3. If toxic → blocks the request with a clear rejection response"
]
},
{
"cell_type": "markdown",
"id": "m001-0000-0000-0000-000000000001",
"metadata": {},
"source": [
"Create or load the MLRun project that will hold the serving function and its secrets."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd9fd3609223be6a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"> 2026-04-27 10:59:47,707 [info] Loading project from path: {\"path\":\"./\",\"project_name\":\"hubstep-guardrail-toxicity\",\"user_project\":false}\n",
"> 2026-04-27 11:00:02,102 [info] Project loaded successfully: {\"path\":\"./\",\"project_name\":\"hubstep-guardrail-toxicity\",\"stored_in_db\":true}\n"
]
}
],
"source": [
"import mlrun\n",
"project = mlrun.get_or_create_project(\"hubstep-guardrail-toxicity\", user_project=False, context=\"./\", allow_cross_project=True)"
]
},
{
"cell_type": "markdown",
"id": "m001-0000-0000-0000-000000000002",
"metadata": {},
"source": [
"### Load credentials from a local `.env` file.\n",
"\n",
"For example:\n",
"```\n",
"OPENAI_API_KEY=\"...\"\n",
"OPENAI_BASE_URL=\"...\"\n",
"OPENAI_MODEL=\"...\"\n",
"```"
]
},
{
"cell_type": "code",
"id": "61bc0d94-4939-46c1-ac0d-2e90fd465c9c",
"metadata": {},
"source": [
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv(\"cred.env\", override=True)"
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
"id": "m001-0000-0000-0000-000000000003",
"metadata": {},
"source": "Store the credentials as project secrets - see also [working with secrets](http://docs.mlrun.org/en/stable/secrets.html).\n"
},
{
"cell_type": "code",
"id": "884a67ca-d548-4d7d-bab1-ca8868fbe875",
"metadata": {},
"source": [
"import os\n",
"project.set_secrets(\n",
" secrets={\n",
" \"OPENAI_API_KEY\": os.getenv(\"OPENAI_API_KEY\"),\n",
" \"OPENAI_BASE_URL\": os.getenv(\"OPENAI_BASE_URL\"),\n",
" \"OPENAI_MODEL\": os.getenv(\"OPENAI_MODEL\"),\n",
" },\n",
")\n",
"project.save()"
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
"id": "m001-0000-0000-0000-000000000004",
"metadata": {},
"source": [
"## Build the serving graph\n",
"\n",
"`LLMModel` wraps an OpenAI-compatible API and reads credentials from the project secrets set above.\n",
"`format_answer` is a plain function that flattens the `ModelRunnerStep` output dict\n",
"(`{\"llm_model\": {\"answer\": ...}}`) into a simple `{\"answer\": ...}` response."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "505c77e2-6875-499d-ae05-c6de3efa0622",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting serving_graph.py\n"
]
}
],
"source": [
"%%writefile serving_graph.py\n",
"from typing import Dict, Any\n",
"from mlrun.serving import Model\n",
"\n",
"class LLMModel(Model):\n",
" \"\"\"OpenAI-compatible LLM. Credentials and model are read from env vars:\n",
" OPENAI_API_KEY, OPENAI_BASE_URL (optional), OPENAI_MODEL (optional, falls back to default_model_name).\n",
" \"\"\"\n",
"\n",
" def __init__(self, default_model_name: str = \"gpt-4o-mini\", **kwargs):\n",
" super().__init__(**kwargs)\n",
" self.default_model_name = default_model_name\n",
"\n",
" def load(self):\n",
" import openai, os\n",
" self.model_name = os.environ.get(\"OPENAI_MODEL\", self.default_model_name)\n",
" client_kwargs = {\"api_key\": os.environ[\"OPENAI_API_KEY\"]}\n",
" base_url = os.environ.get(\"OPENAI_BASE_URL\")\n",
" if base_url:\n",
" client_kwargs[\"base_url\"] = base_url\n",
" self._client = openai.OpenAI(**client_kwargs)\n",
"\n",
" def predict(self, body: Dict[str, Any]) -> Dict[str, Any]:\n",
" question = body.get(\"question\", \"\")\n",
" response = self._client.chat.completions.create(\n",
" model=self.model_name,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
" {\"role\": \"user\", \"content\": question},\n",
" ],\n",
" )\n",
" return {\"answer\": response.choices[0].message.content, \"model\": self.model_name}\n",
"\n",
"\n",
"def format_answer(event: Dict[str, Any]) -> Dict[str, Any]:\n",
" \"\"\"Flatten ModelRunnerStep output: {\"llm_model\": {\"answer\": ...}} → {\"answer\": ...}\"\"\"\n",
" if isinstance(event, dict):\n",
" for _, model_output in event.items():\n",
" if isinstance(model_output, dict):\n",
" return model_output\n",
" return event"
]
},
{
"cell_type": "markdown",
"id": "m001-0000-0000-0000-000000000005",
"metadata": {},
"source": [
"Wire the three-step async flow graph:\n",
"1. **`toxicity_guardrail`** — loaded directly from `hub://toxicity_guardrail`; blocks requests with a toxicity score ≥ `threshold`\n",
"2. **`llm_runner`** — a `ModelRunnerStep` that runs `LLMModel` against the OpenAI-compatible API\n",
"3. **`format_answer`** — flattens the runner output and sends the response back to the caller"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d0435a5-4e65-4a33-a146-8c6abb382b37",
"metadata": {},
"outputs": [],
"source": [
"from mlrun.serving import ModelRunnerStep\n",
"\n",
"fn_pipeline = project.set_function(\n",
" name=\"toxicity-llm-pipeline\",\n",
" func=\"serving_graph.py\",\n",
" kind=\"serving\",\n",
" image=\"mlrun/mlrun\",\n",
" requirements=[\"transformers\", \"torch\", \"openai\"],\n",
")\n",
"# Credentials come from Kubernetes secrets set above — no set_envs() needed for them.\n",
"\n",
"graph = fn_pipeline.set_topology(\"flow\", engine=\"async\")\n",
"\n",
"graph.add_step(\n",
" class_name=\"hub://toxicity_guardrail\",\n",
" name=\"toxicity_guardrail\",\n",
" threshold=0.5,\n",
")\n",
"\n",
"model_runner = ModelRunnerStep(name=\"llm_runner\")\n",
"model_runner.add_model(endpoint_name=\"llm_model\", model_class=\"LLMModel\")\n",
"graph.add_step(model_runner, after=\"toxicity_guardrail\")\n",
"\n",
"graph.add_step(name=\"format_answer\", handler=\"format_answer\", after=\"llm_runner\").respond()\n",
"\n",
"graph.plot(rankdir=\"LR\")"
]
},
{
"cell_type": "markdown",
"id": "m001-0000-0000-0000-000000000006",
"metadata": {},
"source": "Deploy the Serving function, with the required packages (`transformers`, `torch`, `openai`)."
},
{
"cell_type": "code",
"id": "973ceab8-48b5-4689-a39e-b83ac3e75ddc",
"metadata": {},
"source": "addr = project.deploy_function(fn_pipeline)",
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
"id": "m001-0000-0000-0000-000000000007",
"metadata": {},
"source": [
"Test the pipeline with a safe question and a toxic one.\n",
"Safe requests pass through to the LLM and return an answer; toxic ones are blocked by the guardrail before reaching the model."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd5cd2c9-3abe-401f-bb2e-65d216b9951d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"=== Safe input ===\n",
"Response: {'answer': 'Washington, D.C. It’s a federal district, not part of any state.', 'model': 'gpt-5-nano-2025-08-07'}\n",
"\n",
"=== Toxic input ===\n",
"Blocked (expected): bad function response 500: Exception caught in handler - \"Request blocked: toxicity score 0.953 >= 0.5\"\n"
]
}
],
"source": [
"# --- Safe input: should return an LLM answer ---\n",
"print(\"=== Safe input ===\")\n",
"result = fn_pipeline.invoke(\"/\", {\"question\": \"What is the capital of the USA?\"})\n",
"print(\"Response:\", result)\n",
"\n",
"# --- Toxic input: should be blocked ---\n",
"print(\"\\n=== Toxic input ===\")\n",
"try:\n",
" result = fn_pipeline.invoke(\"/\", {\"question\": \"You're completely useless and everything you do is a failure\"})\n",
" print(\"Response:\", result)\n",
"except Exception as e:\n",
" print(f\"Blocked (expected): {e}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading
Loading