diff --git a/README.md b/README.md index cee8a40..f1b3cd6 100644 --- a/README.md +++ b/README.md @@ -295,12 +295,35 @@ model = GenericLoraKbitModel('') ``` Replace `` with a local directory or a Hugging Face model like `mistralai/Mistral-7B-Instruct-v0.2`. +### Cloud Text‑Generation APIs + +xTuring also ships cloud API wrappers for synthetic‑data generation and self‑instruct workflows. + +| Provider | Key | Default model | +| -- | -- | -- | +| OpenAI | openai, openai_davinci, openai_chat | davinci / gpt-3.5-turbo | +| Cohere | cohere, cohere_medium | medium | +| Claude | claude, claude_3_sonnet | claude-3-sonnet | +| MiniMax | minimax, minimax_m2_7, minimax_m2_7_highspeed | MiniMax-M2.7 / MiniMax-M2.7-highspeed | + +```python +from xturing.model_apis import MiniMaxM27 + +api = MiniMaxM27(api_key="your-minimax-api-key") +results = api.generate_text( + prompts=["Explain quantum computing for beginners."], + max_tokens=256, + temperature=0.7, +) +print(results[0]["response"]["choices"][0]["text"]) +``` + ## 📈 Roadmap - [x] Support for `LLaMA`, `LLaMA 2`, `GPT-J`, `GPT-2`, and `GPT-OSS` models - [x] Dataset generation using self-instruction - [x] Low-precision LoRA fine-tuning and unsupervised fine-tuning - [x] INT8 low-precision fine-tuning support -- [x] OpenAI, Cohere, and Claude model APIs for dataset generation +- [x] OpenAI, Cohere, Claude, and MiniMax model APIs for dataset generation - [x] Added fine-tuned checkpoints for some models to the hub - [x] INT4 LLaMA LoRA fine-tuning demo - [x] INT4 LLaMA LoRA fine-tuning with INT4 generation diff --git a/src/xturing/model_apis/__init__.py b/src/xturing/model_apis/__init__.py index 4fced5e..e2e1d0d 100644 --- a/src/xturing/model_apis/__init__.py +++ b/src/xturing/model_apis/__init__.py @@ -2,6 +2,11 @@ from xturing.model_apis.claude import ClaudeSonnet, ClaudeTextGenerationAPI from xturing.model_apis.cohere import CohereTextGenerationAPI from xturing.model_apis.cohere import Medium as CohereMedium +from 
xturing.model_apis.minimax import ( + MiniMaxM27, + MiniMaxM27HighSpeed, + MiniMaxTextGenerationAPI, +) from xturing.model_apis.openai import ChatGPT as OpenAIChatGPT from xturing.model_apis.openai import Davinci as OpenAIDavinci from xturing.model_apis.openai import OpenAITextGenerationAPI @@ -9,7 +14,12 @@ BaseApi.add_to_registry(OpenAITextGenerationAPI.config_name, OpenAITextGenerationAPI) BaseApi.add_to_registry(CohereTextGenerationAPI.config_name, CohereTextGenerationAPI) BaseApi.add_to_registry(ClaudeTextGenerationAPI.config_name, ClaudeTextGenerationAPI) +BaseApi.add_to_registry( + MiniMaxTextGenerationAPI.config_name, MiniMaxTextGenerationAPI +) BaseApi.add_to_registry(OpenAIDavinci.config_name, OpenAIDavinci) BaseApi.add_to_registry(OpenAIChatGPT.config_name, OpenAIChatGPT) BaseApi.add_to_registry(CohereMedium.config_name, CohereMedium) BaseApi.add_to_registry(ClaudeSonnet.config_name, ClaudeSonnet) +BaseApi.add_to_registry(MiniMaxM27.config_name, MiniMaxM27) +BaseApi.add_to_registry(MiniMaxM27HighSpeed.config_name, MiniMaxM27HighSpeed) diff --git a/src/xturing/model_apis/minimax.py b/src/xturing/model_apis/minimax.py new file mode 100644 index 0000000..a649d09 --- /dev/null +++ b/src/xturing/model_apis/minimax.py @@ -0,0 +1,157 @@ +import time +from datetime import datetime + +try: + from openai import OpenAI + from openai import APIConnectionError as OpenAIAPIConnectionError + from openai import APIError as OpenAIAPIError + from openai import RateLimitError as OpenAIRateLimitError +except ImportError as import_err: # pragma: no cover - optional dependency + OpenAI = None + OpenAIAPIError = OpenAIAPIConnectionError = OpenAIRateLimitError = Exception + _OPENAI_IMPORT_ERROR = import_err +else: # pragma: no cover - dependency import paths exercised in runtime envs + _OPENAI_IMPORT_ERROR = None + +from xturing.model_apis.base import TextGenerationAPI + +_MINIMAX_BASE_URL = "https://api.minimax.io/v1" + + +class 
MiniMaxTextGenerationAPI(TextGenerationAPI): + config_name = "minimax" + + def __init__(self, model, api_key, request_batch_size=1): + openai_cls = self._ensure_dependency() + super().__init__( + engine=model, api_key=api_key, request_batch_size=request_batch_size + ) + self._client = openai_cls( + api_key=api_key, base_url=_MINIMAX_BASE_URL + ) + + @staticmethod + def _ensure_dependency(): + import importlib + + module = importlib.import_module(__name__) + openai_cls = getattr(module, "OpenAI", None) + if openai_cls is None: + openai_import_error = getattr(module, "_OPENAI_IMPORT_ERROR", None) + message = ( + "The openai SDK is required for MiniMaxTextGenerationAPI. " + "Install it with `pip install openai`." + ) + raise ModuleNotFoundError(message) from openai_import_error + return openai_cls + + def _clamp_temperature(self, temperature): + if temperature is not None and temperature <= 0.0: + return 0.01 + return temperature + + def _make_request(self, prompt, max_tokens, temperature, top_p, stop_sequences): + params = { + "model": self.engine, + "max_tokens": max_tokens, + "temperature": self._clamp_temperature(temperature), + "messages": [{"role": "user", "content": prompt}], + } + if top_p is not None: + params["top_p"] = top_p + if stop_sequences: + params["stop"] = stop_sequences + return self._client.chat.completions.create(**params) + + @staticmethod + def _render_response(response): + if response is None: + return None + choice = response.choices[0] if response.choices else None + if choice is None: + return None + text = choice.message.content or "" + predicts = { + "choices": [ + { + "text": text, + "finish_reason": choice.finish_reason or "stop", + } + ] + } + return predicts + + def generate_text( + self, + prompts, + max_tokens, + temperature, + top_p=None, + frequency_penalty=None, + presence_penalty=None, + stop_sequences=None, + logprobs=None, + n=1, + best_of=1, + retries=3, + **kwargs, + ): + if not isinstance(prompts, list): + prompts = 
[prompts] + + results = [] + for prompt in prompts: + response = None + retry_cnt = 0 + backoff_time = 30 + while retry_cnt <= retries: + try: + response = self._make_request( + prompt=prompt, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + stop_sequences=stop_sequences, + ) + break + except ( + OpenAIAPIError, + OpenAIAPIConnectionError, + OpenAIRateLimitError, + ) as e: + print(f"MiniMaxError: {e}.") + print(f"Retrying in {backoff_time} seconds...") + time.sleep(backoff_time) + backoff_time *= 1.5 + retry_cnt += 1 + + data = { + "prompt": prompt, + "response": self._render_response(response), + "created_at": str(datetime.now()), + } + results.append(data) + + return results + + +class MiniMaxM27(MiniMaxTextGenerationAPI): + config_name = "minimax_m2_7" + + def __init__(self, api_key, request_batch_size=1): + super().__init__( + model="MiniMax-M2.7", + api_key=api_key, + request_batch_size=request_batch_size, + ) + + +class MiniMaxM27HighSpeed(MiniMaxTextGenerationAPI): + config_name = "minimax_m2_7_highspeed" + + def __init__(self, api_key, request_batch_size=1): + super().__init__( + model="MiniMax-M2.7-highspeed", + api_key=api_key, + request_batch_size=request_batch_size, + ) diff --git a/tests/xturing/model_apis/test_minimax_api.py b/tests/xturing/model_apis/test_minimax_api.py new file mode 100644 index 0000000..ede8955 --- /dev/null +++ b/tests/xturing/model_apis/test_minimax_api.py @@ -0,0 +1,430 @@ +from unittest.mock import MagicMock, patch + +import pytest + + +def _build_openai_error(error_cls, message): + if error_cls is Exception: + return Exception(message) + # APIStatusError subclasses (RateLimitError) take `response`; APIError takes `request` + import inspect + + sig = inspect.signature(error_cls.__init__) + if "response" in sig.parameters: + mock_response = MagicMock() + mock_response.status_code = 429 + return error_cls(message, response=mock_response, body=None) + return error_cls(message, request=MagicMock(), body=None) + 
+ +class TestMiniMaxTextGenerationAPI: + """Test suite for MiniMaxTextGenerationAPI""" + + def test_missing_openai_dependency(self): + """Test that missing openai package raises ModuleNotFoundError""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch.object( + MiniMaxTextGenerationAPI, + "_ensure_dependency", + side_effect=ModuleNotFoundError( + "The openai SDK is required for MiniMaxTextGenerationAPI." + ), + ): + with pytest.raises(ModuleNotFoundError, match="openai SDK is required"): + MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", + api_key="test-key", + ) + + def test_initialization(self): + """Test MiniMaxTextGenerationAPI initialization""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", + api_key="test-key", + request_batch_size=5, + ) + + assert api.engine == "MiniMax-M2.7" + assert api.api_key == "test-key" + assert api.request_batch_size == 5 + mock_openai.assert_called_once_with( + api_key="test-key", + base_url="https://api.minimax.io/v1", + ) + + def test_minimax_m27_initialization(self): + """Test MiniMaxM27 convenience class initialization""" + from xturing.model_apis.minimax import MiniMaxM27 + + with patch("xturing.model_apis.minimax.OpenAI"): + api = MiniMaxM27(api_key="test-key", request_batch_size=3) + + assert api.engine == "MiniMax-M2.7" + assert api.api_key == "test-key" + assert api.request_batch_size == 3 + assert api.config_name == "minimax_m2_7" + + def test_minimax_m27_highspeed_initialization(self): + """Test MiniMaxM27HighSpeed convenience class initialization""" + from xturing.model_apis.minimax import MiniMaxM27HighSpeed + + with patch("xturing.model_apis.minimax.OpenAI"): + api = MiniMaxM27HighSpeed(api_key="test-key") + + assert api.engine == "MiniMax-M2.7-highspeed" + assert api.config_name == "minimax_m2_7_highspeed" + + def 
test_clamp_temperature_zero(self): + """Test that temperature 0 is clamped to 0.01""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI"): + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + assert api._clamp_temperature(0.0) == 0.01 + assert api._clamp_temperature(-0.5) == 0.01 + assert api._clamp_temperature(0.5) == 0.5 + assert api._clamp_temperature(1.0) == 1.0 + assert api._clamp_temperature(None) is None + + def test_make_request_basic(self): + """Test _make_request with basic parameters""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_openai.return_value = mock_client + + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + + api._make_request( + prompt="Hello, world!", + max_tokens=100, + temperature=0.7, + top_p=None, + stop_sequences=None, + ) + + mock_client.chat.completions.create.assert_called_once_with( + model="MiniMax-M2.7", + max_tokens=100, + temperature=0.7, + messages=[{"role": "user", "content": "Hello, world!"}], + ) + + def test_make_request_with_optional_params(self): + """Test _make_request with optional parameters""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_openai.return_value = mock_client + + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + + api._make_request( + prompt="Hello, world!", + max_tokens=100, + temperature=0.7, + top_p=0.9, + stop_sequences=["STOP", "END"], + ) + + mock_client.chat.completions.create.assert_called_once_with( + model="MiniMax-M2.7", + max_tokens=100, + temperature=0.7, + top_p=0.9, + stop=["STOP", "END"], + messages=[{"role": "user", "content": "Hello, world!"}], + ) + + def 
test_make_request_temperature_clamped(self): + """Test that temperature=0 is clamped in _make_request""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_openai.return_value = mock_client + + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + + api._make_request( + prompt="Hello", + max_tokens=50, + temperature=0.0, + top_p=None, + stop_sequences=None, + ) + + mock_client.chat.completions.create.assert_called_once_with( + model="MiniMax-M2.7", + max_tokens=50, + temperature=0.01, + messages=[{"role": "user", "content": "Hello"}], + ) + + def test_render_response_success(self): + """Test _render_response with successful response""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "This is a response" + mock_choice.finish_reason = "stop" + mock_response.choices = [mock_choice] + + result = MiniMaxTextGenerationAPI._render_response(mock_response) + + assert result == { + "choices": [ + { + "text": "This is a response", + "finish_reason": "stop", + } + ] + } + + def test_render_response_none(self): + """Test _render_response with None response""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + result = MiniMaxTextGenerationAPI._render_response(None) + assert result is None + + def test_render_response_empty_choices(self): + """Test _render_response with empty choices list""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + mock_response = MagicMock() + mock_response.choices = [] + + result = MiniMaxTextGenerationAPI._render_response(mock_response) + assert result is None + + def test_generate_text_single_prompt(self): + """Test generate_text with single prompt""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with 
patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_openai.return_value = mock_client + + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "Generated text" + mock_choice.finish_reason = "stop" + mock_response.choices = [mock_choice] + + mock_client.chat.completions.create.return_value = mock_response + + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + + results = api.generate_text( + prompts="Test prompt", + max_tokens=100, + temperature=0.7, + ) + + assert len(results) == 1 + assert results[0]["prompt"] == "Test prompt" + assert results[0]["response"]["choices"][0]["text"] == "Generated text" + assert "created_at" in results[0] + + def test_generate_text_multiple_prompts(self): + """Test generate_text with multiple prompts""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_openai.return_value = mock_client + + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "Generated text" + mock_choice.finish_reason = "stop" + mock_response.choices = [mock_choice] + + mock_client.chat.completions.create.return_value = mock_response + + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + + results = api.generate_text( + prompts=["Prompt 1", "Prompt 2", "Prompt 3"], + max_tokens=100, + temperature=0.7, + ) + + assert len(results) == 3 + assert results[0]["prompt"] == "Prompt 1" + assert results[1]["prompt"] == "Prompt 2" + assert results[2]["prompt"] == "Prompt 3" + + def test_generate_text_with_retry(self): + """Test generate_text retry logic on API errors""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + with patch("time.sleep"): + mock_client = MagicMock() + mock_openai.return_value = 
mock_client + + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "Generated text" + mock_choice.finish_reason = "stop" + mock_response.choices = [mock_choice] + + from xturing.model_apis import minimax as minimax_module + + mock_client.chat.completions.create.side_effect = [ + _build_openai_error( + minimax_module.OpenAIRateLimitError, "Rate limit exceeded" + ), + mock_response, + ] + + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + + results = api.generate_text( + prompts="Test prompt", + max_tokens=100, + temperature=0.7, + retries=3, + ) + + assert len(results) == 1 + assert results[0]["response"]["choices"][0]["text"] == "Generated text" + assert mock_client.chat.completions.create.call_count == 2 + + def test_generate_text_max_retries_exceeded(self): + """Test generate_text when max retries exceeded""" + from xturing.model_apis.minimax import MiniMaxTextGenerationAPI + + with patch("xturing.model_apis.minimax.OpenAI") as mock_openai: + with patch("time.sleep"): + mock_client = MagicMock() + mock_openai.return_value = mock_client + + from xturing.model_apis import minimax as minimax_module + + mock_client.chat.completions.create.side_effect = _build_openai_error( + minimax_module.OpenAIAPIError, "API Error" + ) + + api = MiniMaxTextGenerationAPI( + model="MiniMax-M2.7", api_key="test-key" + ) + + results = api.generate_text( + prompts="Test prompt", + max_tokens=100, + temperature=0.7, + retries=2, + ) + + assert len(results) == 1 + assert results[0]["prompt"] == "Test prompt" + assert results[0]["response"] is None + assert mock_client.chat.completions.create.call_count == 3 + + def test_config_names(self): + """Test that config names are set correctly""" + from xturing.model_apis.minimax import ( + MiniMaxM27, + MiniMaxM27HighSpeed, + MiniMaxTextGenerationAPI, + ) + + assert MiniMaxTextGenerationAPI.config_name == "minimax" + assert MiniMaxM27.config_name == "minimax_m2_7" + assert 
MiniMaxM27HighSpeed.config_name == "minimax_m2_7_highspeed" + + def test_registry_entries(self): + """Test that MiniMax APIs are registered in BaseApi registry""" + from xturing.model_apis.base import BaseApi + + assert "minimax" in BaseApi.registry + assert "minimax_m2_7" in BaseApi.registry + assert "minimax_m2_7_highspeed" in BaseApi.registry + + def test_base_url_constant(self): + """Test that the MiniMax base URL is correct""" + from xturing.model_apis.minimax import _MINIMAX_BASE_URL + + assert _MINIMAX_BASE_URL == "https://api.minimax.io/v1" + + +class TestMiniMaxIntegration: + """Integration tests for MiniMax API (require MINIMAX_API_KEY env var)""" + + @pytest.fixture + def api_key(self): + import os + + key = os.environ.get("MINIMAX_API_KEY") + if not key: + pytest.skip("MINIMAX_API_KEY not set") + return key + + def test_m27_generate_text(self, api_key): + """Integration test: generate text with MiniMax M2.7""" + from xturing.model_apis.minimax import MiniMaxM27 + + api = MiniMaxM27(api_key=api_key) + results = api.generate_text( + prompts="Say hello in one word.", + max_tokens=10, + temperature=0.7, + ) + + assert len(results) == 1 + assert results[0]["response"] is not None + assert len(results[0]["response"]["choices"]) == 1 + assert len(results[0]["response"]["choices"][0]["text"]) > 0 + + def test_m27_highspeed_generate_text(self, api_key): + """Integration test: generate text with MiniMax M2.7-highspeed""" + from xturing.model_apis.minimax import MiniMaxM27HighSpeed + + api = MiniMaxM27HighSpeed(api_key=api_key) + results = api.generate_text( + prompts="Say hello in one word.", + max_tokens=10, + temperature=0.7, + ) + + assert len(results) == 1 + assert results[0]["response"] is not None + assert len(results[0]["response"]["choices"][0]["text"]) > 0 + + def test_temperature_zero_integration(self, api_key): + """Integration test: verify temperature clamping works end-to-end""" + from xturing.model_apis.minimax import MiniMaxM27HighSpeed + + api = 
MiniMaxM27HighSpeed(api_key=api_key) + results = api.generate_text( + prompts="What is 1+1? Reply with only the number.", + max_tokens=5, + temperature=0.0, + ) + + assert len(results) == 1 + assert results[0]["response"] is not None