diff --git a/autoload/ollama.vim b/autoload/ollama.vim index 9cc389b..c4598ca 100644 --- a/autoload/ollama.vim +++ b/autoload/ollama.vim @@ -221,11 +221,24 @@ function! ollama#GetSuggestion(timer) \ "Connecting to Ollama on " .. g:ollama_host \ .. " using model " .. g:ollama_model) call ollama#logger#Debug("model_options=" .. l:model_options) + + if exists('g:ollama_model_sampling_denylist') + \ && len(g:ollama_model_sampling_denylist) > 0 + \ && index(g:ollama_model_sampling_denylist, g:ollama_model) >= 0 + let l:sampling_enabled = 0 + else + let l:sampling_enabled = 1 + endif + call ollama#logger#Debug("sampling_enabled=" .. l:sampling_enabled) + " Convert plugin debug level to python logger levels let l:log_level = ollama#logger#PythonLogLevel(g:ollama_debug) let l:base_url = g:ollama_host if g:ollama_model_provider =~ '^openai' let l:base_url = g:ollama_openai_baseurl + elseif g:ollama_model_provider == 'claude' + " Claude uses default Anthropic API, don't set base_url + let l:base_url = '' endif " Adjust the command to use the prompt as stdin input let l:command = [ g:ollama_python_interpreter, @@ -234,6 +247,7 @@ function! ollama#GetSuggestion(timer) \ "-m", g:ollama_model, \ "-u", l:base_url, \ "-o", l:model_options, + \ "-se", l:sampling_enabled, \ "-l", l:log_level \ ] " Add optional credentialname for looking up the API key @@ -247,6 +261,11 @@ function! ollama#GetSuggestion(timer) " add credentialname option for Mistral let l:command += [ '-k', g:ollama_mistral_credentialname ] endif + elseif g:ollama_model_provider == 'claude' + if exists('g:ollama_claude_credentialname') && g:ollama_claude_credentialname != '' + " add credentialname option for Claude + let l:command += [ '-k', g:ollama_claude_credentialname ] + endif endif call ollama#logger#Debug("command=" .. 
join(l:command, " ")) let l:job_options = { diff --git a/autoload/ollama/review.vim b/autoload/ollama/review.vim index ee89a7b..7b34bff 100644 --- a/autoload/ollama/review.vim +++ b/autoload/ollama/review.vim @@ -56,6 +56,9 @@ function! s:FindBufferWindow(bufnr) endfunction function! s:StartChat(lines) abort + " Counter for reducing redraw frequency + let s:token_count = 0 + " Function handling a line of text that has been typed. func! TextEntered(text) call ollama#logger#Debug("TextEntered: " .. a:text) @@ -65,12 +68,13 @@ function! s:StartChat(lines) abort endif " Send the text to a shell with Enter appended. call ch_sendraw(s:job, a:text .. "\n") + " Reset token count for new request + let s:token_count = 0 endfunc - " Function handling output from the shell: Add it above the prompt. - func! GotOutput(channel, msg) + " OLD VERSION: Append each token as a new line (non-streaming) + func! GotOutputOld(channel, msg) call ollama#logger#Debug("GotOutput: " .. a:msg) - " append lines let l:lines = split(a:msg, "\n") for l:line in l:lines @@ -96,6 +100,91 @@ function! s:StartChat(lines) abort endfor endfunc + " NEW VERSION: Stream tokens on the same line with real-time cursor tracking + func! GotOutputNew(channel, msg) + " call ollama#logger#Debug("GotOutput: [" .. a:msg .. "]") + + " Check for the <EOT> end-of-response marker; text before it is still content + let l:idx = stridx(a:msg, "<EOT>") + let l:is_eot = l:idx != -1 + let l:content = l:is_eot ? strpart(a:msg, 0, l:idx) : a:msg + + " Append content to the last line for streaming effect + let l:updated_line_num = 0 + let l:updated_line_content = "" + let l:line_count = 0 + + if !empty(l:content) + " Get buffer line count efficiently + let l:buf_info = getbufinfo(s:buf)[0] + let l:line_count = l:buf_info.linecount + " call ollama#logger#Debug("line_count=" .. 
l:line_count) + + if l:line_count == 0 + " Buffer is empty, append as new line + " call ollama#logger#Debug("Buffer empty, appending first line") + call appendbufline(s:buf, 0, l:content) + let l:updated_line_num = 1 + let l:updated_line_content = l:content + else + " Get only the last line (much faster than getting all lines) + let l:last_line = getbufline(s:buf, l:line_count, l:line_count)[0] + let l:updated_line_content = l:last_line .. l:content + " call ollama#logger#Debug("Appending to line " .. l:line_count .. ": '" .. l:last_line .. "' + '" .. l:content .. "'") + call setbufline(s:buf, l:line_count, l:updated_line_content) + let l:updated_line_num = l:line_count + endif + endif + + " When streaming is done, add a new line for the next input + if l:is_eot + " call ollama#logger#Debug("EOT received, adding newline") + call appendbufline(s:buf, "$", "") + " Reuse line_count if we already got it, otherwise fetch + if l:line_count > 0 + let l:updated_line_num = l:line_count + 1 + else + let l:buf_info = getbufinfo(s:buf)[0] + let l:updated_line_num = l:buf_info.linecount + endif + let l:updated_line_content = "" + endif + + " Update cursor position if this is the active chat window + if bufname() == s:ollama_bufname " Check if current active window is Ollama Chat + let l:winid = bufwinid(s:buf) + if l:winid != -1 && l:updated_line_num > 0 + " Set cursor position directly (much faster than feedkeys) + let l:col = len(l:updated_line_content) + 1 + call win_execute(l:winid, 'call cursor(' . l:updated_line_num . ', ' . l:col . 
')') + + " Increment token counter and only redraw every 5th token (or always for EOT) + let s:token_count += 1 + if l:is_eot || s:token_count % 5 == 0 + redraw + endif + + if l:is_eot + " Streaming done: leave insert mode if active, then re-enter it with 'a' + if mode() == 'i' + call feedkeys("\<Esc>") + endif + call feedkeys("a") + endif + endif + endif + endfunc + + " Wrapper function that delegates to new version by default + " To use old version, set g:ollama_use_old_output = 1 + func! GotOutput(channel, msg) + if exists('g:ollama_use_old_output') && g:ollama_use_old_output + call GotOutputOld(a:channel, a:msg) + else + call GotOutputNew(a:channel, a:msg) + endif + endfunc + " Function handling output from the shell: Add it above the prompt. func! GotErrors(channel, msg) call ollama#logger#Debug("GotErrors: " .. a:msg) @@ -136,9 +225,18 @@ function! s:StartChat(lines) abort endfunc let l:model_options = json_encode(g:ollama_chat_options) - call ollama#logger#Debug("Connecting to Ollama on " .. g:ollama_host .. " using model " .. g:ollama_model) + call ollama#logger#Debug("Chat Connecting to Ollama on " .. g:ollama_host .. " using model " .. g:ollama_chat_model) call ollama#logger#Debug("model_options=" .. l:model_options) + if exists('g:ollama_model_sampling_denylist') + \ && len(g:ollama_model_sampling_denylist) > 0 + \ && index(g:ollama_model_sampling_denylist, g:ollama_chat_model) >= 0 + let l:sampling_enabled = 0 + else + let l:sampling_enabled = 1 + endif + call ollama#logger#Debug("sampling_enabled=" .. l:sampling_enabled) + " Convert plugin debug level to python logger levels let l:log_level = ollama#logger#PythonLogLevel(g:ollama_debug) let l:base_url = g:ollama_host @@ -154,6 +252,7 @@ function! s:StartChat(lines) abort \ '-m', g:ollama_chat_model, \ '-u', l:base_url, \ '-o', l:model_options, + \ "-se", l:sampling_enabled, \ '-t', g:ollama_chat_timeout, \ '-l', l:log_level ] " Check if a system prompt was configured @@ -205,7 +304,7 @@ function! 
s:StartChat(lines) abort silent execute 'new' l:bufname endif " Set the filetype to ollama-chat -" setlocal filetype=ollama-chat + " setlocal filetype=ollama-chat setlocal filetype=markdown setlocal buftype=prompt " enable BufDelete event when closing buffer usig :q! diff --git a/plugin/ollama.vim b/plugin/ollama.vim index 7945af4..8dfecb3 100644 --- a/plugin/ollama.vim +++ b/plugin/ollama.vim @@ -67,6 +67,10 @@ if !exists('g:ollama_openai_credentialname') " UNIX Pass credential name to lookup API key for OpenAI service let g:ollama_openai_credentialname = '' endif +if !exists('g:ollama_claude_credentialname') + " UNIX Pass credential name to lookup API key for Anthropic Claude service + let g:ollama_claude_credentialname = '' +endif " Tab completion specific settings if !exists('g:ollama_debounce_time') let g:ollama_debounce_time = 500 @@ -98,6 +102,10 @@ if !exists('g:ollama_model_options') \ 'max_tokens': 500 \ } endif +if !exists('g:ollama_model_sampling_denylist') + " default model sampling denylist + let g:ollama_model_sampling_denylist = [] +endif " Chat specific settings if !exists('g:ollama_chat_provider') " Provider for chat models: 'ollama' or 'openai' diff --git a/python/OllamaCredentials.py b/python/OllamaCredentials.py index 3affe2a..37595f3 100644 --- a/python/OllamaCredentials.py +++ b/python/OllamaCredentials.py @@ -18,6 +18,7 @@ def GetApiKey(self, provider: str, credentialname: str | None) -> str: - 'openai' → use OPENAI_API_KEY env var or pass entry - 'openai_legacy' → same as 'openai', kept for compatibility - 'mistral' → use MISTRAL_API_KEY env var or pass entry + - 'anthropic' → use ANTHROPIC_API_KEY env var or pass entry Priority: 1. 
Environment variable override @@ -36,6 +37,8 @@ def GetApiKey(self, provider: str, credentialname: str | None) -> str: env_var = "OPENAI_API_KEY" elif provider == "mistral": env_var = "MISTRAL_API_KEY" + elif provider == "anthropic": + env_var = "ANTHROPIC_API_KEY" else: raise ValueError(f"Unknown provider: {provider}") diff --git a/python/chat.py b/python/chat.py index c947c2e..fde4f2e 100755 --- a/python/chat.py +++ b/python/chat.py @@ -63,7 +63,9 @@ async def stream_chat_message_ollama(messages, endpoint, model, options, timeout if "message" in message and "content" in message["message"]: content = message["message"]["content"] assistant_message += content - print(content, end="", flush=True) + # Print each token followed by newline so Vim's out_cb receives it immediately + # VimScript will need to handle concatenating tokens on the same line + print(content, flush=True) # If <EOT> is detected, stop processing if "<EOT>" in content: @@ -90,7 +92,7 @@ async def stream_chat_message_ollama(messages, endpoint, model, options, timeout messages.append({"role": "assistant", "content": assistant_message.strip()}) -async def stream_chat_message_openai(messages, endpoint, model, options, credentialname): +async def stream_chat_message_openai(messages, endpoint, model, options, sampling_enabled, credentialname): """Stream chat responses from OpenAI API.""" if AsyncOpenAI is None: raise ImportError("OpenAI package not found. 
Please install via 'pip install openai'.") @@ -114,20 +116,30 @@ async def stream_chat_message_openai(messages, endpoint, model, options, credent top_p = options.get('top_p', 1.0) try: - stream = await client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - top_p=top_p, - stream=True, - ) + # Build request parameters + request_params = { + 'model': model, + 'messages': messages, + 'stream': True, + } + + # Check if model supports sampling parameters + if sampling_enabled: + request_params['temperature'] = temperature + request_params['top_p'] = top_p + request_params['max_tokens'] = max_tokens + else: + request_params['max_completion_tokens'] = max_tokens + + stream = await client.chat.completions.create(**request_params) async for chunk in stream: if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content: token = chunk.choices[0].delta.content assistant_message += token - print(token, end="", flush=True) + # Print each token followed by newline so Vim's out_cb receives it immediately + # VimScript will need to handle concatenating tokens on the same line + print(token, flush=True) print("<EOT>", flush=True) @@ -139,7 +151,7 @@ async def stream_chat_message_openai(messages, endpoint, model, options, credent messages.append({"role": "assistant", "content": assistant_message.strip()}) -async def main(provider, endpoint, model, options, systemprompt, timeout, credentialname): +async def main(provider, endpoint, model, options, sampling_enabled, systemprompt, timeout, credentialname): conversation_history = [] log.debug("endpoint: " + str(endpoint)) @@ -169,7 +181,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden ) else: task = asyncio.create_task( - stream_chat_message_openai(conversation_history, endpoint, model, options, credentialname) + stream_chat_message_openai(conversation_history, endpoint, model, options, sampling_enabled, credentialname) ) 
await task else: @@ -189,7 +201,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden ) else: task = asyncio.create_task( - stream_chat_message_openai(conversation_history, endpoint, model, options, credentialname) + stream_chat_message_openai(conversation_history, endpoint, model, options, sampling_enabled, credentialname) ) await task @@ -213,6 +225,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden help="Base endpoint URL.") parser.add_argument("-o", "--options", type=str, default=DEFAULT_OPTIONS, help="Ollama REST API options.") + parser.add_argument("-se", "--sampling-enabled", type=int, default=1, help="Enable or disable sampling.") parser.add_argument("-s", "--system-prompt", type=str, default="", help="Specify system prompt.") parser.add_argument("-t", "--timeout", type=int, default=DEFAULT_TIMEOUT, help="Timeout in seconds.") parser.add_argument("-l", "--log-level", type=int, default=OllamaLogger.ERROR, help="Log level.") @@ -243,7 +256,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden try: while True: try: - asyncio.run(main(args.provider, endpoint, model, options, args.system_prompt, args.timeout, args.keyname)) + asyncio.run(main(args.provider, endpoint, model, options, args.sampling_enabled, args.system_prompt, args.timeout, args.keyname)) except KeyboardInterrupt: print("Canceled.") break diff --git a/python/complete.py b/python/complete.py index 4145917..b92fe8f 100755 --- a/python/complete.py +++ b/python/complete.py @@ -23,13 +23,23 @@ except ImportError: Mistral = None +# try to load Anthropic package if it exists +try: + from anthropic import Anthropic # type: ignore +except ImportError: + Anthropic = None + # Default values DEFAULT_HOST = 'http://localhost:11434' DEFAULT_PROVIDER = 'ollama' DEFAULT_MODEL = 'codellama:code' DEFAULT_OPTIONS = '{ "temperature": 0, "top_p": 0.95 }' +DEFAULT_TEMPERATURE = 0 +DEFAULT_MAX_TOKENS = 300 
DEFAULT_MISTRAL_MODEL = 'codestral-2501' DEFAULT_OPENAI_MODEL = 'gpt-4.1-mini' +DEFAULT_OPENAI_RESPONSES_MODEL = 'gpt-5.1-codex' +DEFAULT_CLAUDE_MODEL = 'claude-sonnet-4-20250514' # When set to true, we use our own templates and don't use the Ollama built-in templates. # Is is the only way to make this work reliable. As soon is this works also with Ollama @@ -170,9 +180,9 @@ def generate_code_completion_mistral(prompt, baseurl, model, options, credential stop_marker = extract_stop_marker(suffix) stops = [stop_marker] if stop_marker else [] - temperature = options.get('temperature', 0) -# min_tokens = options.get('min_tokens', 1) - max_tokens = options.get('max_tokens', 300) + temperature = options.get('temperature', DEFAULT_TEMPERATURE) + # min_tokens = options.get('min_tokens', 1) + max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS) log.debug('model: ' + str(model)) log.debug('temperature: ' + str(temperature)) @@ -209,7 +219,37 @@ def extract_stop_marker(after: str) -> str | None: return line.rstrip() # preserve indentation return None -def generate_code_completion_openai(prompt, baseurl, model, options, credentialname): +def _build_fim_prompt(before: str, after: str, lang: str = 'C') -> str: + """Build fill-in-the-middle prompt for models that don't support native FIM.""" + return f"""Fill in the missing code between the markers below. + +Rules: +- Do NOT repeat any code that appears in the AFTER section. +- Return only the exact code that fits between BEFORE and AFTER. +- Do NOT add explanations or comments. +- Output the missing code only. 
+ +Language: {lang} + +BEFORE: +{before} + +AFTER: +{after} +""" + +def _strip_code_fences(text: str) -> str: + """Remove markdown code fence markers (```) from beginning and end of text.""" + lines = text.splitlines() + if lines: + if lines[0].startswith("```"): + lines.pop(0) + if lines and lines[-1].startswith("```"): + lines.pop() + return "\n".join(lines) + return text + +def generate_code_completion_openai(prompt, baseurl, model, options, sampling_enabled, credentialname): """Generate code completion using OpenAI's official Python SDK""" if OpenAI is None: raise ImportError("OpenAI package not found. Please install via 'pip install openai'.") @@ -256,21 +296,30 @@ def generate_code_completion_openai(prompt, baseurl, model, options, credentialn stop_marker = extract_stop_marker(after) stops = [stop_marker] if stop_marker else [] - temperature = options.get('temperature', 0) - max_tokens = options.get('max_tokens', 300) + temperature = options.get('temperature', DEFAULT_TEMPERATURE) + max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS) log.debug('model: ' + str(model)) log.debug('temperature: ' + str(temperature)) log.debug('max_tokens: ' + str(max_tokens)) log.debug('stops: ' + str(stops)) + log.debug('sampling_enabled: ' + str(sampling_enabled)) try: - response = client.chat.completions.create( - model=model, - messages=[{"role": "user", "content": full_prompt}], - temperature=temperature, - max_tokens=max_tokens, - stop=stops - ) + # Build request parameters + request_params = { + 'model': model, + 'messages': [{"role": "user", "content": full_prompt}], + } + + # Check if model supports sampling parameters + if sampling_enabled: + request_params['temperature'] = temperature + request_params['max_tokens'] = max_tokens + request_params['stop'] = stops + else: + request_params['max_completion_tokens'] = max_tokens + + response = client.chat.completions.create(**request_params) response = response.choices[0].message.content.strip() log.debug('response: ' 
+ response) except Exception as e: @@ -279,19 +328,59 @@ def generate_code_completion_openai(prompt, baseurl, model, options, credentialn log.error(str(e)) sys.exit(1) - # convert response to lines - lines = response.splitlines() - if lines: - # remove 1st element from array if it starts with ``` - if lines[0].startswith("```"): - lines.pop(0) - # remove last element from array if it starts with ``` - if lines[-1].startswith("```"): - lines.pop() + # Remove markdown code fences if present + return _strip_code_fences(response) - response = "\n".join(lines) +def generate_code_completion_claude(prompt, baseurl, model, options, credentialname): + """Generate code completion using Anthropic Claude API""" + if Anthropic is None: + raise ImportError("Anthropic package not found. Please install via 'pip install anthropic'.") - return response + cred = OllamaCredentials() + api_key = cred.GetApiKey('anthropic', credentialname) + + log.debug('Using Anthropic Claude API') + if baseurl: + log.debug(f'baseurl={baseurl}') + client = Anthropic(api_key=api_key, base_url=baseurl) + else: + log.debug('Using default Anthropic URL') + client = Anthropic(api_key=api_key) + + parts = prompt.split('<FILL_IN_HERE>') + if len(parts) != 2: + log.error("Prompt must contain <FILL_IN_HERE> marker for Claude mode.") + sys.exit(1) + + # Build FIM prompt from the text before and after the marker + lang = options.get('lang', 'C') + full_prompt = _build_fim_prompt(parts[0], parts[1], lang) + log.debug('full_prompt: ' + full_prompt) + + temperature = options.get('temperature', DEFAULT_TEMPERATURE) + max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS) + + log.debug('model: ' + str(model)) + log.debug('temperature: ' + str(temperature)) + log.debug('max_tokens: ' + str(max_tokens)) + + try: + response = client.messages.create( + model=model or DEFAULT_CLAUDE_MODEL, + max_tokens=max_tokens, + temperature=temperature, + messages=[{"role": "user", "content": full_prompt}] + ) + + response_text = response.content[0].text.strip() + 
log.debug('response: ' + response_text) + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + log.error(str(e)) + sys.exit(1) + + # Remove markdown code fences if present + return _strip_code_fences(response_text) def generate_code_completion_openai_legacy(prompt, baseurl, model, options, credentialname): """Generate code completion using OpenAI's official Python SDK""" @@ -318,8 +407,8 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred full_prompt = fill_in_the_middle(config, prompt) log.debug('full_prompt: ' + full_prompt) - temperature = options.get('temperature', 0) - max_tokens = options.get('max_tokens', 300) + temperature = options.get('temperature', DEFAULT_TEMPERATURE) + max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS) log.debug('model: ' + str(model)) log.debug('temperature: ' + str(temperature)) @@ -335,6 +424,94 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred return response.rstrip() +def _extract_output_text_from_message(item): + """Extract output_text from a message item in OpenAI responses format""" + if item.get('type') == 'message' and item.get('status') == 'completed': + content = item.get('content', []) + for content_item in content: + if content_item.get('type') == 'output_text': + return content_item.get('text', '') + return None + +def generate_code_completion_openai_responses(prompt, baseurl, model, options, credentialname): + """Generate code completion using OpenAI's /v1/responses endpoint for GPT-5.1-Codex""" + if OpenAI is None: + raise ImportError("OpenAI package not found. 
Please install via 'pip install openai'.") + + log.debug('Using OpenAI responses endpoint (for GPT-5.1-Codex)') + + # Get API credentials + cred = OllamaCredentials() + api_key = cred.GetApiKey('openai', credentialname) + endpoint = f"{baseurl}/v1/responses" if baseurl else "https://api.openai.com/v1/responses" + + # Split prompt at the fill-in marker into the code before and after the cursor + parts = prompt.split('<FILL_IN_HERE>') + if len(parts) != 2: + log.error("Prompt must contain <FILL_IN_HERE> marker.") + sys.exit(1) + + # Build FIM prompt using helper function + lang = options.get('lang', 'C') + full_input = _build_fim_prompt(parts[0], parts[1], lang) + + max_output_tokens = options.get('max_completion_tokens', options.get('max_tokens', DEFAULT_MAX_TOKENS)) + log.debug(f'endpoint: {endpoint}, model: {model}, max_output_tokens: {max_output_tokens}') + + # Make API request (bounded by a timeout so a stalled connection cannot hang the job) + try: + response = requests.post( + endpoint, + headers={'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}, + json={'model': model, 'input': full_input, 'max_output_tokens': max_output_tokens}, timeout=60 + ) + + if response.status_code != 200: + log.error(f'API error: {response.text}') + response.raise_for_status() + + result = response.json() + log.debug('response: ' + json.dumps(result, indent=2)) + + # Extract completion from response (supports both list and dict formats) + completion = None + output_items = result.get('output', []) if isinstance(result, dict) else result if isinstance(result, list) else [] + + for item in output_items: + completion = _extract_output_text_from_message(item) + if completion: + break + + # Check for incomplete responses + if not completion and isinstance(result, dict) and result.get('status') == 'incomplete': + log.warning(f'Response incomplete: {result.get("incomplete_details")}') + + # Fallback for other formats + if not completion: + if isinstance(result, dict): + if 'text' in result: + completion = result['text'] + elif 'choices' in result and result['choices']: + choice = result['choices'][0] + completion = choice.get('text') 
or choice.get('message', {}).get('content') + + if not completion: + log.error('Could not extract completion from response') + return "" + + # Ensure completion is a string + if not isinstance(completion, str): + log.error(f'Completion is not a string, type: {type(completion)}, value: {completion}') + return "" + + log.debug(f'Final completion: {completion}') + return completion.strip() + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + log.error(str(e)) + sys.exit(1) + if __name__ == "__main__": try: parser = argparse.ArgumentParser(description="Complete code using Ollama or OpenAI LLM.") @@ -346,6 +523,7 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred help="Base endpoint URL (for Ollama only).") parser.add_argument('-o', '--options', type=str, default=DEFAULT_OPTIONS, help="Ollama REST API options (JSON string).") + parser.add_argument("-se", "--sampling-enabled", type=int, default=1, help="Enable or disable sampling.") parser.add_argument('-l', '--log-level', type=int, default=OllamaLogger.ERROR, help="Specify log level") parser.add_argument('-f', '--log-filename', type=str, default="complete.log", @@ -391,7 +569,7 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred else: modelname = DEFAULT_OPENAI_MODEL baseurl = args.url or None - response = generate_code_completion_openai(prompt, baseurl, modelname, options, args.keyname) + response = generate_code_completion_openai(prompt, baseurl, modelname, options, args.sampling_enabled, args.keyname) elif args.provider == "openai_legacy": if args.model: modelname = args.model @@ -399,6 +577,20 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred modelname = DEFAULT_OPENAI_MODEL baseurl = args.url or None response = generate_code_completion_openai_legacy(prompt, baseurl, modelname, options, args.keyname) + elif args.provider == "claude": + if args.model: + modelname = args.model + else: + modelname = 
DEFAULT_CLAUDE_MODEL + baseurl = args.url or None + response = generate_code_completion_claude(prompt, baseurl, modelname, options, args.keyname) + elif args.provider == "openai_responses": + if args.model: + modelname = args.model + else: + modelname = DEFAULT_OPENAI_RESPONSES_MODEL + baseurl = args.url or None + response = generate_code_completion_openai_responses(prompt, baseurl, modelname, options, args.keyname) else: log.error(f"Unknown provider: {args.provider}") sys.exit(1)