diff --git a/autoload/ollama.vim b/autoload/ollama.vim
index 9cc389b..c4598ca 100644
--- a/autoload/ollama.vim
+++ b/autoload/ollama.vim
@@ -221,11 +221,24 @@ function! ollama#GetSuggestion(timer)
           \ "Connecting to Ollama on " .. g:ollama_host
           \ .. " using model " .. g:ollama_model)
     call ollama#logger#Debug("model_options=" .. l:model_options)
+
+    if exists('g:ollama_model_sampling_denylist')
+            \ && len(g:ollama_model_sampling_denylist) > 0
+            \ && index(g:ollama_model_sampling_denylist, g:ollama_model) >= 0
+        let l:sampling_enabled = 0
+    else
+        let l:sampling_enabled = 1
+    endif
+    call ollama#logger#Debug("sampling_enabled=" .. l:sampling_enabled)
+
     " Convert plugin debug level to python logger levels
     let l:log_level = ollama#logger#PythonLogLevel(g:ollama_debug)
     let l:base_url = g:ollama_host
     if g:ollama_model_provider =~ '^openai'
         let l:base_url = g:ollama_openai_baseurl
+    elseif g:ollama_model_provider == 'claude'
+        " Claude uses default Anthropic API, don't set base_url
+        let l:base_url = ''
     endif
     " Adjust the command to use the prompt as stdin input
     let l:command = [ g:ollama_python_interpreter,
@@ -234,6 +247,7 @@ function! ollama#GetSuggestion(timer)
         \ "-m", g:ollama_model,
         \ "-u", l:base_url,
         \ "-o", l:model_options,
+        \ "-se", l:sampling_enabled,
         \ "-l", l:log_level
         \ ]
     " Add optional credentialname for looking up the API key
@@ -247,6 +261,11 @@ function! ollama#GetSuggestion(timer)
             " add credentialname option for Mistral
             let l:command += [ '-k', g:ollama_mistral_credentialname ]
         endif
+    elseif g:ollama_model_provider == 'claude'
+        if exists('g:ollama_claude_credentialname') && g:ollama_claude_credentialname != ''
+            " add credentialname option for Claude
+            let l:command += [ '-k', g:ollama_claude_credentialname ]
+        endif
     endif
     call ollama#logger#Debug("command=" .. join(l:command, " "))
     let l:job_options = {
diff --git a/autoload/ollama/review.vim b/autoload/ollama/review.vim
index ee89a7b..7b34bff 100644
--- a/autoload/ollama/review.vim
+++ b/autoload/ollama/review.vim
@@ -56,6 +56,9 @@ function! s:FindBufferWindow(bufnr)
 endfunction
 
 function! s:StartChat(lines) abort
+    " Counter for reducing redraw frequency
+    let s:token_count = 0
+
     " Function handling a line of text that has been typed.
     func! TextEntered(text)
         call ollama#logger#Debug("TextEntered: " .. a:text)
@@ -65,12 +68,13 @@ function! s:StartChat(lines) abort
         endif
         " Send the text to a shell with Enter appended.
         call ch_sendraw(s:job, a:text .. "\n")
+        " Reset token count for new request
+        let s:token_count = 0
     endfunc
 
-    " Function handling output from the shell: Add it above the prompt.
-    func! GotOutput(channel, msg)
+    " OLD VERSION: Append each token as a new line (non-streaming)
+    func! GotOutputOld(channel, msg)
         call ollama#logger#Debug("GotOutput: " .. a:msg)
-
         " append lines
         let l:lines = split(a:msg, "\n")
         for l:line in l:lines
@@ -96,6 +100,91 @@ function! s:StartChat(lines) abort
         endfor
     endfunc
 
+    " NEW VERSION: Stream tokens on the same line with real-time cursor tracking
+    func! GotOutputNew(channel, msg)
+        " call ollama#logger#Debug("GotOutput: [" .. a:msg .. "]")
+        " NOTE(review): no line break is ever inserted before <EOT>; confirm how newlines in a reply are meant to arrive
+        " Look for the <EOT> marker: text before it is kept, the marker and anything after it are dropped
+        let l:idx = stridx(a:msg, "<EOT>")
+        let l:is_eot = l:idx != -1
+        let l:content = l:is_eot ? strpart(a:msg, 0, l:idx) : a:msg
+
+        " Line number and text of the line changed below; used later to place the cursor
+        let l:updated_line_num = 0
+        let l:updated_line_content = ""
+        let l:line_count = 0
+
+        if !empty(l:content)
+            " Ask getbufinfo() for the line count of buffer s:buf
+            let l:buf_info = getbufinfo(s:buf)[0]
+            let l:line_count = l:buf_info.linecount
+            " call ollama#logger#Debug("line_count=" .. l:line_count)
+
+            if l:line_count == 0
+                " No lines reported: insert the content as line 1
+                " call ollama#logger#Debug("Buffer empty, appending first line")
+                call appendbufline(s:buf, 0, l:content)
+                let l:updated_line_num = 1
+                let l:updated_line_content = l:content
+            else
+                " Fetch only the last line and extend it in place with the new content
+                let l:last_line = getbufline(s:buf, l:line_count, l:line_count)[0]
+                let l:updated_line_content = l:last_line .. l:content
+                " call ollama#logger#Debug("Appending to line " .. l:line_count .. ": '" .. l:last_line .. "' + '" .. l:content .. "'")
+                call setbufline(s:buf, l:line_count, l:updated_line_content)
+                let l:updated_line_num = l:line_count
+            endif
+        endif
+
+        " End of response: append an empty line, which becomes the new cursor line
+        if l:is_eot
+            " call ollama#logger#Debug("EOT received, adding newline")
+            call appendbufline(s:buf, "$", "")
+            " l:line_count is only non-zero when content was written above; otherwise query the buffer
+            if l:line_count > 0
+                let l:updated_line_num = l:line_count + 1
+            else
+                let l:buf_info = getbufinfo(s:buf)[0]
+                let l:updated_line_num = l:buf_info.linecount
+            endif
+            let l:updated_line_content = ""
+        endif
+
+        " Move the cursor only while the current buffer's name equals s:ollama_bufname
+        if bufname() == s:ollama_bufname " Check if current active window is Ollama Chat
+            let l:winid = bufwinid(s:buf)
+            if l:winid != -1 && l:updated_line_num > 0
+                " Put the cursor just past the end of the line (len() and cursor() both count bytes)
+                let l:col = len(l:updated_line_content) + 1
+                call win_execute(l:winid, 'call cursor(' . l:updated_line_num . ', ' . l:col . ')')
+
+                " Count this chunk; redraw only on every 5th chunk and always on EOT
+                let s:token_count += 1
+                if l:is_eot || s:token_count % 5 == 0
+                    redraw
+                endif
+
+                if l:is_eot
+                    " Streaming done: leave insert mode if active, then re-enter it with "a"
+                    if mode() == 'i'
+                        call feedkeys("\<Esc>")
+                    endif
+                    call feedkeys("a")
+                endif
+            endif
+        endif
+    endfunc
+
+    " Wrapper function that delegates to new version by default
+    " To use old version, set g:ollama_use_old_output = 1
+    func! GotOutput(channel, msg)
+        " Output dispatcher: a:channel and a:msg are passed through unchanged.
+        " GotOutputOld is used only when g:ollama_use_old_output exists and is
+        " true; in every other case GotOutputNew handles the output.
+        let l:handler = get(g:, 'ollama_use_old_output', 0) ? 'GotOutputOld' : 'GotOutputNew'
+        call call(l:handler, [a:channel, a:msg])
+    endfunc
+
     " Function handling output from the shell: Add it above the prompt.
     func! GotErrors(channel, msg)
         call ollama#logger#Debug("GotErrors: " .. a:msg)
@@ -136,9 +225,18 @@ function! s:StartChat(lines) abort
     endfunc
 
     let l:model_options = json_encode(g:ollama_chat_options)
-    call ollama#logger#Debug("Connecting to Ollama on " .. g:ollama_host .. " using model " .. g:ollama_model)
+    call ollama#logger#Debug("Chat Connecting to Ollama on " .. g:ollama_host .. " using model " .. g:ollama_model)
     call ollama#logger#Debug("model_options=" .. l:model_options)
 
+    if exists('g:ollama_model_sampling_denylist')
+            \ && len(g:ollama_model_sampling_denylist) > 0
+            \ && index(g:ollama_model_sampling_denylist, g:ollama_chat_model) >= 0
+        let l:sampling_enabled = 0
+    else
+        let l:sampling_enabled = 1
+    endif
+    call ollama#logger#Debug("sampling_enabled=" .. l:sampling_enabled)
+
     " Convert plugin debug level to python logger levels
     let l:log_level = ollama#logger#PythonLogLevel(g:ollama_debug)
     let l:base_url = g:ollama_host
@@ -154,6 +252,7 @@ function! s:StartChat(lines) abort
                 \ '-m', g:ollama_chat_model,
                 \ '-u', l:base_url,
                 \ '-o', l:model_options,
+                \ "-se", l:sampling_enabled,
                 \ '-t', g:ollama_chat_timeout,
                 \ '-l', l:log_level ]
     " Check if a system prompt was configured
@@ -205,7 +304,7 @@ function! s:StartChat(lines) abort
         silent execute 'new' l:bufname
     endif
     " Set the filetype to ollama-chat
-"    setlocal filetype=ollama-chat
+    " setlocal filetype=ollama-chat
     setlocal filetype=markdown
     setlocal buftype=prompt
     " enable BufDelete event when closing buffer usig :q!
diff --git a/plugin/ollama.vim b/plugin/ollama.vim
index 7945af4..8dfecb3 100644
--- a/plugin/ollama.vim
+++ b/plugin/ollama.vim
@@ -67,6 +67,10 @@ if !exists('g:ollama_openai_credentialname')
     " UNIX Pass credential name to lookup API key for OpenAI service
     let g:ollama_openai_credentialname = ''
 endif
+if !exists('g:ollama_claude_credentialname')
+    " UNIX Pass credential name to lookup API key for Anthropic Claude service
+    let g:ollama_claude_credentialname = ''
+endif
 " Tab completion specific settings
 if !exists('g:ollama_debounce_time')
     let g:ollama_debounce_time = 500
@@ -98,6 +102,10 @@ if !exists('g:ollama_model_options')
                 \ 'max_tokens': 500
                 \ }
 endif
+if !exists('g:ollama_model_sampling_denylist')
+    " Models in this list are started with sampling disabled (-se 0); default: none
+    let g:ollama_model_sampling_denylist = []
+endif
 " Chat specific settings
 if !exists('g:ollama_chat_provider')
     " Provider for chat models: 'ollama' or 'openai'
diff --git a/python/OllamaCredentials.py b/python/OllamaCredentials.py
index 3affe2a..37595f3 100644
--- a/python/OllamaCredentials.py
+++ b/python/OllamaCredentials.py
@@ -18,6 +18,7 @@ def GetApiKey(self, provider: str, credentialname: str | None) -> str:
           - 'openai'         → use OPENAI_API_KEY env var or pass entry
           - 'openai_legacy'  → same as 'openai', kept for compatibility
           - 'mistral'        → use MISTRAL_API_KEY env var or pass entry
+          - 'anthropic'      → use ANTHROPIC_API_KEY env var or pass entry
 
         Priority:
           1. Environment variable override
@@ -36,6 +37,8 @@ def GetApiKey(self, provider: str, credentialname: str | None) -> str:
             env_var = "OPENAI_API_KEY"
         elif provider == "mistral":
             env_var = "MISTRAL_API_KEY"
+        elif provider == "anthropic":
+            env_var = "ANTHROPIC_API_KEY"
         else:
             raise ValueError(f"Unknown provider: {provider}")
 
diff --git a/python/chat.py b/python/chat.py
index c947c2e..fde4f2e 100755
--- a/python/chat.py
+++ b/python/chat.py
@@ -63,7 +63,9 @@ async def stream_chat_message_ollama(messages, endpoint, model, options, timeout
                             if "message" in message and "content" in message["message"]:
                                 content = message["message"]["content"]
                                 assistant_message += content
-                                print(content, end="", flush=True)
+                                # Print each token followed by newline so Vim's out_cb receives it immediately
+                                # VimScript will need to handle concatenating tokens on the same line
+                                print(content, flush=True)
 
                                 # If <EOT> is detected, stop processing
                                 if "<EOT>" in content:
@@ -90,7 +92,7 @@ async def stream_chat_message_ollama(messages, endpoint, model, options, timeout
         messages.append({"role": "assistant", "content": assistant_message.strip()})
 
 
-async def stream_chat_message_openai(messages, endpoint, model, options, credentialname):
+async def stream_chat_message_openai(messages, endpoint, model, options, sampling_enabled, credentialname):
     """Stream chat responses from OpenAI API."""
     if AsyncOpenAI is None:
         raise ImportError("OpenAI package not found. Please install via 'pip install openai'.")
@@ -114,20 +116,30 @@ async def stream_chat_message_openai(messages, endpoint, model, options, credent
     top_p = options.get('top_p', 1.0)
 
     try:
-        stream = await client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            top_p=top_p,
-            stream=True,
-        )
+        # Build request parameters
+        request_params = {
+            'model': model,
+            'messages': messages,
+            'stream': True,
+        }
+
+        # Check if model supports sampling parameters
+        if sampling_enabled:
+            request_params['temperature'] = temperature
+            request_params['top_p'] = top_p
+            request_params['max_tokens'] = max_tokens
+        else:
+            request_params['max_completion_tokens'] = max_tokens
+
+        stream = await client.chat.completions.create(**request_params)
 
         async for chunk in stream:
             if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                 token = chunk.choices[0].delta.content
                 assistant_message += token
-                print(token, end="", flush=True)
+                # Print each token followed by newline so Vim's out_cb receives it immediately
+                # VimScript will need to handle concatenating tokens on the same line
+                print(token, flush=True)
 
         print("<EOT>", flush=True)
 
@@ -139,7 +151,7 @@ async def stream_chat_message_openai(messages, endpoint, model, options, credent
         messages.append({"role": "assistant", "content": assistant_message.strip()})
 
 
-async def main(provider, endpoint, model, options, systemprompt, timeout, credentialname):
+async def main(provider, endpoint, model, options, sampling_enabled, systemprompt, timeout, credentialname):
     conversation_history = []
     log.debug("endpoint: " + str(endpoint))
 
@@ -169,7 +181,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
                         )
                     else:
                         task = asyncio.create_task(
-                            stream_chat_message_openai(conversation_history, endpoint, model, options, credentialname)
+                            stream_chat_message_openai(conversation_history, endpoint, model, options, sampling_enabled, credentialname)
                         )
                     await task
                 else:
@@ -189,7 +201,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
                         )
                     else:
                         task = asyncio.create_task(
-                            stream_chat_message_openai(conversation_history, endpoint, model, options, credentialname)
+                            stream_chat_message_openai(conversation_history, endpoint, model, options, sampling_enabled, credentialname)
                         )
                     await task
 
@@ -213,6 +225,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
                         help="Base endpoint URL.")
     parser.add_argument("-o", "--options", type=str, default=DEFAULT_OPTIONS,
                         help="Ollama REST API options.")
+    parser.add_argument("-se", "--sampling-enabled", type=int, default=1, help="Enable or disable sampling.")
     parser.add_argument("-s", "--system-prompt", type=str, default="", help="Specify system prompt.")
     parser.add_argument("-t", "--timeout", type=int, default=DEFAULT_TIMEOUT, help="Timeout in seconds.")
     parser.add_argument("-l", "--log-level", type=int, default=OllamaLogger.ERROR, help="Log level.")
@@ -243,7 +256,7 @@ async def main(provider, endpoint, model, options, systemprompt, timeout, creden
     try:
         while True:
             try:
-                asyncio.run(main(args.provider, endpoint, model, options, args.system_prompt, args.timeout, args.keyname))
+                asyncio.run(main(args.provider, endpoint, model, options, args.sampling_enabled, args.system_prompt, args.timeout, args.keyname))
             except KeyboardInterrupt:
                 print("Canceled.")
                 break
diff --git a/python/complete.py b/python/complete.py
index 4145917..b92fe8f 100755
--- a/python/complete.py
+++ b/python/complete.py
@@ -23,13 +23,23 @@
 except ImportError:
     Mistral = None
 
+# try to load Anthropic package if it exists
+try:
+    from anthropic import Anthropic  # type: ignore
+except ImportError:
+    Anthropic = None
+
 # Default values
 DEFAULT_HOST = 'http://localhost:11434'
 DEFAULT_PROVIDER = 'ollama'
 DEFAULT_MODEL = 'codellama:code'
 DEFAULT_OPTIONS = '{ "temperature": 0, "top_p": 0.95 }'
+DEFAULT_TEMPERATURE = 0
+DEFAULT_MAX_TOKENS = 300
 DEFAULT_MISTRAL_MODEL = 'codestral-2501'
 DEFAULT_OPENAI_MODEL = 'gpt-4.1-mini'
+DEFAULT_OPENAI_RESPONSES_MODEL = 'gpt-5.1-codex'
+DEFAULT_CLAUDE_MODEL = 'claude-sonnet-4-20250514'
 
 # When set to true, we use our own templates and don't use the Ollama built-in templates.
 # Is is the only way to make this work reliable. As soon is this works also with Ollama
@@ -170,9 +180,9 @@ def generate_code_completion_mistral(prompt, baseurl, model, options, credential
     stop_marker = extract_stop_marker(suffix)
     stops = [stop_marker] if stop_marker else []
 
-    temperature = options.get('temperature', 0)
-#    min_tokens = options.get('min_tokens', 1)
-    max_tokens = options.get('max_tokens', 300)
+    temperature = options.get('temperature', DEFAULT_TEMPERATURE)
+    # min_tokens = options.get('min_tokens', 1)
+    max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS)
 
     log.debug('model: ' + str(model))
     log.debug('temperature: ' + str(temperature))
@@ -209,7 +219,37 @@ def extract_stop_marker(after: str) -> str | None:
             return line.rstrip()  # preserve indentation
     return None
 
-def generate_code_completion_openai(prompt, baseurl, model, options, credentialname):
+def _build_fim_prompt(before: str, after: str, lang: str = 'C') -> str:
+    """Return an instruction prompt asking for only the code that belongs between `before` and `after`; `lang` is named in the prompt."""
+    return f"""Fill in the missing code between the markers below.
+
+Rules:
+- Do NOT repeat any code that appears in the AFTER section.
+- Return only the exact code that fits between BEFORE and AFTER.
+- Do NOT add explanations or comments.
+- Output the missing code only.
+
+Language: {lang}
+
+BEFORE:
+{before}
+
+AFTER:
+{after}
+"""
+
+def _strip_code_fences(text: str) -> str:
+    """Drop a first and/or last line that starts with ``` and re-join the remaining lines with newlines."""
+    body = text.splitlines()
+    if not body:
+        return text
+    if body[0].startswith("```"):
+        del body[0]
+    if body and body[-1].startswith("```"):
+        del body[-1]
+    return "\n".join(body)
+
+def generate_code_completion_openai(prompt, baseurl, model, options, sampling_enabled, credentialname):
     """Generate code completion using OpenAI's official Python SDK"""
     if OpenAI is None:
         raise ImportError("OpenAI package not found. Please install via 'pip install openai'.")
@@ -256,21 +296,30 @@ def generate_code_completion_openai(prompt, baseurl, model, options, credentialn
     stop_marker = extract_stop_marker(after)
     stops = [stop_marker] if stop_marker else []
 
-    temperature = options.get('temperature', 0)
-    max_tokens = options.get('max_tokens', 300)
+    temperature = options.get('temperature', DEFAULT_TEMPERATURE)
+    max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS)
 
     log.debug('model: ' + str(model))
     log.debug('temperature: ' + str(temperature))
     log.debug('max_tokens: ' + str(max_tokens))
     log.debug('stops: ' + str(stops))
+    log.debug('sampling_enabled: ' + str(sampling_enabled))
     try:
-        response = client.chat.completions.create(
-            model=model,
-            messages=[{"role": "user", "content": full_prompt}],
-            temperature=temperature,
-            max_tokens=max_tokens,
-            stop=stops
-        )
+        # Build request parameters
+        request_params = {
+            'model': model,
+            'messages': [{"role": "user", "content": full_prompt}],
+        }
+
+        # Check if model supports sampling parameters
+        if sampling_enabled:
+            request_params['temperature'] = temperature
+            request_params['max_tokens'] = max_tokens
+            request_params['stop'] = stops
+        else:
+            request_params['max_completion_tokens'] = max_tokens
+
+        response = client.chat.completions.create(**request_params)
         response = response.choices[0].message.content.strip()
         log.debug('response: ' + response)
     except Exception as e:
@@ -279,19 +328,59 @@ def generate_code_completion_openai(prompt, baseurl, model, options, credentialn
         log.error(str(e))
         sys.exit(1)
 
-    # convert response to lines
-    lines = response.splitlines()
-    if lines:
-        # remove 1st element from array if it starts with ```
-        if lines[0].startswith("```"):
-            lines.pop(0)
-        # remove last element from array if it starts with ```
-        if lines[-1].startswith("```"):
-            lines.pop()
+    # Remove markdown code fences if present
+    return _strip_code_fences(response)
 
-        response = "\n".join(lines)
+def generate_code_completion_claude(prompt, baseurl, model, options, credentialname):
+    """Complete code via the Anthropic Messages API and return the reply with code fences stripped; exits with status 1 on a bad prompt or API error."""
+    if Anthropic is None:
+        raise ImportError("Anthropic package not found. Please install via 'pip install anthropic'.")
 
-    return response
+    cred = OllamaCredentials()
+    api_key = cred.GetApiKey('anthropic', credentialname)
+
+    log.debug('Using Anthropic Claude API')
+    if baseurl:
+        log.debug(f'baseurl={baseurl}')
+        client = Anthropic(api_key=api_key, base_url=baseurl)
+    else:
+        log.debug(f'Using default Anthropic URL')
+        client = Anthropic(api_key=api_key)
+
+    parts = prompt.split('<FILL_IN_HERE>')
+    if len(parts) != 2:
+        log.error("Prompt must contain <FILL_IN_HERE> marker for Claude mode.")
+        sys.exit(1)
+
+    # Wrap the code before/after the <FILL_IN_HERE> marker in an instruction prompt; language defaults to 'C'
+    lang = options.get('lang', 'C')
+    full_prompt = _build_fim_prompt(parts[0], parts[1], lang)
+    log.debug('full_prompt: ' + full_prompt)
+
+    temperature = options.get('temperature', DEFAULT_TEMPERATURE)
+    max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS)
+
+    log.debug('model: ' + str(model))
+    log.debug('temperature: ' + str(temperature))
+    log.debug('max_tokens: ' + str(max_tokens))
+    
+    try:
+        response = client.messages.create(
+            model=model or DEFAULT_CLAUDE_MODEL,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            messages=[{"role": "user", "content": full_prompt}]
+        )
+        
+        response_text = response.content[0].text.strip()
+        log.debug('response: ' + response_text)
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        log.error(str(e))
+        sys.exit(1)
+
+    # Remove markdown code fences if present
+    return _strip_code_fences(response_text)
 
 def generate_code_completion_openai_legacy(prompt, baseurl, model, options, credentialname):
     """Generate code completion using OpenAI's official Python SDK"""
@@ -318,8 +407,8 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred
     full_prompt = fill_in_the_middle(config, prompt)
     log.debug('full_prompt: ' + full_prompt)
 
-    temperature = options.get('temperature', 0)
-    max_tokens = options.get('max_tokens', 300)
+    temperature = options.get('temperature', DEFAULT_TEMPERATURE)
+    max_tokens = options.get('max_tokens', DEFAULT_MAX_TOKENS)
 
     log.debug('model: ' + str(model))
     log.debug('temperature: ' + str(temperature))
@@ -335,6 +424,94 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred
 
     return response.rstrip()
 
+def _extract_output_text_from_message(item):
+    """Return the text of the first 'output_text' part of a completed 'message' item, else None."""
+    if item.get('type') != 'message' or item.get('status') != 'completed':
+        return None
+    for part in item.get('content', []):
+        if part.get('type') == 'output_text':
+            return part.get('text', '')
+    return None
+
+def generate_code_completion_openai_responses(prompt, baseurl, model, options, credentialname):
+    """Generate code completion using OpenAI's /v1/responses endpoint for GPT-5.1-Codex.
+
+    Returns the stripped completion text, or "" when none can be extracted; exits with
+    status 1 on a malformed prompt or a failed request. options may carry 'lang',
+    'max_completion_tokens' / 'max_tokens' and 'timeout' (seconds, default 60).
+    """
+    if OpenAI is None:
+        raise ImportError("OpenAI package not found. Please install via 'pip install openai'.")
+
+    log.debug('Using OpenAI responses endpoint (for GPT-5.1-Codex)')
+    api_key = OllamaCredentials().GetApiKey('openai', credentialname)
+    # baseurl may already end in '/v1' (OpenAI SDK convention) or '/'; avoid '/v1/v1/responses'
+    base = (baseurl or "https://api.openai.com").rstrip('/')
+    endpoint = f"{base}/responses" if base.endswith('/v1') else f"{base}/v1/responses"
+
+    # Parse prompt
+    parts = prompt.split('<FILL_IN_HERE>')
+    if len(parts) != 2:
+        log.error("Prompt must contain <FILL_IN_HERE> marker.")
+        sys.exit(1)
+
+    # Build FIM prompt using helper function
+    lang = options.get('lang', 'C')
+    full_input = _build_fim_prompt(parts[0], parts[1], lang)
+    max_output_tokens = options.get('max_completion_tokens', options.get('max_tokens', DEFAULT_MAX_TOKENS))
+    log.debug(f'endpoint: {endpoint}, model: {model}, max_output_tokens: {max_output_tokens}')
+
+    # Make API request; requests has no default timeout, so pass one to avoid hanging forever
+    try:
+        response = requests.post(
+            endpoint,
+            headers={'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'},
+            json={'model': model, 'input': full_input, 'max_output_tokens': max_output_tokens},
+            timeout=options.get('timeout', 60)
+        )
+        if response.status_code != 200:
+            log.error(f'API error: {response.text}')
+        response.raise_for_status()
+
+        result = response.json()
+        log.debug('response: ' + json.dumps(result, indent=2))
+
+        # Extract completion from response (supports both list and dict formats)
+        completion = None
+        output_items = result.get('output', []) if isinstance(result, dict) else result if isinstance(result, list) else []
+        for item in output_items:
+            completion = _extract_output_text_from_message(item)
+            if completion:
+                break
+
+        # Check for incomplete responses
+        if not completion and isinstance(result, dict) and result.get('status') == 'incomplete':
+            log.warning(f'Response incomplete: {result.get("incomplete_details")}')
+
+        # Fallback for other formats
+        if not completion and isinstance(result, dict):
+            if 'text' in result:
+                completion = result['text']
+            elif 'choices' in result and result['choices']:
+                choice = result['choices'][0]
+                completion = choice.get('text') or choice.get('message', {}).get('content')
+
+        if not completion:
+            log.error('Could not extract completion from response')
+            return ""
+
+        # Ensure completion is a string
+        if not isinstance(completion, str):
+            log.error(f'Completion is not a string, type: {type(completion)}, value: {completion}')
+            return ""
+
+        log.debug(f'Final completion: {completion}')
+        return completion.strip()
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        log.error(str(e))
+        sys.exit(1)
+
 if __name__ == "__main__":
     try:
         parser = argparse.ArgumentParser(description="Complete code using Ollama or OpenAI LLM.")
@@ -346,6 +523,7 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred
                             help="Base endpoint URL (for Ollama only).")
         parser.add_argument('-o', '--options', type=str, default=DEFAULT_OPTIONS,
                             help="Ollama REST API options (JSON string).")
+        parser.add_argument("-se", "--sampling-enabled", type=int, default=1, help="Enable or disable sampling.")
         parser.add_argument('-l', '--log-level', type=int, default=OllamaLogger.ERROR,
                             help="Specify log level")
         parser.add_argument('-f', '--log-filename', type=str, default="complete.log",
@@ -391,7 +569,7 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred
             else:
                 modelname = DEFAULT_OPENAI_MODEL
             baseurl = args.url or None
-            response = generate_code_completion_openai(prompt, baseurl, modelname, options, args.keyname)
+            response = generate_code_completion_openai(prompt, baseurl, modelname, options, args.sampling_enabled, args.keyname)
         elif args.provider == "openai_legacy":
             if args.model:
                 modelname = args.model
@@ -399,6 +577,20 @@ def generate_code_completion_openai_legacy(prompt, baseurl, model, options, cred
                 modelname = DEFAULT_OPENAI_MODEL
             baseurl = args.url or None
             response = generate_code_completion_openai_legacy(prompt, baseurl, modelname, options, args.keyname)
+        elif args.provider == "claude":
+            if args.model:
+                modelname = args.model
+            else:
+                modelname = DEFAULT_CLAUDE_MODEL
+            baseurl = args.url or None
+            response = generate_code_completion_claude(prompt, baseurl, modelname, options, args.keyname)
+        elif args.provider == "openai_responses":
+            if args.model:
+                modelname = args.model
+            else:
+                modelname = DEFAULT_OPENAI_RESPONSES_MODEL
+            baseurl = args.url or None
+            response = generate_code_completion_openai_responses(prompt, baseurl, modelname, options, args.keyname)
         else:
             log.error(f"Unknown provider: {args.provider}")
             sys.exit(1)