☰', elem_id='gr-hover')
with gr.Column(scale=10, elem_id='chat-input-container'):
- shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
+ shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder=t('Send a message'), file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
shared.gradio['typing-dots'] = gr.HTML(value='
', label='typing', elem_id='typing-container')
with gr.Column(scale=1, elem_id='generate-stop-container'):
with gr.Row():
- shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop', visible=False)
- shared.gradio['Generate'] = gr.Button('Send', elem_id='Generate', variant='primary')
+ shared.gradio['Stop'] = gr.Button(t('Stop'), elem_id='stop', visible=False)
+ shared.gradio['Generate'] = gr.Button(t('Send'), elem_id='Generate', variant='primary')
# Hover menu buttons
with gr.Column(elem_id='chat-buttons'):
- shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
- shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
- shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
- shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
- shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
- shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
- shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
- shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
+ shared.gradio['Regenerate'] = gr.Button(t('Regenerate (Ctrl + Enter)'), elem_id='Regenerate')
+ shared.gradio['Continue'] = gr.Button(t('Continue (Alt + Enter)'), elem_id='Continue')
+ shared.gradio['Remove last'] = gr.Button(t('Remove last reply (Ctrl + Shift + Backspace)'), elem_id='Remove-last')
+ shared.gradio['Impersonate'] = gr.Button(t('Impersonate (Ctrl + Shift + M)'), elem_id='Impersonate')
+ shared.gradio['Send dummy message'] = gr.Button(t('Send dummy message'))
+ shared.gradio['Send dummy reply'] = gr.Button(t('Send dummy reply'))
+ shared.gradio['send-chat-to-notebook'] = gr.Button(t('Send to Notebook'))
+ shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label=t('Show controls (Ctrl+S)'), elem_id='show-controls')
with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row():
- shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
+ shared.gradio['start_with'] = gr.Textbox(label=t('Start reply with'), placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
gr.HTML("")
- shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')
- shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='For models with thinking support.')
+ shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label=t('Reasoning effort'), info=t('Used by GPT-OSS.'))
+ shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label=t('Enable thinking'), info=t('For models with thinking support.'))
gr.HTML("")
- shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
+ shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label=t('Activate web search'), elem_id='web-search')
with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
- shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
+ shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label=t('Number of pages to download'), minimum=1, maximum=10)
gr.HTML("")
@@ -108,18 +109,18 @@ def sync_web_tools(selected):
gr.HTML("")
with gr.Row():
- shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='In instruct and chat-instruct modes, the template under Parameters > Instruction template is used.', elem_id='chat-mode')
+ shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label=t('Mode'), info=t('In instruct and chat-instruct modes, the template under Parameters > Instruction template is used.'), elem_id='chat-mode')
with gr.Row():
- shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
+ shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label=t('Chat style'), value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
with gr.Row():
- shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
+ shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label=t('Command for chat-instruct mode'), info=t('<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.'), visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
gr.HTML("")
with gr.Row():
- shared.gradio['count_tokens'] = gr.Button('Count tokens', size='sm')
+ shared.gradio['count_tokens'] = gr.Button(t('Count tokens'), size='sm')
shared.gradio['token_display'] = gr.HTML(value='', elem_classes='token-display')
@@ -137,72 +138,72 @@ def sync_web_tools(selected):
def create_character_settings_ui():
mu = shared.args.multi_user
- with gr.Tab('Character', elem_id="character-tab"):
+ with gr.Tab(t('Character'), elem_id="character-tab"):
with gr.Row():
with gr.Column(scale=8):
- with gr.Tab("Character"):
+ with gr.Tab(t("Character")):
with gr.Row():
- shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+ shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label=t('Character'), elem_id='character-menu', info=t('Used in chat and chat-instruct modes.'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character')
+ shared.gradio['restore_character'] = gr.Button(t('Restore character'), elem_classes='refresh-button', interactive=True, elem_id='restore-character')
- shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name')
- shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar'], elem_id="character-context")
- shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar'], elem_id="character-greeting")
+ shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label=t('Character\'s name'))
+ shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label=t('Context'), elem_classes=['add_scrollbar'], elem_id="character-context")
+ shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label=t('Greeting'), elem_classes=['add_scrollbar'], elem_id="character-greeting")
- with gr.Tab("User"):
+ with gr.Tab(t("User")):
with gr.Row():
- shared.gradio['user_menu'] = gr.Dropdown(value=shared.settings['user'], choices=utils.get_available_users(), label='User', elem_id='user-menu', info='Select a user profile.', elem_classes='slim-dropdown')
+ shared.gradio['user_menu'] = gr.Dropdown(value=shared.settings['user'], choices=utils.get_available_users(), label=t('User'), elem_id='user-menu', info=t('Select a user profile.'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['user_menu'], lambda: None, lambda: {'choices': utils.get_available_users()}, 'refresh-button', interactive=not mu)
shared.gradio['save_user'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-user", interactive=not mu)
shared.gradio['delete_user'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
- shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label='Description', info='Here you can optionally write a description of yourself.', placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'], elem_id="user-description")
+ shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label=t('Name'))
+ shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label=t('Description'), info=t('Here you can optionally write a description of yourself.'), placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'], elem_id="user-description")
- with gr.Tab('Chat history'):
+ with gr.Tab(t('Chat history')):
with gr.Row():
with gr.Column():
- shared.gradio['save_chat_history'] = gr.Button(value='Save history')
+ shared.gradio['save_chat_history'] = gr.Button(value=t('Save history'))
with gr.Column():
- shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')
+ shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label=t('Upload History JSON'))
- with gr.Tab('Upload character'):
- with gr.Tab('YAML or JSON'):
+ with gr.Tab(t('Upload character')):
+ with gr.Tab(t('YAML or JSON')):
with gr.Row():
- shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
- shared.gradio['upload_img_bot'] = gr.Image(type='filepath', label='Profile Picture (optional)', interactive=not mu)
+ shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label=t('JSON or YAML File'), interactive=not mu)
+ shared.gradio['upload_img_bot'] = gr.Image(type='filepath', label=t('Profile Picture (optional)'), interactive=not mu)
- shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)
+ shared.gradio['Submit character'] = gr.Button(value=t('Submit'), interactive=False)
- with gr.Tab('TavernAI PNG'):
+ with gr.Tab(t('TavernAI PNG')):
with gr.Row():
with gr.Column():
- shared.gradio['upload_img_tavern'] = gr.Image(type='filepath', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
+ shared.gradio['upload_img_tavern'] = gr.Image(type='filepath', label=t('TavernAI PNG File'), elem_id='upload_img_tavern', interactive=not mu)
shared.gradio['tavern_json'] = gr.State()
with gr.Column():
- shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
- shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label='Description', interactive=False, elem_classes=['add_scrollbar'])
+ shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label=t('Name'), interactive=False)
+ shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label=t('Description'), interactive=False, elem_classes=['add_scrollbar'])
- shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
+ shared.gradio['Submit tavern character'] = gr.Button(value=t('Submit'), interactive=False)
with gr.Column(scale=1):
- shared.gradio['character_picture'] = gr.Image(label='Character picture', type='filepath', interactive=not mu)
- shared.gradio['your_picture'] = gr.Image(label='Your picture', type='filepath', value=Image.open(shared.user_data_dir / 'cache' / 'pfp_me.png') if (shared.user_data_dir / 'cache' / 'pfp_me.png').exists() else None, interactive=not mu)
+ shared.gradio['character_picture'] = gr.Image(label=t('Character picture'), type='filepath', interactive=not mu)
+ shared.gradio['your_picture'] = gr.Image(label=t('Your picture'), type='filepath', value=Image.open(shared.user_data_dir / 'cache' / 'pfp_me.png') if (shared.user_data_dir / 'cache' / 'pfp_me.png').exists() else None, interactive=not mu)
def create_chat_settings_ui():
mu = shared.args.multi_user
- with gr.Tab('Instruction template'):
+ with gr.Tab(t('Instruction template')):
with gr.Row():
with gr.Column():
with gr.Row():
- shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info="After selecting the template, click on \"Load\" to load and apply it.", value='None', elem_classes='slim-dropdown')
+ shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label=t('Saved instruction templates'), info=t("After selecting the template, click on \"Load\" to load and apply it."), value='None', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
- shared.gradio['load_template'] = gr.Button("Load", elem_classes='refresh-button')
+ shared.gradio['load_template'] = gr.Button(t("Load"), elem_classes='refresh-button')
shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
@@ -211,12 +212,12 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column():
- shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'], elem_id='instruction-template-str')
+ shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label=t('Instruction template'), lines=24, info=t("This gets autodetected; you usually don't need to change it. Used in instruct and chat-instruct modes."), elem_classes=['add_scrollbar', 'monospace'], elem_id='instruction-template-str')
with gr.Row():
- shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
+ shared.gradio['send_instruction_to_notebook'] = gr.Button(t('Send to notebook'), elem_classes=['small-button'])
with gr.Column():
- shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'], info='Defines how the chat prompt in chat/chat-instruct modes is generated.', elem_id='chat-template-str')
+ shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label=t('Chat template'), lines=22, elem_classes=['add_scrollbar', 'monospace'], info=t('Defines how the chat prompt in chat/chat-instruct modes is generated.'), elem_id='chat-template-str')
def create_event_handlers():
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 16505afa55..9ab87e2fca 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -21,126 +21,127 @@
update_model_parameters
)
from modules.utils import gradio
+from modules.i18n import t
def create_ui():
mu = shared.args.multi_user
- with gr.Tab("Model", elem_id="model-tab"):
+ with gr.Tab(t("Model"), elem_id="model-tab"):
with gr.Row():
with gr.Column():
with gr.Row():
- shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
+ shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label=t('Model'), elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
- shared.gradio['load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
- shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
- shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['load_model'] = gr.Button(t("Load"), elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['unload_model'] = gr.Button(t("Unload"), elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['save_model_settings'] = gr.Button(t("Save settings"), elem_classes='refresh-button', interactive=not mu)
- shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
+ shared.gradio['loader'] = gr.Dropdown(label=t("Model loader"), choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
with gr.Blocks():
- gr.Markdown("## Main options")
+ gr.Markdown(t("## Main options"))
with gr.Row():
with gr.Column():
- shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=-1, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Number of layers to offload to the GPU. -1 = auto.')
- shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=0, maximum=1048576, step=1024, value=shared.args.ctx_size, info='Context length. 0 = auto for llama.cpp (requires gpu-layers=-1), 8192 for other loaders. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
- shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
- shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
- shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
- shared.gradio['fit_target'] = gr.Textbox(label='fit-target', value=shared.args.fit_target, info='Target VRAM margin per device for auto GPU layers (MiB). Comma-separated list for multiple devices.')
- shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')
+ shared.gradio['gpu_layers'] = gr.Slider(label=t("gpu-layers"), minimum=-1, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info=t('Number of layers to offload to the GPU. -1 = auto.'))
+ shared.gradio['ctx_size'] = gr.Slider(label=t('ctx-size'), minimum=0, maximum=1048576, step=1024, value=shared.args.ctx_size, info=t('Context length. 0 = auto for llama.cpp (requires gpu-layers=-1), 8192 for other loaders. Common values: 4096, 8192, 16384, 32768, 65536, 131072.'))
+ shared.gradio['gpu_split'] = gr.Textbox(label=t('gpu-split'), info=t('Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7'))
+ shared.gradio['attn_implementation'] = gr.Dropdown(label=t("attn-implementation"), choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info=t('Attention implementation.'))
+ shared.gradio['cache_type'] = gr.Dropdown(label=t("cache-type"), choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info=t('Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).'))
+ shared.gradio['fit_target'] = gr.Textbox(label=t('fit-target'), value=shared.args.fit_target, info=t('Target VRAM margin per device for auto GPU layers (MiB). Comma-separated list for multiple devices.'))
+ shared.gradio['tp_backend'] = gr.Dropdown(label=t("tp-backend"), choices=['native', 'nccl'], value=shared.args.tp_backend, info=t('The backend for tensor parallelism.'))
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
if not shared.args.portable:
- shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
+ shared.gradio['ik'] = gr.Checkbox(label=t("ik"), value=shared.args.ik, info=t('Use ik_llama.cpp instead of upstream llama.cpp.'))
- shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
- shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
+ shared.gradio['cpu_moe'] = gr.Checkbox(label=t("cpu-moe"), value=shared.args.cpu_moe, info=t('Move the experts to the CPU. Saves VRAM on MoE models.'))
+ shared.gradio['streaming_llm'] = gr.Checkbox(label=t("streaming-llm"), value=shared.args.streaming_llm, info=t('Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.'))
- shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
- shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
- shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
- shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')
+ shared.gradio['load_in_8bit'] = gr.Checkbox(label=t("load-in-8bit"), value=shared.args.load_in_8bit)
+ shared.gradio['load_in_4bit'] = gr.Checkbox(label=t("load-in-4bit"), value=shared.args.load_in_4bit)
+ shared.gradio['use_double_quant'] = gr.Checkbox(label=t("use_double_quant"), value=shared.args.use_double_quant, info=t('Used by load-in-4bit.'))
+ shared.gradio['enable_tp'] = gr.Checkbox(label=t("enable_tp"), value=shared.args.enable_tp, info=t('Enable tensor parallelism (TP).'))
shared.gradio['tensorrt_llm_info'] = gr.Markdown(
- '* TensorRT-LLM has to be installed manually: `pip install tensorrt_llm==1.1.0 --extra-index-url https://pypi.nvidia.com`.\n\n'
- '* You can load either a pre-built TensorRT engine or a regular HF model. '
- 'HF models will be compiled to a TensorRT engine automatically on each load (this can take a while).'
+ t('* TensorRT-LLM has to be installed manually: `pip install tensorrt_llm==1.1.0 --extra-index-url https://pypi.nvidia.com`.\n\n')
+ + t('* You can load either a pre-built TensorRT engine or a regular HF model. ')
+ + t('HF models will be compiled to a TensorRT engine automatically on each load (this can take a while).')
)
# Multimodal
- with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
+ with gr.Accordion(t("Multimodal (vision)"), open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
with gr.Row():
- shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
+ shared.gradio['mmproj'] = gr.Dropdown(label=t("mmproj file"), choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f"{t('Select a file that matches your model. Must be placed in')} {shared.user_data_dir}/mmproj/", interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
- with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
- shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
+ with gr.Accordion(t("Speculative decoding"), open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
+ shared.gradio['draft_max'] = gr.Number(label=t("draft-max"), precision=0, step=1, value=shared.args.draft_max, info=t('Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.'))
- gr.Markdown('#### Draft model')
+ gr.Markdown(t('#### Draft model'))
with gr.Row():
- shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Must share the same vocabulary as the main model.', interactive=not mu)
+ shared.gradio['model_draft'] = gr.Dropdown(label=t("model-draft"), choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info=t('Draft model. Must share the same vocabulary as the main model.'), interactive=not mu)
ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)
- shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
- shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
- shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
+ shared.gradio['gpu_layers_draft'] = gr.Slider(label=t("gpu-layers-draft"), minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info=t('Number of layers to offload to the GPU for the draft model.'))
+ shared.gradio['device_draft'] = gr.Textbox(label=t("device-draft"), value=shared.args.device_draft, info=t('Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1'))
+ shared.gradio['ctx_size_draft'] = gr.Number(label=t("ctx-size-draft"), precision=0, step=256, value=shared.args.ctx_size_draft, info=t('Size of the prompt context for the draft model. If 0, uses the same as the main model.'))
- shared.gradio['ngram_header'] = gr.Markdown('#### N-gram (draftless)')
- shared.gradio['spec_type'] = gr.Dropdown(label="spec-type", choices=['none', 'ngram-mod', 'ngram-simple', 'ngram-map-k', 'ngram-map-k4v', 'ngram-cache'], value=shared.args.spec_type, info='Draftless speculative decoding type. Recommended: ngram-mod.')
- shared.gradio['spec_ngram_size_n'] = gr.Number(label="spec-ngram-size-n", precision=0, step=1, value=shared.args.spec_ngram_size_n, info='N-gram lookup size for speculative decoding.', visible=shared.args.spec_type != 'none')
- shared.gradio['spec_ngram_size_m'] = gr.Number(label="spec-ngram-size-m", precision=0, step=1, value=shared.args.spec_ngram_size_m, info='Draft n-gram size for speculative decoding.', visible=shared.args.spec_type != 'none')
- shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
+ shared.gradio['ngram_header'] = gr.Markdown(t('#### N-gram (draftless)'))
+ shared.gradio['spec_type'] = gr.Dropdown(label=t("spec-type"), choices=['none', 'ngram-mod', 'ngram-simple', 'ngram-map-k', 'ngram-map-k4v', 'ngram-cache'], value=shared.args.spec_type, info=t('Draftless speculative decoding type. Recommended: ngram-mod.'))
+ shared.gradio['spec_ngram_size_n'] = gr.Number(label=t("spec-ngram-size-n"), precision=0, step=1, value=shared.args.spec_ngram_size_n, info=t('N-gram lookup size for speculative decoding.'), visible=shared.args.spec_type != 'none')
+ shared.gradio['spec_ngram_size_m'] = gr.Number(label=t("spec-ngram-size-m"), precision=0, step=1, value=shared.args.spec_ngram_size_m, info=t('Draft n-gram size for speculative decoding.'), visible=shared.args.spec_type != 'none')
+ shared.gradio['spec_ngram_min_hits'] = gr.Number(label=t("spec-ngram-min-hits"), precision=0, step=1, value=shared.args.spec_ngram_min_hits, info=t('Minimum n-gram hits for ngram-map speculative decoding.'), visible=shared.args.spec_type != 'none')
- gr.Markdown("## Other options")
- with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
+ gr.Markdown(t("## Other options"))
+ with gr.Accordion(t("See more options"), open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
- shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
- shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
- shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
- shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
- shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
- shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
- shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Extra flags to pass to llama-server. Example: --jinja --rpc 192.168.1.100:50052', value=shared.args.extra_flags)
- shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
- shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
- shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
+ shared.gradio['parallel'] = gr.Slider(label=t("parallel"), minimum=1, step=1, maximum=64, value=shared.args.parallel, info=t('Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.'))
+ shared.gradio['threads'] = gr.Slider(label=t("threads"), minimum=0, step=1, maximum=256, value=shared.args.threads)
+ shared.gradio['threads_batch'] = gr.Slider(label=t("threads_batch"), minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
+ shared.gradio['batch_size'] = gr.Slider(label=t("batch_size"), minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
+ shared.gradio['ubatch_size'] = gr.Slider(label=t("ubatch_size"), minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
+ shared.gradio['tensor_split'] = gr.Textbox(label=t('tensor_split'), info=t('List of proportions to split the model across multiple GPUs. Example: 60,40'))
+ shared.gradio['extra_flags'] = gr.Textbox(label=t('extra-flags'), info=t('Extra flags to pass to llama-server. Example: --jinja --rpc 192.168.1.100:50052'), value=shared.args.extra_flags)
+ shared.gradio['cpu_memory'] = gr.Number(label=t("Maximum CPU memory in GiB. Use this for CPU offloading."), value=shared.args.cpu_memory)
+ shared.gradio['compute_dtype'] = gr.Dropdown(label=t("compute_dtype"), choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info=t('Used by load-in-4bit.'))
+ shared.gradio['quant_type'] = gr.Dropdown(label=t("quant_type"), choices=["nf4", "fp4"], value=shared.args.quant_type, info=t('Used by load-in-4bit.'))
with gr.Column():
- shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
+ shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info=t('Use PyTorch in CPU mode.'))
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
- shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
- shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
- shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
- shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
- shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
+ shared.gradio['row_split'] = gr.Checkbox(label=t("row_split"), value=shared.args.row_split, info=t('Split the model by rows across GPUs. This may improve multi-gpu performance.'))
+ shared.gradio['no_kv_offload'] = gr.Checkbox(label=t("no_kv_offload"), value=shared.args.no_kv_offload, info=t('Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.'))
+ shared.gradio['no_mmap'] = gr.Checkbox(label=t("no-mmap"), value=shared.args.no_mmap)
+ shared.gradio['mlock'] = gr.Checkbox(label=t("mlock"), value=shared.args.mlock)
+ shared.gradio['numa'] = gr.Checkbox(label=t("numa"), value=shared.args.numa, info=t('NUMA support can help on some systems with non-uniform memory access.'))
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
- shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
- shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
+ shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info=t('Necessary to use CFG with this loader.'))
+ shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info=t('Set use_fast=False while loading the tokenizer.'))
if not shared.args.portable:
with gr.Row():
- shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)
+ shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label=t('LoRA(s)'), elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button', interactive=not mu)
- shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['lora_menu_apply'] = gr.Button(value=t('Apply LoRAs'), elem_classes='refresh-button', interactive=not mu)
with gr.Column():
- with gr.Tab("Download"):
- shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
- shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
+ with gr.Tab(t("Download")):
+ shared.gradio['custom_model_menu'] = gr.Textbox(label=t("Download model or LoRA"), info=t("Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box."), interactive=not mu)
+ shared.gradio['download_specific_file'] = gr.Textbox(placeholder=t("File name (for GGUF models)"), show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
- shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
- shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+ shared.gradio['download_model_button'] = gr.Button(t("Download"), variant='primary', interactive=not mu)
+ shared.gradio['get_file_list'] = gr.Button(t("Get file list"), interactive=not mu)
- with gr.Tab("Customize instruction template"):
+ with gr.Tab(t("Customize instruction template")):
with gr.Row():
- shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
+ shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label=t('Select the desired instruction template'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
- shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
- gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's metadata, which sometimes is wrong.")
+ shared.gradio['customized_template_submit'] = gr.Button(t("Submit"), variant="primary", interactive=not mu)
+ gr.Markdown(t("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's metadata, which sometimes is wrong."))
with gr.Row():
- shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
+ shared.gradio['model_status'] = gr.Markdown(t('No model is loaded') if shared.model_name == 'None' else t('Ready'))
def create_event_handlers():
@@ -257,7 +258,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
return
if not repo_id:
- yield "Please enter a model path."
+ yield t("Please enter a model path.")
progress(0.0)
return
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 5411b29427..81aa5d147e 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -4,107 +4,108 @@
from modules import loaders, presets, shared, ui, ui_chat, utils
from modules.utils import gradio
+from modules.i18n import t
def create_ui():
mu = shared.args.multi_user
- with gr.Tab("Parameters", elem_id="parameters"):
- with gr.Tab("Generation"):
+ with gr.Tab(t("Parameters"), elem_id="parameters"):
+ with gr.Tab(t("Generation")):
with gr.Row():
with gr.Column():
with gr.Row():
- shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown')
+ shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label=t('Preset'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True)
- shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True)
+ shared.gradio['reset_preset'] = gr.Button(t('Restore preset'), elem_classes='refresh-button', interactive=True)
+ shared.gradio['neutralize_samplers'] = gr.Button(t('Neutralize samplers'), elem_classes='refresh-button', interactive=True)
with gr.Column():
- shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
+ shared.gradio['filter_by_loader'] = gr.Dropdown(label=t("Filter by loader"), choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
with gr.Row():
with gr.Column():
with gr.Row():
with gr.Column():
- gr.Markdown('## Curve shape')
- shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature')
- shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature'])
- shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature'])
- shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature'])
- shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
- shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
- shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature')
-
- gr.Markdown('## Curve cutoff')
- shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p')
- shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k')
- shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p')
- shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma')
- shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p')
- shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
- shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
+ gr.Markdown(t('## Curve shape'))
+ shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label=t('temperature'))
+ shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label=t('dynatemp_low'), visible=shared.settings['dynamic_temperature'])
+ shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label=t('dynatemp_high'), visible=shared.settings['dynamic_temperature'])
+ shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label=t('dynatemp_exponent'), visible=shared.settings['dynamic_temperature'])
+ shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info=t('Activates Quadratic Sampling.'))
+ shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info=t('Adjusts the dropoff curve of Quadratic Sampling.'))
+ shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label=t('dynamic_temperature'))
+
+ gr.Markdown(t('## Curve cutoff'))
+ shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label=t('top_p'))
+ shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label=t('top_k'))
+ shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label=t('min_p'))
+ shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label=t('top_n_sigma'))
+ shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label=t('typical_p'))
+ shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label=t('xtc_threshold'), info=t('If 2 or more tokens have probability above this threshold, consider removing all but the last one.'))
+ shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label=t('xtc_probability'), info=t('Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.'))
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff')
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs')
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a')
- gr.Markdown('## Repetition suppression')
- shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
- shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
- shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
- shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty')
- shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty')
- shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty')
+ gr.Markdown(t('## Repetition suppression'))
+ shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label=t('dry_multiplier'), info=t('Set to greater than 0 to enable DRY. Recommended value: 0.8.'))
+ shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label=t('dry_allowed_length'), info=t('Longest sequence that can be repeated without being penalized.'))
+ shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label=t('dry_base'), info=t('Controls how fast the penalty grows with increasing sequence length.'))
+ shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label=t('repetition_penalty'))
+ shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label=t('frequency_penalty'))
+ shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label=t('presence_penalty'))
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size')
- shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range')
+ shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label=t('repetition_penalty_range'))
with gr.Column():
- gr.Markdown('## Alternative sampling methods')
- shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
- shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
- shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
- shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau')
- shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta')
- shared.gradio['adaptive_target'] = gr.Slider(0.0, 1.0, value=shared.settings['adaptive_target'], step=0.01, label='adaptive_target', info='Target probability for adaptive-p sampling. Tokens near this probability are favored. 0 disables.')
- shared.gradio['adaptive_decay'] = gr.Slider(0.0, 0.99, value=shared.settings['adaptive_decay'], step=0.01, label='adaptive_decay', info='EMA decay rate for adaptive-p. Controls history window (~1/(1-decay) tokens). Default: 0.9.')
-
- gr.Markdown('## Other options')
+ gr.Markdown(t('## Alternative sampling methods'))
+ shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label=t('penalty_alpha'), info=t('For Contrastive Search. do_sample must be unchecked.'))
+ shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label=t('guidance_scale'), info=t('For CFG. 1.5 is a good value.'))
+ shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label=t('mirostat_mode'), info=t('mode=1 is for llama.cpp only.'))
+ shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label=t('mirostat_tau'))
+ shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label=t('mirostat_eta'))
+ shared.gradio['adaptive_target'] = gr.Slider(0.0, 1.0, value=shared.settings['adaptive_target'], step=0.01, label=t('adaptive_target'), info=t('Target probability for adaptive-p sampling. Tokens near this probability are favored. 0 disables.'))
+ shared.gradio['adaptive_decay'] = gr.Slider(0.0, 0.99, value=shared.settings['adaptive_decay'], step=0.01, label=t('adaptive_decay'), info=t('EMA decay rate for adaptive-p. Controls history window (~1/(1-decay) tokens). Default: 0.9.'))
+
+ gr.Markdown(t('## Other options'))
shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample')
- shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
- shared.gradio['sampler_priority'] = gr.DragDrop(value=shared.settings['sampler_priority'], label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
- shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
+ shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label=t('temperature_last'), info=t('Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".'))
+ shared.gradio['sampler_priority'] = gr.DragDrop(value=shared.settings['sampler_priority'], label=t('Sampler priority'), info=t('Parameter names separated by new lines or commas.'), elem_classes=['add_scrollbar'])
+ shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label=t('dry_sequence_breakers'), info=t('Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.'))
with gr.Column():
with gr.Row():
with gr.Column():
with gr.Blocks():
- shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
- shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
- shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
+ shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label=t('max_new_tokens'), info=t('⚠️ Setting this too high can cause prompt truncation.'))
+ shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info=t('Activates Prompt Lookup Decoding.'))
+ shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label=t('Maximum tokens/second'), info=t('To make text readable in real time.'))
- shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
- shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
- shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Only applies to text completion (notebook). In chat mode, templates control BOS tokens.')
- shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
- shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
- shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.')
+ shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label=t('auto_max_new_tokens'), info=t('Expand max_new_tokens to the available context length.'))
+ shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label=t('Ban the eos_token'), info=t('Forces the model to never end the generation prematurely.'))
+ shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label=t('Add the bos_token to the beginning of prompts'), info=t('Only applies to text completion (notebook). In chat mode, templates control BOS tokens.'))
+ shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info=t('Some specific models need this unset.'))
+ shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label=t('Activate text streaming'))
+ shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label=t('Static KV cache'), info=t('Use a static cache for improved performance.'))
with gr.Column():
- shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.')
- shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
- shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
- shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
- shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
- shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
+ shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label=t('Truncate the prompt up to this length'), info=t('The leftmost tokens are removed if the prompt exceeds this length.'))
+ shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label=t('Seed (-1 for random)'))
+ shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label=t('Custom system message'), info=t('If not empty, will be used instead of the default one.'), elem_classes=['add_scrollbar'])
+ shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label=t('Custom stopping strings'), info=t('Written between \"\" and separated by commas.'), placeholder='"\\n", "\\nYou:"')
+ shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label=t('Token bans'), info=t('Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.'))
+ shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label=t('Negative prompt'), info=t('For CFG. Only used when guidance_scale is different than 1.'), lines=3, elem_classes=['add_scrollbar'])
with gr.Row() as shared.gradio['grammar_file_row']:
- shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
+ shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label=t('Load grammar from file (.gbnf)'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
+ shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label=t('Grammar'), lines=16, elem_classes=['add_scrollbar', 'monospace'])
ui_chat.create_chat_settings_ui()
From e2a8eb936756767f8264fb03efb09577e58e9311 Mon Sep 17 00:00:00 2001
From: ystartgo
Date: Sun, 15 Feb 2026 20:50:50 +0800
Subject: [PATCH 11/12] lint
---
modules/i18n.py | 4 ++--
modules/ui_model_menu.py | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/i18n.py b/modules/i18n.py
index 4fb8f60336..a379dee0a8 100644
--- a/modules/i18n.py
+++ b/modules/i18n.py
@@ -95,7 +95,7 @@
"Send to notebook": "發送到筆記本",
"Send to Notebook": "發送到筆記本",
"Chat template": "聊天模板",
- "Send": "傳送",
+ # duplicate key removed; keep single mapping for "Send"
"Regenerate (Ctrl + Enter)": "重新生成(Ctrl + Enter)",
"Continue (Alt + Enter)": "繼續(Alt + Enter)",
"Remove last reply (Ctrl + Shift + Backspace)": "移除上一則回覆(Ctrl + Shift + Backspace)",
@@ -253,7 +253,7 @@
"top_k": "top_k Top-k",
"typical_p": "typical_p Typical-p",
"dry_multiplier": "dry 乘數",
- "dry_allowed_length": "允許重複長度",
+ # duplicate key removed; keep prefixed form above
"Good morning!": "早安!",
"Good afternoon!": "午安!",
"Good evening!": "晚安!",
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 9ab87e2fca..e847477a45 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -343,7 +343,7 @@ def downloader_thread_target():
specific_file=specific_file
)
update_queue.put(("COMPLETED", f"Model successfully saved to `{output_folder}/`."))
- except Exception as e:
+ except Exception:
tb_str = traceback.format_exc().replace('\n', '\n\n')
update_queue.put(("ERROR", tb_str))
@@ -378,7 +378,7 @@ def downloader_thread_target():
download_thread.join()
- except Exception as e:
+ except Exception:
progress(0.0)
tb_str = traceback.format_exc().replace('\n', '\n\n')
yield tb_str
From aa77a3ac9b1992f7e887cf113a3554f869b79f34 Mon Sep 17 00:00:00 2001
From: ystartgo
Date: Sun, 15 Feb 2026 21:02:04 +0800
Subject: [PATCH 12/12] fix
---
modules/i18n.py | 10 ++++++++++
modules/ui_model_menu.py | 15 +++++++++++----
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/modules/i18n.py b/modules/i18n.py
index a379dee0a8..d3e3ab860c 100644
--- a/modules/i18n.py
+++ b/modules/i18n.py
@@ -107,6 +107,16 @@
"Reasoning effort": "推理強度",
"Enable thinking": "啟用思考",
"For models with thinking support.": "適用於支援思考的模型。",
+ "compute_dtype": "compute_dtype 計算精度",
+ "quant_type": "quant_type 量化類型",
+ "Number of experts per token": "每個詞元的專家數",
+ "cpu": "cpu CPU 模式",
+ "disk": "disk 硬碟卸載",
+ "bf16": "bf16 bfloat16",
+ "no_flash_attn": "no_flash_attn 停用 Flash-Attn",
+ "no_xformers": "no_xformers 停用 xFormers",
+ "no_sdpa": "no_sdpa 停用 SDPA",
+ "cfg-cache": "cfg-cache CFG 快取",
"Activate web search": "啟用網頁搜尋",
"Number of pages to download": "下載頁數",
"Mode": "模式",
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index e847477a45..d0603e320b 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -104,19 +104,26 @@ def create_ui():
shared.gradio['tensor_split'] = gr.Textbox(label=t('tensor_split'), info=t('List of proportions to split the model across multiple GPUs. Example: 60,40'))
shared.gradio['extra_flags'] = gr.Textbox(label=t('extra-flags'), info=t('Extra flags to pass to llama-server. Example: --jinja --rpc 192.168.1.100:50052'), value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label=t("Maximum CPU memory in GiB. Use this for CPU offloading."), value=shared.args.cpu_memory)
+ shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info=t('Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.'))
+ shared.gradio['rope_freq_base'] = gr.Number(label=t('rope_freq_base'), value=shared.args.rope_freq_base, precision=0, info=t('Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.'))
+ shared.gradio['compress_pos_emb'] = gr.Number(label=t('compress_pos_emb'), value=shared.args.compress_pos_emb, precision=2, info=t("Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale."))
shared.gradio['compute_dtype'] = gr.Dropdown(label=t("compute_dtype"), choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info=t('Used by load-in-4bit.'))
shared.gradio['quant_type'] = gr.Dropdown(label=t("quant_type"), choices=["nf4", "fp4"], value=shared.args.quant_type, info=t('Used by load-in-4bit.'))
+ shared.gradio['num_experts_per_token'] = gr.Number(label=t("Number of experts per token"), value=shared.args.num_experts_per_token, info=t('Only applies to MoE models like Mixtral.'))
with gr.Column():
- shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info=t('Use PyTorch in CPU mode.'))
- shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
+ shared.gradio['cpu'] = gr.Checkbox(label=t("cpu"), value=shared.args.cpu, info=t('Use PyTorch in CPU mode.'))
+ shared.gradio['disk'] = gr.Checkbox(label=t("disk"), value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label=t("row_split"), value=shared.args.row_split, info=t('Split the model by rows across GPUs. This may improve multi-gpu performance.'))
shared.gradio['no_kv_offload'] = gr.Checkbox(label=t("no_kv_offload"), value=shared.args.no_kv_offload, info=t('Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.'))
shared.gradio['no_mmap'] = gr.Checkbox(label=t("no-mmap"), value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label=t("mlock"), value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label=t("numa"), value=shared.args.numa, info=t('NUMA support can help on some systems with non-uniform memory access.'))
- shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
- shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info=t('Necessary to use CFG with this loader.'))
+ shared.gradio['bf16'] = gr.Checkbox(label=t("bf16"), value=shared.args.bf16)
+ shared.gradio['no_flash_attn'] = gr.Checkbox(label=t("no_flash_attn"), value=shared.args.no_flash_attn)
+ shared.gradio['no_xformers'] = gr.Checkbox(label=t("no_xformers"), value=shared.args.no_xformers)
+ shared.gradio['no_sdpa'] = gr.Checkbox(label=t("no_sdpa"), value=shared.args.no_sdpa)
+ shared.gradio['cfg_cache'] = gr.Checkbox(label=t("cfg-cache"), value=shared.args.cfg_cache, info=t('Necessary to use CFG with this loader.'))
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info=t('Set use_fast=False while loading the tokenizer.'))
if not shared.args.portable:
with gr.Row():