From 6b08a250d84cb9e0ca3f5bc65f6787b10fd237a6 Mon Sep 17 00:00:00 2001
From: Ignazio De Santis <ignaziodesantisofficial@gmail.com>
Date: Sun, 29 Mar 2026 06:17:47 +0800
Subject: [PATCH] fix: add explicit utf-8 encoding to file open calls

Add encoding='utf-8' to open() calls across the codebase to prevent
UnicodeDecodeError (when reading) and UnicodeEncodeError (when writing)
on non-English Windows systems where the default encoding is not UTF-8
(e.g., CP950 for Traditional Chinese).

Files fixed:
- magentic-one-cli/_m1.py (config file reading)
- autogen-ext/code_executors/docker_jupyter/_docker_jupyter.py (HTML writing)
- autogen-studio/database/schema_manager.py (alembic config read/write)
- autogen-studio/web/auth/manager.py (YAML config reading)
- autogen-studio/lite/studio.py (env file writing)
- autogen-studio/cli.py (env file writing)
- autogen-studio/gallery/builder.py (JSON file writing)
- agbench run_cmd.py, tabulate_cmd.py, remove_missing_cmd.py (log/config reading)
- agbench linter cli.py and oai_coder.py (file read/write)
- agbench benchmarks process_logs.py, custom_tabulate.py (log reading)
- fixup_generated_files.py (generated file read/write)

Fixes #5566
---
 python/fixup_generated_files.py                |  4 ++--
 .../benchmarks/GAIA/Scripts/custom_tabulate.py |  4 ++--
 .../agbench/benchmarks/process_logs.py         | 18 +++++++++---------
 .../packages/agbench/src/agbench/linter/cli.py |  2 +-
 .../src/agbench/linter/coders/oai_coder.py     |  4 ++--
 .../agbench/src/agbench/remove_missing_cmd.py  |  2 +-
 python/packages/agbench/src/agbench/run_cmd.py |  8 ++++----
 .../agbench/src/agbench/tabulate_cmd.py        |  4 ++--
 .../docker_jupyter/_docker_jupyter.py          |  2 +-
 .../autogen-studio/autogenstudio/cli.py        |  2 +-
 .../autogenstudio/database/schema_manager.py   | 14 +++++++-------
 .../autogenstudio/gallery/builder.py           |  2 +-
 .../autogenstudio/lite/studio.py               |  2 +-
 .../autogenstudio/web/auth/manager.py          |  2 +-
 .../src/magentic_one_cli/_m1.py                |  4 ++--
 15 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/python/fixup_generated_files.py b/python/fixup_generated_files.py
index 58db7c98b7d5..8fa91aa919bf 100644
--- a/python/fixup_generated_files.py
+++ b/python/fixup_generated_files.py
@@ -24,12 +24,12 @@
 
 def main():
     for file in files:
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf-8") as f:
             content = f.read()
 
         print("Fixing imports in file:", file)
         for old, new in substitutions.items():
             content = content.replace(old, new)
 
-        with open(file, "w") as f:
+        with open(file, "w", encoding="utf-8") as f:
             f.write(content)
diff --git a/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py b/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py
index 1b23ee219f7f..33b88481dd5d 100644
--- a/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py
+++ b/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py
@@ -131,7 +131,7 @@ def scorer(instance_dir):
         return None
 
     expected_answer = None
-    with open(expected_answer_file, "rt") as fh:
+    with open(expected_answer_file, "rt", encoding="utf-8") as fh:
         expected_answer = fh.read().strip()
 
     # Read the console
@@ -140,7 +140,7 @@ def scorer(instance_dir):
         return None
 
     console_log = ""
-    with open(console_log_file, "rt") as fh:
+    with open(console_log_file, "rt", encoding="utf-8") as fh:
         console_log = fh.read()
 
         final_answer = None 
diff --git a/python/packages/agbench/benchmarks/process_logs.py b/python/packages/agbench/benchmarks/process_logs.py
index e9aa52532f82..d1327c3d74b8 100644
--- a/python/packages/agbench/benchmarks/process_logs.py
+++ b/python/packages/agbench/benchmarks/process_logs.py
@@ -96,7 +96,7 @@ def scorer(instance_dir, benchmark_name):
         if not os.path.isfile(expected_answer_file):
             return None
 
-        with open(expected_answer_file, "rt") as fh:
+        with open(expected_answer_file, "rt", encoding="utf-8") as fh:
             expected_answer = fh.read().strip()
 
         # Read the console log
@@ -104,7 +104,7 @@ def scorer(instance_dir, benchmark_name):
         if not os.path.isfile(console_log_file):
             return None
 
-        with open(console_log_file, "rt") as fh:
+        with open(console_log_file, "rt", encoding="utf-8") as fh:
             console_log = fh.read()
             final_answer = None
             m = re.search(r"FINAL ANSWER:(.*?)\n", console_log, re.DOTALL)
@@ -125,7 +125,7 @@ def scorer(instance_dir, benchmark_name):
         if not os.path.isfile(console_log_file):
             return None
 
-        with open(console_log_file, "rt") as fh:
+        with open(console_log_file, "rt", encoding="utf-8") as fh:
             console_log = fh.read()
             final_score = None
             m = re.search(r"FINAL SCORE:(.*?)\n", console_log, re.DOTALL)
@@ -145,7 +145,7 @@ def get_number_of_chat_messages(chat_messages_dir):
     # Count the number of chat messages in the chat_messages_dir
     result = 0
     for file in glob.glob(f"{chat_messages_dir}/*_messages.json"):
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf-8") as f:
             content = json.load(f)
             for agent, messages in content.items():
                 result += len(messages)
@@ -158,7 +158,7 @@ def did_agent_stall(instance_dir):
     if not os.path.isfile(log_file_path):
         return None
     # Stalled.... Replanning...
-    with open(log_file_path, "r") as f:
+    with open(log_file_path, "r", encoding="utf-8") as f:
         for line in f:
             if "Stalled.... Replanning..." in line:
                 return True
@@ -172,7 +172,7 @@ def get_message_logs(instance_dir):
         return None
     messages = []
     # for each line, convert to dict, check if it has a message and source key, and append to messages
-    with open(log_file_path, "r") as f:
+    with open(log_file_path, "r", encoding="utf-8") as f:
         for line in f:
             line_dict = json.loads(line)
             if "message" in line_dict and "source" in line_dict:
@@ -186,13 +186,13 @@ def get_task_information(instance_dir, benchmark_name):
         prompt_file = os.path.join(instance_dir, "prompt.txt")
         if not os.path.isfile(prompt_file):
             return None
-        with open(prompt_file, "r") as f:
+        with open(prompt_file, "r", encoding="utf-8") as f:
             return f.read().strip()
     elif benchmark_name == "webarena":
         task_prompt_file = os.path.join(instance_dir, "task_prompt.json")
         if not os.path.isfile(task_prompt_file):
             return None
-        with open(task_prompt_file, "r") as f:
+        with open(task_prompt_file, "r", encoding="utf-8") as f:
             return json.load(f)["intent"]
     else:
         raise ValueError(f"Unsupported benchmark_name: {benchmark_name}")
@@ -204,7 +204,7 @@ def is_progress_not_being_made(instance_dir):
     log_file_path = os.path.join(instance_dir, "log.jsonl")
     if not os.path.isfile(log_file_path):
         return None
-    with open(log_file_path, "r") as f:
+    with open(log_file_path, "r", encoding="utf-8") as f:
         for line in f:
             line_dict = json.loads(line)
             if (
diff --git a/python/packages/agbench/src/agbench/linter/cli.py b/python/packages/agbench/src/agbench/linter/cli.py
index 14f428929b17..73fd1b11c6c7 100644
--- a/python/packages/agbench/src/agbench/linter/cli.py
+++ b/python/packages/agbench/src/agbench/linter/cli.py
@@ -19,7 +19,7 @@ def prepend_line_numbers(lines: List[str]) -> List[str]:
 
 
 def load_log_file(path: str, prepend_numbers: bool = False) -> Document:
-    with open(path, "r") as f:
+    with open(path, "r", encoding="utf-8") as f:
         lines = f.readlines()
     if prepend_numbers:
         lines = prepend_line_numbers(lines)
diff --git a/python/packages/agbench/src/agbench/linter/coders/oai_coder.py b/python/packages/agbench/src/agbench/linter/coders/oai_coder.py
index 01322e0c5ccc..b293ad632d01 100644
--- a/python/packages/agbench/src/agbench/linter/coders/oai_coder.py
+++ b/python/packages/agbench/src/agbench/linter/coders/oai_coder.py
@@ -41,7 +41,7 @@ def code_document(
             if not os.path.exists(self.cache_dir):
                 os.makedirs(self.cache_dir)
             if cache_file and os.path.exists(cache_file):
-                with open(cache_file, "r") as f:
+                with open(cache_file, "r", encoding="utf-8") as f:
                     cached_coded_doc_json = f.read()
                     return CodedDocument.from_json(cached_coded_doc_json)
 
@@ -203,6 +203,6 @@ def code_document(
             raise ValueError("Error in coding document with OpenAI")
 
         if self.cache_enabled and cache_file:
-            with open(cache_file, "w") as f:
+            with open(cache_file, "w", encoding="utf-8") as f:
                 f.write(coded_document.model_dump_json(indent=4))
         return coded_document
diff --git a/python/packages/agbench/src/agbench/remove_missing_cmd.py b/python/packages/agbench/src/agbench/remove_missing_cmd.py
index 21c9a6aba572..53bc511a2e33 100644
--- a/python/packages/agbench/src/agbench/remove_missing_cmd.py
+++ b/python/packages/agbench/src/agbench/remove_missing_cmd.py
@@ -11,7 +11,7 @@ def default_scorer(instance_dir: str) -> bool:
     """
     console_log = os.path.join(instance_dir, "console_log.txt")
     if os.path.isfile(console_log):
-        with open(console_log, "rt") as fh:
+        with open(console_log, "rt", encoding="utf-8") as fh:
             content = fh.read()
             # Use a regular expression to match the expected ending pattern
             has_final_answer = "FINAL ANSWER:" in content
diff --git a/python/packages/agbench/src/agbench/run_cmd.py b/python/packages/agbench/src/agbench/run_cmd.py
index 55f181360d0f..82c0c2c5b964 100644
--- a/python/packages/agbench/src/agbench/run_cmd.py
+++ b/python/packages/agbench/src/agbench/run_cmd.py
@@ -295,10 +295,10 @@ def get_scenario_env(token_provider: Optional[Callable[[], str]] = None, env_fil
     if env_file is None:
         # Env file was not specified, so read the default, or warn if the default file is missing.
         if os.path.isfile(DEFAULT_ENV_FILE_YAML):
-            with open(DEFAULT_ENV_FILE_YAML, "r") as fh:
+            with open(DEFAULT_ENV_FILE_YAML, "r", encoding="utf-8") as fh:
                 env_file_contents = yaml.safe_load(fh)
         elif os.path.isfile(DEFAULT_ENV_FILE_JSON):
-            with open(DEFAULT_ENV_FILE_JSON, "rt") as fh:
+            with open(DEFAULT_ENV_FILE_JSON, "rt", encoding="utf-8") as fh:
                 env_file_contents = json.loads(fh.read())
             logging.warning(f"JSON environment files are deprecated. Migrate to '{DEFAULT_ENV_FILE_YAML}'")
         else:
@@ -307,7 +307,7 @@ def get_scenario_env(token_provider: Optional[Callable[[], str]] = None, env_fil
             )
     else:
         # Env file was specified. Throw an error if the file can't be read.
-        with open(env_file, "rt") as fh:
+        with open(env_file, "rt", encoding="utf-8") as fh:
             if env_file.endswith(".json"):
                 logging.warning("JSON environment files are deprecated. Migrate to YAML")
                 env_file_contents = json.loads(fh.read())
@@ -737,7 +737,7 @@ def split_jsonl(file_path: str, num_parts: int) -> List[List[Dict[str, Any]]]:
     """
     Split a JSONL file into num_parts approximately equal parts.
     """
-    with open(file_path, "r") as f:
+    with open(file_path, "r", encoding="utf-8") as f:
         data = [json.loads(line) for line in f]
 
     random.shuffle(data)  # Shuffle the data for better distribution
diff --git a/python/packages/agbench/src/agbench/tabulate_cmd.py b/python/packages/agbench/src/agbench/tabulate_cmd.py
index e5ee93db00c8..b44e40af4147 100644
--- a/python/packages/agbench/src/agbench/tabulate_cmd.py
+++ b/python/packages/agbench/src/agbench/tabulate_cmd.py
@@ -68,7 +68,7 @@ def find_tabulate_module(search_dir: str, stop_dir: Optional[str] = None) -> Opt
 def default_scorer(instance_dir: str, success_strings: List[str] = SUCCESS_STRINGS) -> Optional[bool]:
     console_log = os.path.join(instance_dir, "console_log.txt")
     if os.path.isfile(console_log):
-        with open(console_log, "rt") as fh:
+        with open(console_log, "rt", encoding="utf-8") as fh:
             content = fh.read()
 
             # It succeeded
@@ -90,7 +90,7 @@ def default_scorer(instance_dir: str, success_strings: List[str] = SUCCESS_STRIN
 def default_timer(instance_dir: str, timer_regex: str = TIMER_REGEX) -> Optional[float]:
     console_log = os.path.join(instance_dir, "console_log.txt")
     if os.path.isfile(console_log):
-        with open(console_log, "rt") as fh:
+        with open(console_log, "rt", encoding="utf-8") as fh:
             content = fh.read()
 
             # It succeeded
diff --git a/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py b/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py
index a7dbccc43381..4ac5c3f19518 100644
--- a/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py
+++ b/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py
@@ -275,7 +275,7 @@ def _save_html(self, html_data: str) -> str:
         """Save html data to a file."""
         filename = f"{uuid.uuid4().hex}.html"
         path = os.path.join(str(self._output_dir), filename)
-        with open(path, "w") as f:
+        with open(path, "w", encoding="utf-8") as f:
             f.write(html_data)
         return os.path.abspath(path)
 
diff --git a/python/packages/autogen-studio/autogenstudio/cli.py b/python/packages/autogen-studio/autogenstudio/cli.py
index 373f13e087f2..aa92013c98df 100644
--- a/python/packages/autogen-studio/autogenstudio/cli.py
+++ b/python/packages/autogen-studio/autogenstudio/cli.py
@@ -69,7 +69,7 @@ def ui(
 
     # Create temporary env file to share configuration with uvicorn workers
     env_file_path = get_env_file_path()
-    with open(env_file_path, "w") as temp_env:
+    with open(env_file_path, "w", encoding="utf-8") as temp_env:
         for key, value in env_vars.items():
             temp_env.write(f"{key}={value}\n")
 
diff --git a/python/packages/autogen-studio/autogenstudio/database/schema_manager.py b/python/packages/autogen-studio/autogenstudio/database/schema_manager.py
index 0762b0890d30..6b942354c4ce 100644
--- a/python/packages/autogen-studio/autogenstudio/database/schema_manager.py
+++ b/python/packages/autogen-studio/autogenstudio/database/schema_manager.py
@@ -75,7 +75,7 @@ def _update_configuration(self) -> None:
 
         # Update alembic.ini
         config_content = self._generate_alembic_ini_content()
-        with open(self.alembic_ini_path, "w") as f:
+        with open(self.alembic_ini_path, "w", encoding="utf-8") as f:
             f.write(config_content)
 
         # Update env.py
@@ -115,7 +115,7 @@ def _initialize_alembic(self) -> bool:
 
             # Create initial config file for alembic init
             config_content = self._generate_alembic_ini_content()
-            with open(self.alembic_ini_path, "w") as f:
+            with open(self.alembic_ini_path, "w", encoding="utf-8") as f:
                 f.write(config_content)
 
             # Use the config we just created
@@ -187,7 +187,7 @@ def run_migrations_online() -> None:
 else:
     run_migrations_online()"""
 
-        with open(env_path, "w") as f:
+        with open(env_path, "w", encoding="utf-8") as f:
             f.write(content)
 
     def _generate_alembic_ini_content(self) -> str:
@@ -239,7 +239,7 @@ def update_script_template(self):
         """Update the Alembic script template to include SQLModel."""
         template_path = self.alembic_dir / "script.py.mako"
         try:
-            with open(template_path, "r") as f:
+            with open(template_path, "r", encoding="utf-8") as f:
                 content = f.read()
 
             # Add sqlmodel import to imports section
@@ -248,7 +248,7 @@ def update_script_template(self):
 
             content = content.replace(import_section, new_imports)
 
-            with open(template_path, "w") as f:
+            with open(template_path, "w", encoding="utf-8") as f:
                 f.write(content)
 
             return True
@@ -265,7 +265,7 @@ def _update_env_py(self, env_path: Path) -> None:
             self._create_minimal_env_py(env_path)
             return
         try:
-            with open(env_path, "r") as f:
+            with open(env_path, "r", encoding="utf-8") as f:
                 content = f.read()
 
             # Add SQLModel import if not present
@@ -303,7 +303,7 @@ def _update_env_py(self, env_path: Path) -> None:
             )""",
             )
 
-            with open(env_path, "w") as f:
+            with open(env_path, "w", encoding="utf-8") as f:
                 f.write(content)
         except Exception as e:
             logger.error(f"Failed to update env.py: {e}")
diff --git a/python/packages/autogen-studio/autogenstudio/gallery/builder.py b/python/packages/autogen-studio/autogenstudio/gallery/builder.py
index 55a124367dd4..b83456ec0ee6 100644
--- a/python/packages/autogen-studio/autogenstudio/gallery/builder.py
+++ b/python/packages/autogen-studio/autogenstudio/gallery/builder.py
@@ -630,5 +630,5 @@ def create_default_lite_team():
     gallery = create_default_gallery()
 
     # Save to file
-    with open("gallery_default.json", "w") as f:
+    with open("gallery_default.json", "w", encoding="utf-8") as f:
         f.write(gallery.model_dump_json(indent=2))
diff --git a/python/packages/autogen-studio/autogenstudio/lite/studio.py b/python/packages/autogen-studio/autogenstudio/lite/studio.py
index 94b25cd85b6e..6ff0b4011f82 100644
--- a/python/packages/autogen-studio/autogenstudio/lite/studio.py
+++ b/python/packages/autogen-studio/autogenstudio/lite/studio.py
@@ -151,7 +151,7 @@ def _setup_environment(self) -> str:
         }
 
         env_file_path = self._get_env_file_path()
-        with open(env_file_path, "w") as temp_env:
+        with open(env_file_path, "w", encoding="utf-8") as temp_env:
             for key, value in env_vars.items():
                 temp_env.write(f"{key}={value}\n")
 
diff --git a/python/packages/autogen-studio/autogenstudio/web/auth/manager.py b/python/packages/autogen-studio/autogenstudio/web/auth/manager.py
index ab16e0432d0a..de391e093261 100644
--- a/python/packages/autogen-studio/autogenstudio/web/auth/manager.py
+++ b/python/packages/autogen-studio/autogenstudio/web/auth/manager.py
@@ -117,7 +117,7 @@ def is_valid_token(self, token: str) -> bool:
     def from_yaml(cls, yaml_path: str) -> Self:
         """Create AuthManager from YAML config file."""
         try:
-            with open(yaml_path, "r") as f:
+            with open(yaml_path, "r", encoding="utf-8") as f:
                 config_data = yaml.safe_load(f)
             config = AuthConfig(**config_data)
             return cls(config)
diff --git a/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py b/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py
index 1b159da4e91d..0a45711334d4 100644
--- a/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py
+++ b/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py
@@ -97,12 +97,12 @@ def main() -> None:
 
     if args.config is None:
         if os.path.isfile(DEFAULT_CONFIG_FILE):
-            with open(DEFAULT_CONFIG_FILE, "r") as f:
+            with open(DEFAULT_CONFIG_FILE, "r", encoding="utf-8") as f:
                 config = yaml.safe_load(f)
         else:
             config = yaml.safe_load(DEFAULT_CONFIG_CONTENTS)
     else:
-        with open(args.config if isinstance(args.config, str) else args.config[0], "r") as f:
+        with open(args.config if isinstance(args.config, str) else args.config[0], "r", encoding="utf-8") as f:
             config = yaml.safe_load(f)
 
     # Run the task