From 6b08a250d84cb9e0ca3f5bc65f6787b10fd237a6 Mon Sep 17 00:00:00 2001 From: Ignazio De Santis Date: Sun, 29 Mar 2026 06:17:47 +0800 Subject: [PATCH] fix: add explicit utf-8 encoding to file open calls Add encoding='utf-8' to text-mode open() calls across the codebase to prevent UnicodeDecodeError (when reading) and UnicodeEncodeError (when writing) on systems where the locale default encoding is not UTF-8, such as non-English Windows (e.g., CP950 for Traditional Chinese). Files fixed: - magentic-one-cli/_m1.py (config file reading) - autogen-ext/code_executors/docker_jupyter/_docker_jupyter.py (HTML writing) - autogen-studio/database/schema_manager.py (alembic.ini, env.py and script.py.mako read/write) - autogen-studio/web/auth/manager.py (YAML config reading) - autogen-studio/lite/studio.py (env file writing) - autogen-studio/cli.py (env file writing) - autogen-studio/gallery/builder.py (JSON file writing) - agbench run_cmd.py, tabulate_cmd.py, remove_missing_cmd.py (log/config reading) - agbench linter cli.py and oai_coder.py (file read/write) - agbench benchmarks process_logs.py, custom_tabulate.py (log reading) - fixup_generated_files.py (generated file read/write) Fixes #5566 --- python/fixup_generated_files.py | 4 ++-- .../benchmarks/GAIA/Scripts/custom_tabulate.py | 4 ++-- .../agbench/benchmarks/process_logs.py | 18 +++++++++--------- .../packages/agbench/src/agbench/linter/cli.py | 2 +- .../src/agbench/linter/coders/oai_coder.py | 4 ++-- .../agbench/src/agbench/remove_missing_cmd.py | 2 +- python/packages/agbench/src/agbench/run_cmd.py | 8 ++++---- .../agbench/src/agbench/tabulate_cmd.py | 4 ++-- .../docker_jupyter/_docker_jupyter.py | 2 +- .../autogen-studio/autogenstudio/cli.py | 2 +- .../autogenstudio/database/schema_manager.py | 14 +++++++------- .../autogenstudio/gallery/builder.py | 2 +- .../autogenstudio/lite/studio.py | 2 +- .../autogenstudio/web/auth/manager.py | 2 +- .../src/magentic_one_cli/_m1.py | 4 ++-- 15 files changed, 37 insertions(+), 37 deletions(-) diff --git a/python/fixup_generated_files.py b/python/fixup_generated_files.py index 
58db7c98b7d5..8fa91aa919bf 100644 --- a/python/fixup_generated_files.py +++ b/python/fixup_generated_files.py @@ -24,12 +24,12 @@ def main(): for file in files: - with open(file, "r") as f: + with open(file, "r", encoding="utf-8") as f: content = f.read() print("Fixing imports in file:", file) for old, new in substitutions.items(): content = content.replace(old, new) - with open(file, "w") as f: + with open(file, "w", encoding="utf-8") as f: f.write(content) diff --git a/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py b/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py index 1b23ee219f7f..33b88481dd5d 100644 --- a/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py +++ b/python/packages/agbench/benchmarks/GAIA/Scripts/custom_tabulate.py @@ -131,7 +131,7 @@ def scorer(instance_dir): return None expected_answer = None - with open(expected_answer_file, "rt") as fh: + with open(expected_answer_file, "rt", encoding="utf-8") as fh: expected_answer = fh.read().strip() # Read the console @@ -140,7 +140,7 @@ def scorer(instance_dir): return None console_log = "" - with open(console_log_file, "rt") as fh: + with open(console_log_file, "rt", encoding="utf-8") as fh: console_log = fh.read() final_answer = None diff --git a/python/packages/agbench/benchmarks/process_logs.py b/python/packages/agbench/benchmarks/process_logs.py index e9aa52532f82..d1327c3d74b8 100644 --- a/python/packages/agbench/benchmarks/process_logs.py +++ b/python/packages/agbench/benchmarks/process_logs.py @@ -96,7 +96,7 @@ def scorer(instance_dir, benchmark_name): if not os.path.isfile(expected_answer_file): return None - with open(expected_answer_file, "rt") as fh: + with open(expected_answer_file, "rt", encoding="utf-8") as fh: expected_answer = fh.read().strip() # Read the console log @@ -104,7 +104,7 @@ def scorer(instance_dir, benchmark_name): if not os.path.isfile(console_log_file): return None - with open(console_log_file, "rt") as fh: + with 
open(console_log_file, "rt", encoding="utf-8") as fh: console_log = fh.read() final_answer = None m = re.search(r"FINAL ANSWER:(.*?)\n", console_log, re.DOTALL) @@ -125,7 +125,7 @@ def scorer(instance_dir, benchmark_name): if not os.path.isfile(console_log_file): return None - with open(console_log_file, "rt") as fh: + with open(console_log_file, "rt", encoding="utf-8") as fh: console_log = fh.read() final_score = None m = re.search(r"FINAL SCORE:(.*?)\n", console_log, re.DOTALL) @@ -145,7 +145,7 @@ def get_number_of_chat_messages(chat_messages_dir): # Count the number of chat messages in the chat_messages_dir result = 0 for file in glob.glob(f"{chat_messages_dir}/*_messages.json"): - with open(file, "r") as f: + with open(file, "r", encoding="utf-8") as f: content = json.load(f) for agent, messages in content.items(): result += len(messages) @@ -158,7 +158,7 @@ def did_agent_stall(instance_dir): if not os.path.isfile(log_file_path): return None # Stalled.... Replanning... - with open(log_file_path, "r") as f: + with open(log_file_path, "r", encoding="utf-8") as f: for line in f: if "Stalled.... Replanning..." 
in line: return True @@ -172,7 +172,7 @@ def get_message_logs(instance_dir): return None messages = [] # for each line, convert to dict, check if it has a message and source key, and append to messages - with open(log_file_path, "r") as f: + with open(log_file_path, "r", encoding="utf-8") as f: for line in f: line_dict = json.loads(line) if "message" in line_dict and "source" in line_dict: @@ -186,13 +186,13 @@ def get_task_information(instance_dir, benchmark_name): prompt_file = os.path.join(instance_dir, "prompt.txt") if not os.path.isfile(prompt_file): return None - with open(prompt_file, "r") as f: + with open(prompt_file, "r", encoding="utf-8") as f: return f.read().strip() elif benchmark_name == "webarena": task_prompt_file = os.path.join(instance_dir, "task_prompt.json") if not os.path.isfile(task_prompt_file): return None - with open(task_prompt_file, "r") as f: + with open(task_prompt_file, "r", encoding="utf-8") as f: return json.load(f)["intent"] else: raise ValueError(f"Unsupported benchmark_name: {benchmark_name}") @@ -204,7 +204,7 @@ def is_progress_not_being_made(instance_dir): log_file_path = os.path.join(instance_dir, "log.jsonl") if not os.path.isfile(log_file_path): return None - with open(log_file_path, "r") as f: + with open(log_file_path, "r", encoding="utf-8") as f: for line in f: line_dict = json.loads(line) if ( diff --git a/python/packages/agbench/src/agbench/linter/cli.py b/python/packages/agbench/src/agbench/linter/cli.py index 14f428929b17..73fd1b11c6c7 100644 --- a/python/packages/agbench/src/agbench/linter/cli.py +++ b/python/packages/agbench/src/agbench/linter/cli.py @@ -19,7 +19,7 @@ def prepend_line_numbers(lines: List[str]) -> List[str]: def load_log_file(path: str, prepend_numbers: bool = False) -> Document: - with open(path, "r") as f: + with open(path, "r", encoding="utf-8") as f: lines = f.readlines() if prepend_numbers: lines = prepend_line_numbers(lines) diff --git 
a/python/packages/agbench/src/agbench/linter/coders/oai_coder.py b/python/packages/agbench/src/agbench/linter/coders/oai_coder.py index 01322e0c5ccc..b293ad632d01 100644 --- a/python/packages/agbench/src/agbench/linter/coders/oai_coder.py +++ b/python/packages/agbench/src/agbench/linter/coders/oai_coder.py @@ -41,7 +41,7 @@ def code_document( if not os.path.exists(self.cache_dir): os.makedirs(self.cache_dir) if cache_file and os.path.exists(cache_file): - with open(cache_file, "r") as f: + with open(cache_file, "r", encoding="utf-8") as f: cached_coded_doc_json = f.read() return CodedDocument.from_json(cached_coded_doc_json) @@ -203,6 +203,6 @@ def code_document( raise ValueError("Error in coding document with OpenAI") if self.cache_enabled and cache_file: - with open(cache_file, "w") as f: + with open(cache_file, "w", encoding="utf-8") as f: f.write(coded_document.model_dump_json(indent=4)) return coded_document diff --git a/python/packages/agbench/src/agbench/remove_missing_cmd.py b/python/packages/agbench/src/agbench/remove_missing_cmd.py index 21c9a6aba572..53bc511a2e33 100644 --- a/python/packages/agbench/src/agbench/remove_missing_cmd.py +++ b/python/packages/agbench/src/agbench/remove_missing_cmd.py @@ -11,7 +11,7 @@ def default_scorer(instance_dir: str) -> bool: """ console_log = os.path.join(instance_dir, "console_log.txt") if os.path.isfile(console_log): - with open(console_log, "rt") as fh: + with open(console_log, "rt", encoding="utf-8") as fh: content = fh.read() # Use a regular expression to match the expected ending pattern has_final_answer = "FINAL ANSWER:" in content diff --git a/python/packages/agbench/src/agbench/run_cmd.py b/python/packages/agbench/src/agbench/run_cmd.py index 55f181360d0f..82c0c2c5b964 100644 --- a/python/packages/agbench/src/agbench/run_cmd.py +++ b/python/packages/agbench/src/agbench/run_cmd.py @@ -295,10 +295,10 @@ def get_scenario_env(token_provider: Optional[Callable[[], str]] = None, env_fil if env_file is None: # Env 
file was not specified, so read the default, or warn if the default file is missing. if os.path.isfile(DEFAULT_ENV_FILE_YAML): - with open(DEFAULT_ENV_FILE_YAML, "r") as fh: + with open(DEFAULT_ENV_FILE_YAML, "r", encoding="utf-8") as fh: env_file_contents = yaml.safe_load(fh) elif os.path.isfile(DEFAULT_ENV_FILE_JSON): - with open(DEFAULT_ENV_FILE_JSON, "rt") as fh: + with open(DEFAULT_ENV_FILE_JSON, "rt", encoding="utf-8") as fh: env_file_contents = json.loads(fh.read()) logging.warning(f"JSON environment files are deprecated. Migrate to '{DEFAULT_ENV_FILE_YAML}'") else: @@ -307,7 +307,7 @@ def get_scenario_env(token_provider: Optional[Callable[[], str]] = None, env_fil ) else: # Env file was specified. Throw an error if the file can't be read. - with open(env_file, "rt") as fh: + with open(env_file, "rt", encoding="utf-8") as fh: if env_file.endswith(".json"): logging.warning("JSON environment files are deprecated. Migrate to YAML") env_file_contents = json.loads(fh.read()) @@ -737,7 +737,7 @@ def split_jsonl(file_path: str, num_parts: int) -> List[List[Dict[str, Any]]]: """ Split a JSONL file into num_parts approximately equal parts. 
""" - with open(file_path, "r") as f: + with open(file_path, "r", encoding="utf-8") as f: data = [json.loads(line) for line in f] random.shuffle(data) # Shuffle the data for better distribution diff --git a/python/packages/agbench/src/agbench/tabulate_cmd.py b/python/packages/agbench/src/agbench/tabulate_cmd.py index e5ee93db00c8..b44e40af4147 100644 --- a/python/packages/agbench/src/agbench/tabulate_cmd.py +++ b/python/packages/agbench/src/agbench/tabulate_cmd.py @@ -68,7 +68,7 @@ def find_tabulate_module(search_dir: str, stop_dir: Optional[str] = None) -> Opt def default_scorer(instance_dir: str, success_strings: List[str] = SUCCESS_STRINGS) -> Optional[bool]: console_log = os.path.join(instance_dir, "console_log.txt") if os.path.isfile(console_log): - with open(console_log, "rt") as fh: + with open(console_log, "rt", encoding="utf-8") as fh: content = fh.read() # It succeeded @@ -90,7 +90,7 @@ def default_scorer(instance_dir: str, success_strings: List[str] = SUCCESS_STRIN def default_timer(instance_dir: str, timer_regex: str = TIMER_REGEX) -> Optional[float]: console_log = os.path.join(instance_dir, "console_log.txt") if os.path.isfile(console_log): - with open(console_log, "rt") as fh: + with open(console_log, "rt", encoding="utf-8") as fh: content = fh.read() # It succeeded diff --git a/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py b/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py index a7dbccc43381..4ac5c3f19518 100644 --- a/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py +++ b/python/packages/autogen-ext/src/autogen_ext/code_executors/docker_jupyter/_docker_jupyter.py @@ -275,7 +275,7 @@ def _save_html(self, html_data: str) -> str: """Save html data to a file.""" filename = f"{uuid.uuid4().hex}.html" path = os.path.join(str(self._output_dir), filename) - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") 
as f: f.write(html_data) return os.path.abspath(path) diff --git a/python/packages/autogen-studio/autogenstudio/cli.py b/python/packages/autogen-studio/autogenstudio/cli.py index 373f13e087f2..aa92013c98df 100644 --- a/python/packages/autogen-studio/autogenstudio/cli.py +++ b/python/packages/autogen-studio/autogenstudio/cli.py @@ -69,7 +69,7 @@ def ui( # Create temporary env file to share configuration with uvicorn workers env_file_path = get_env_file_path() - with open(env_file_path, "w") as temp_env: + with open(env_file_path, "w", encoding="utf-8") as temp_env: for key, value in env_vars.items(): temp_env.write(f"{key}={value}\n") diff --git a/python/packages/autogen-studio/autogenstudio/database/schema_manager.py b/python/packages/autogen-studio/autogenstudio/database/schema_manager.py index 0762b0890d30..6b942354c4ce 100644 --- a/python/packages/autogen-studio/autogenstudio/database/schema_manager.py +++ b/python/packages/autogen-studio/autogenstudio/database/schema_manager.py @@ -75,7 +75,7 @@ def _update_configuration(self) -> None: # Update alembic.ini config_content = self._generate_alembic_ini_content() - with open(self.alembic_ini_path, "w") as f: + with open(self.alembic_ini_path, "w", encoding="utf-8") as f: f.write(config_content) # Update env.py @@ -115,7 +115,7 @@ def _initialize_alembic(self) -> bool: # Create initial config file for alembic init config_content = self._generate_alembic_ini_content() - with open(self.alembic_ini_path, "w") as f: + with open(self.alembic_ini_path, "w", encoding="utf-8") as f: f.write(config_content) # Use the config we just created @@ -187,7 +187,7 @@ def run_migrations_online() -> None: else: run_migrations_online()""" - with open(env_path, "w") as f: + with open(env_path, "w", encoding="utf-8") as f: f.write(content) def _generate_alembic_ini_content(self) -> str: @@ -239,7 +239,7 @@ def update_script_template(self): """Update the Alembic script template to include SQLModel.""" template_path = self.alembic_dir / 
"script.py.mako" try: - with open(template_path, "r") as f: + with open(template_path, "r", encoding="utf-8") as f: content = f.read() # Add sqlmodel import to imports section @@ -248,7 +248,7 @@ def update_script_template(self): content = content.replace(import_section, new_imports) - with open(template_path, "w") as f: + with open(template_path, "w", encoding="utf-8") as f: f.write(content) return True @@ -265,7 +265,7 @@ def _update_env_py(self, env_path: Path) -> None: self._create_minimal_env_py(env_path) return try: - with open(env_path, "r") as f: + with open(env_path, "r", encoding="utf-8") as f: content = f.read() # Add SQLModel import if not present @@ -303,7 +303,7 @@ def _update_env_py(self, env_path: Path) -> None: )""", ) - with open(env_path, "w") as f: + with open(env_path, "w", encoding="utf-8") as f: f.write(content) except Exception as e: logger.error(f"Failed to update env.py: {e}") diff --git a/python/packages/autogen-studio/autogenstudio/gallery/builder.py b/python/packages/autogen-studio/autogenstudio/gallery/builder.py index 55a124367dd4..b83456ec0ee6 100644 --- a/python/packages/autogen-studio/autogenstudio/gallery/builder.py +++ b/python/packages/autogen-studio/autogenstudio/gallery/builder.py @@ -630,5 +630,5 @@ def create_default_lite_team(): gallery = create_default_gallery() # Save to file - with open("gallery_default.json", "w") as f: + with open("gallery_default.json", "w", encoding="utf-8") as f: f.write(gallery.model_dump_json(indent=2)) diff --git a/python/packages/autogen-studio/autogenstudio/lite/studio.py b/python/packages/autogen-studio/autogenstudio/lite/studio.py index 94b25cd85b6e..6ff0b4011f82 100644 --- a/python/packages/autogen-studio/autogenstudio/lite/studio.py +++ b/python/packages/autogen-studio/autogenstudio/lite/studio.py @@ -151,7 +151,7 @@ def _setup_environment(self) -> str: } env_file_path = self._get_env_file_path() - with open(env_file_path, "w") as temp_env: + with open(env_file_path, "w", encoding="utf-8") 
as temp_env: for key, value in env_vars.items(): temp_env.write(f"{key}={value}\n") diff --git a/python/packages/autogen-studio/autogenstudio/web/auth/manager.py b/python/packages/autogen-studio/autogenstudio/web/auth/manager.py index ab16e0432d0a..de391e093261 100644 --- a/python/packages/autogen-studio/autogenstudio/web/auth/manager.py +++ b/python/packages/autogen-studio/autogenstudio/web/auth/manager.py @@ -117,7 +117,7 @@ def is_valid_token(self, token: str) -> bool: def from_yaml(cls, yaml_path: str) -> Self: """Create AuthManager from YAML config file.""" try: - with open(yaml_path, "r") as f: + with open(yaml_path, "r", encoding="utf-8") as f: config_data = yaml.safe_load(f) config = AuthConfig(**config_data) return cls(config) diff --git a/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py b/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py index 1b159da4e91d..0a45711334d4 100644 --- a/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py +++ b/python/packages/magentic-one-cli/src/magentic_one_cli/_m1.py @@ -97,12 +97,12 @@ def main() -> None: if args.config is None: if os.path.isfile(DEFAULT_CONFIG_FILE): - with open(DEFAULT_CONFIG_FILE, "r") as f: + with open(DEFAULT_CONFIG_FILE, "r", encoding="utf-8") as f: config = yaml.safe_load(f) else: config = yaml.safe_load(DEFAULT_CONFIG_CONTENTS) else: - with open(args.config if isinstance(args.config, str) else args.config[0], "r") as f: + with open(args.config if isinstance(args.config, str) else args.config[0], "r", encoding="utf-8") as f: config = yaml.safe_load(f) # Run the task