diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index 9a9739a72..63baf285d 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -12,13 +12,15 @@ import datetime import re +import os from collections.abc import Iterable, Iterator +from collections import defaultdict from copy import copy from difflib import SequenceMatcher from email import message_from_string from heapq import nlargest from string import Formatter -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypedDict from babel import __version__ as VERSION from babel.core import Locale, UnknownLocaleError @@ -357,6 +359,13 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') return str(s) +class ConflictInfo(TypedDict): + message: Message + filename: str + project: str + version: str + + class Catalog: """Representation of a message catalog.""" @@ -400,6 +409,7 @@ def __init__( self.locale = locale self._header_comment = header_comment self._messages: dict[str | tuple[str, str], Message] = {} + self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = defaultdict(list) self.project = project or 'PROJECT' self.version = version or 'VERSION' @@ -780,6 +790,36 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: ) self._messages[key] = message + def add_conflict(self, message: Message, filename: str, project: str, version: str) -> None: + """Record a conflicting translation for a message. + + When the same message ID has different translations across input files, + the conflicting entry is stored and the message is marked as fuzzy in + the output catalog. 
+ + :param message: the conflicting :class:`Message` object + :param filename: the basename of the file where the conflict originates + :param project: the project name of the conflicting file + :param version: the project version of the conflicting file + """ + key = self._key_for(message.id, message.context) + self._conflicts[key].append({ + 'message': message, + 'filename': filename, + 'project': project, + 'version': version, + }) + + def get_conflicts(self, id: _MessageID, context: str | None = None) -> list[ConflictInfo]: + """Return all recorded conflicts for a message ID. + + :param id: the message ID to look up conflicts for + :param context: optional message context (msgctxt) + :return: list of :class:`ConflictInfo` dicts, or an empty list if none + """ + key = self._key_for(id, context) + return self._conflicts.get(key, []) + def add( self, id: _MessageID, diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index f63dd9ded..ff647bbbf 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -21,14 +21,18 @@ import sys import tempfile import warnings +from collections import Counter, defaultdict from configparser import RawConfigParser from io import StringIO -from typing import Any, BinaryIO, Iterable, Literal +from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Literal + +if TYPE_CHECKING: + from babel.messages.catalog import _MessageID from babel import Locale, localedata from babel import __version__ as VERSION from babel.core import UnknownLocaleError -from babel.messages.catalog import DEFAULT_HEADER, Catalog +from babel.messages.catalog import DEFAULT_HEADER, Catalog, ConflictInfo from babel.messages.extract import ( DEFAULT_KEYWORDS, DEFAULT_MAPPING, @@ -887,6 +891,260 @@ def run(self): return +class ConcatenateCatalog(CommandMixin): + description = 'concatenates the specified PO files into single one' + user_options = [ + ('input-files', None, 'input files'), + ('output-file=', 'o', 'write output to 
specified file, the results are written ' + 'to standard output if no output file is specified or if it is \'-\''), + ('less-than=', '<', 'print messages with less than this many ' + 'definitions, defaults to infinite if not set'), + ('more-than=', '>', 'print messages with more than this many ' + 'definitions, defaults to 0 if not set'), + ('unique', 'u', 'shorthand for --less-than=2, requests ' + 'that only unique messages be printed'), + ('use-first', None, 'use first available translation for each ' + 'message, don\'t merge several translations'), + ('no-location', None, 'do not include location comments with filename and line number'), + ('width=', 'w', 'set output line width (default 76)'), + ('no-wrap', None, 'do not break long message lines, longer than ' + 'the output line width, into several lines'), + ('sort-output', 's', 'generate sorted output'), + ('sort-by-file', 'F', 'sort output by file location'), + ] + + as_args = 'input-files' + + boolean_options = [ + 'unique', + 'use-first', + 'no-location', + 'strict', + 'no-wrap', + 'sort-output', + 'sort-by-file', + ] + + def initialize_options(self): + self.input_files = None + self.output_file = None + self.less_than = None + self.more_than = 0 + self.unique = False + self.use_first = False + self.no_location = None + self.width = None + self.no_wrap = False + self.sort_output = False + self.sort_by_file = False + + def finalize_options(self): + if not self.input_files: + raise OptionError('you must specify the input files') + + if self.no_wrap and self.width: + raise OptionError("'--no-wrap' and '--width' are mutually exclusive") + if not self.no_wrap and not self.width: + self.width = 76 + elif self.width is not None: + self.width = int(self.width) + + if self.more_than is None: + self.more_than = 0 + else: + self.more_than = int(self.more_than) + if self.less_than is not None: + self.less_than = int(self.less_than) + + if self.unique: + if self.less_than is not None or self.more_than: + raise 
OptionError("'--unique' is mutually exclusive with '--less-than' and '--more-than'") + self.less_than = 2 + + def _collect_message_info(self): + templates: list[tuple[str, Catalog]] = [] + message_counts: Counter[_MessageID] = Counter() + message_strings: dict[_MessageID, set[str | tuple[str, ...]]] = defaultdict(set) + + for filename in self.input_files: + with open(filename, 'r') as pofile: + template = read_po(pofile) + for message in template: + if not message.id: + continue + message_counts[message.id] += 1 + message_strings[message.id].add( + message.string if isinstance(message.string, str) else tuple(message.string) + ) + templates.append((filename, template)) + + return templates, message_counts, message_strings + + def run(self): + catalog = Catalog(fuzzy=False) + templates, message_counts, message_strings = self._collect_message_info() + + for path, template in templates: + if catalog.locale is None: + catalog.locale = template.locale + + for message in template: + if not message.id: + continue + + count = message_counts[message.id] + if count <= self.more_than or (self.less_than is not None and count >= self.less_than): + continue + + if count > 1 and not self.use_first and len(message_strings[message.id]) > 1: + filename = os.path.basename(path) + catalog.add_conflict(message, filename, template.project, template.version) + message.flags |= {'fuzzy'} + + catalog[message.id] = message + + catalog.fuzzy = any(message.fuzzy for message in catalog) + + output_file = self.output_file + if not output_file or output_file == '-': + write_po( + sys.stdout.buffer, + catalog, + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, + no_location=self.no_location, + ) + else: + with open(output_file, 'wb') as outfile: + write_po( + outfile, + catalog, + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, + no_location=self.no_location, + ) + + +class MergeCatalog(CommandMixin): + description = 'update a 
PO file by merging it with a newer POT template, optionally using a compendium' + user_options = [ + ('input-files', None, 'exactly two input files: def.po (obsolete translations); ref.pot (current template)'), + ('compendium=', 'C', 'additional library of message translations, may be specified more than once'), + ('compendium-overwrite', None, 'overwrite existing translations with compendium entries'), + ('no-compendium-comment', None, 'do not add a comment for translations taken from a compendium'), + ('update', 'U', 'update def.po, do nothing if def.po already up to date'), + ('output-file=', 'o', 'write output to specified file, the results are written ' + 'to standard output if no output file is specified'), + ('backup', None, 'make a backup of def.po'), + ('suffix=', None, 'use SUFFIX as backup suffix instead of ~ (tilde)'), + ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), + ('no-location', None, 'do not include location comments with filename and line number'), + ('width=', 'w', 'set output line width (default 76)'), + ('no-wrap', None, 'do not break long message lines, longer ' + 'than the output line width, into several lines'), + ('sort-output', 's', 'generate sorted output'), + ('sort-by-file', 'F', 'sort output by file location'), + ] + + as_args = 'input-files' + + multiple_value_options = ( + 'compendium', + ) + + boolean_options = [ + 'compendium-overwrite', + 'no-compendium-comment', + 'update', + 'backup', + 'no-fuzzy-matching', + 'no-location', + 'no-wrap', + 'sort-output', + 'sort-by-file', + ] + + def initialize_options(self): + self.input_files = None + self.compendium: list[str] = [] + self.compendium_overwrite = False + self.no_compendium_comment = False + self.update = False + self.output_file = None + self.backup = False + self.suffix = '~' + self.no_fuzzy_matching = False + self.no_location = False + self.width = None + self.no_wrap = False + self.sort_output = False + self.sort_by_file = False + + def finalize_options(self): + 
if not self.input_files or len(self.input_files) != 2: + raise OptionError( + f'exactly two input files are required (def.po and ref.pot), got: {self.input_files!r}' + ) + if not self.output_file and not self.update: + raise OptionError('you must specify the output file or use --update') + + if self.no_wrap and self.width: + raise OptionError("'--no-wrap' and '--width' are mutually exclusive") + if not self.no_wrap and not self.width: + self.width = 76 + elif self.width is not None: + self.width = int(self.width) + + def _get_messages_from_compendiums(self, compendium_paths): + for file_path in compendium_paths: + with open(file_path, 'r') as pofile: + catalog = read_po(pofile) + for message in catalog: + yield message, file_path + + def run(self): + def_file, ref_file = self.input_files + + with open(def_file, 'r') as pofile: + catalog = read_po(pofile) + with open(ref_file, 'r') as pofile: + ref_catalog = read_po(pofile) + catalog.update( + ref_catalog, + no_fuzzy_matching=self.no_fuzzy_matching + ) + + for message, compendium_path in self._get_messages_from_compendiums(self.compendium): + if (current := catalog.get(message.id)) and (not current.string or current.fuzzy or self.compendium_overwrite): + if self.compendium_overwrite and not current.fuzzy and current.string: + catalog.obsolete[message.id] = current.clone() + + current.string = message.string + if current.fuzzy: + current.flags.remove('fuzzy') + + if not self.no_compendium_comment: + current.auto_comments.append(compendium_path) + + catalog.fuzzy = any(message.fuzzy for message in catalog) + output_path = def_file if self.update else self.output_file + + if self.update and self.backup: + shutil.copy(def_file, def_file + self.suffix) + + with open(output_path, 'wb') as outfile: + write_po( + outfile, + catalog, + no_location=self.no_location, + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, + ) + + class CommandLineInterface: """Command-line interface. 
@@ -901,6 +1159,8 @@ class CommandLineInterface: 'extract': 'extract messages from source files and generate a POT file', 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', + 'concat': 'concatenates and merges the specified PO files', + 'merge': 'combines two PO files into one', } command_classes = { @@ -908,6 +1168,8 @@ class CommandLineInterface: 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, + 'concat': ConcatenateCatalog, + 'merge': MergeCatalog, } log = None # Replaced on instance level diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index b9678a924..4002ce8b4 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -17,8 +17,8 @@ from typing import TYPE_CHECKING, Literal from babel.core import Locale -from babel.messages.catalog import Catalog, Message -from babel.util import TextWrapper +from babel.messages.catalog import Catalog, Message, ConflictInfo +from babel.util import TextWrapper, _cmp if TYPE_CHECKING: from typing import IO, AnyStr @@ -351,8 +351,11 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None: continue if needs_decode: line = line.decode(self.catalog.charset) - if line[0] == '#': - if line[:2] == '#~': + if line[:1] == '#': + if line[1:2] == '-': + self._invalid_pofile(line, lineno, 'cannot parse po file with conflicts') + + if line[1:2] == '~': self._process_message_line(lineno, line[2:].lstrip(), obsolete=True) else: try: @@ -646,6 +649,37 @@ def _format_comment(comment, prefix=''): for line in comment_wrapper.wrap(comment): yield f"#{prefix} {line.strip()}\n" + def _format_conflict_comment(file, project, version, prefix=''): + comment = f"#-#-#-#-# {file} ({project} {version}) #-#-#-#-#" + yield f"{normalize(comment, prefix=prefix, width=width)}\n" + + def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], prefix=''): + for conflict in conflicts: + message = 
conflict['message'] + if message.context: + yield from _format_conflict_comment(conflict['filename'], conflict['project'], conflict['version'], prefix=prefix) + yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n" + + if isinstance(key, (list, tuple)): + yield f"{prefix}msgid {normalize(key[0], prefix=prefix, width=width)}\n" + yield f"{prefix}msgid_plural {normalize(key[1], prefix=prefix, width=width)}\n" + else: + yield f"{prefix}msgid {normalize(key, prefix=prefix, width=width)}\n" + yield f"{prefix}msgstr {normalize('', prefix=prefix, width=width)}\n" + + for conflict in conflicts: + message = conflict['message'] + yield from _format_conflict_comment(conflict['filename'], conflict['project'], conflict['version'], prefix=prefix) + if isinstance(key, (list, tuple)): + for idx in range(catalog.num_plurals): + try: + string = message.string[idx] + except IndexError: + string = '' + yield f"{prefix}msgstr[{idx:d}] {normalize(string, prefix=prefix, width=width)}\n" + else: + yield f"{normalize(message.string, prefix=prefix, width=width)}\n" + def _format_message(message, prefix=''): if isinstance(message.id, (list, tuple)): if message.context: @@ -717,7 +751,10 @@ def _format_message(message, prefix=''): norm_previous_id = normalize(message.previous_id[1], width=width) yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|') - yield from _format_message(message) + if len(conflicts := catalog.get_conflicts(message.id)) > 0: + yield from _format_conflict(message.id, conflicts) + else: + yield from _format_message(message) yield '\n' if not ignore_obsolete: diff --git a/docs/cmdline.rst b/docs/cmdline.rst index e1328fe8f..672bbfe70 100644 --- a/docs/cmdline.rst +++ b/docs/cmdline.rst @@ -247,3 +247,136 @@ filename of the output file will be:: If neither the ``output_file`` nor the ``locale`` option is set, this command looks for all catalog files in the base directory that match the given domain, and updates each of them. 
+ +concat +====== + +The ``concat`` command merges multiple PO files into one. If a message has +different translations in different PO files, the conflicting translations are +marked with a conflict comment and the message itself is marked with a ``fuzzy`` flag:: + + $ pybabel concat --help + Usage: pybabel concat [options] + + concatenates the specified PO files into single one + + Options: + -h, --help show this help message and exit + -o OUTPUT_FILE, --output-file=OUTPUT_FILE + write output to specified file, the results are written + to standard output if no output file is specified or if it is '-' + --less-than=NUMBER print messages with less than this many + definitions, defaults to infinite if not set + --more-than=NUMBER print messages with more than this many + definitions, defaults to 0 if not set + -u, --unique shorthand for --less-than=2, requests + that only unique messages be printed + --use-first use first available translation for each + message, don't merge several translations + --no-location do not include location comments with filename and line number + -w WIDTH, --width=WIDTH + set output line width (default 76) + --no-wrap do not break long message lines, longer than + the output line width, into several lines + -s, --sort-output generate sorted output + -F, --sort-by-file sort output by file location + +merge +===== + +The ``merge`` command allows updating files, optionally using a compendium as a translation memory:: + + $ pybabel merge --help + Usage: pybabel merge [options] + + update a PO file by merging it with a newer POT template, optionally using a compendium + + Options: + -C COMPENDIUM_FILE, --compendium=COMPENDIUM_FILE + additional library of message translations, may + be specified more than once + --compendium-overwrite + overwrite existing translations with compendium entries + --no-compendium-comment + do not add a comment for translations taken from a compendium + -U, --update update def.po, do nothing if def.po already up 
to date + -o OUTPUT_FILE, --output-file=OUTPUT_FILE + write output to specified file, the results are written + to standard output if no output file is specified + --backup make a backup of def.po + --suffix=SUFFIX use SUFFIX as backup suffix instead of ~ (tilde) + -N, --no-fuzzy-matching + do not use fuzzy matching + --no-location do not include location comments with filename and line number + -w WIDTH, --width=WIDTH + set output line width (default 76) + --no-wrap do not break long message lines, longer + than the output line width, into several lines + -s, --sort-output generate sorted output + -F, --sort-by-file sort output by file location + +The ``input-files`` option accepts exactly two arguments: a file with obsolete translations, and +the current template file for updating translations. + +The ``compendium`` option can be specified multiple times to use several compendiums. +The compendium can be used in two modes: + +- Default mode: translations from the compendium are used only for messages whose translation in the output file is missing or fuzzy. +- Compendium overwrite mode: when using the ``--compendium-overwrite`` option, translations + from the compendium take priority and replace those in the output file. + +The ``backup`` option is used to create a backup copy of the def.po file before updating it. +The ``suffix`` option allows you to specify a custom suffix for the backup file (defaulting to ``~``). + +concat and merge usage scenarios +================================= + +1. Merging Multiple PO Files (`concat`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel concat [options] <input-files>` +Suppose you manage a project with several PO files for the same language (for example, modules or plugins have their own translations), and you want to combine them into a single file for further work or for delivery to translators. + +**Example:** + +.. 
code-block:: shell + + pybabel concat -o merged.po module1.po module2.po module3.po + +**Features:** + +- If the same string has different translations in different files, the resulting file for that string will include a special comment ``#-#-#-#-# FILENAME (PROJECT VERSION) #-#-#-#-#`` and the message will be marked with the ``fuzzy`` flag—this is useful for later manual conflict resolution. +- You can keep only unique strings using the ``-u`` (`--less-than=2`) option. +- Use `--use-first` to take only the first encountered translation for each string, skipping automatic merging of multiple options. +- Output can be sorted alphabetically or by source file (options `-s`, `-F`). + +**Typical Use Case:** + + A project has translations from different teams. Before releasing, you need to gather all translations into one file, resolve possible conflicts, and provide the finalized version to translators for review. + + +2. Updating Translations with a Template and Compendium (`merge`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel merge [options] def.po ref.pot` +You need to update an existing translation file (`def.po`) based on a new template (`ref.pot`), reusing translations from an additional translation memory (compendium). + +**Example:** + +.. code-block:: shell + + pybabel merge -C my-compendium.po --update --backup def.po ref.pot + +**Features:** + +- The compendium (`-C`) allows you to pull translations from a shared translation memory. Multiple compendiums can be used. +- By default, translations from the compendium are used only for new or missing entries in `def.po`. +- The `--compendium-overwrite` option allows overwriting existing translations with those found in the compendium (helpful for terminology standardization). +- When a translation from the compendium is used, a comment is automatically added (this can be disabled with `--no-compendium-comment`). 
+- The `--backup` flag, which takes effect only together with `--update`, saves a backup copy of `def.po` before it is overwritten (`~` suffix by default, configurable with `--suffix`). + +**Typical Use Case:** + + After a release, a new translation template is provided. The team decides to enrich the translation by leveraging a common compendium in order to improve quality and unify terms. The merge command is run with the compendium and backup options enabled. diff --git a/tests/messages/frontend/test_concat.py b/tests/messages/frontend/test_concat.py new file mode 100644 index 000000000..a16b4d4d8 --- /dev/null +++ b/tests/messages/frontend/test_concat.py @@ -0,0 +1,334 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
+ +from __future__ import annotations + +import contextlib +import io +import pathlib +import sys + +import pytest +from freezegun import freeze_time + +from babel.messages import Catalog, frontend, pofile +from babel.messages.frontend import OptionError +from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA +from tests.messages.utils import Distribution + + +@pytest.fixture(autouse=True) +def frozen_time(): + with freeze_time("1994-11-11"): + yield + + +@pytest.fixture +def concat_cmd(): + dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + cmd = frontend.ConcatenateCatalog(dist) + cmd.initialize_options() + return cmd + + +@pytest.fixture +def po_files(tmp_path: pathlib.Path): + temp1 = tmp_path / 'msgcat_temp1.po' + temp2 = tmp_path / 'msgcat_temp2.po' + + with open(temp1, 'wb') as file: + catalog = Catalog() + catalog.add('other1', string='Other 1', locations=[('simple.py', 1)], flags=['flag1000']) + catalog.add('other2', string='Other 2', locations=[('simple.py', 10)]) + catalog.add('same', string='Same', locations=[('simple.py', 100)], flags=['flag1', 'flag1.2']) + catalog.add('almost_same', string='Almost same', locations=[('simple.py', 1000)], flags=['flag2']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals'), locations=[('simple.py', 2000)]) + pofile.write_po(file, catalog) + + with open(temp2, 'wb') as file: + catalog = Catalog() + catalog.add('other3', string='Other 3', locations=[('hard.py', 1)]) + catalog.add('other4', string='Other 4', locations=[('hard.py', 10)]) + catalog.add('almost_same', string='A bit same', locations=[('hard.py', 1000)], flags=['flag3']) + catalog.add('same', string='Same', locations=[('hard.py', 100)], flags=['flag4']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals other'), locations=[('hard.py', 2000)]) + pofile.write_po(file, catalog) + + return temp1, temp2 + + +def test_no_input_files(concat_cmd): + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def 
test_no_output_file(concat_cmd): + concat_cmd.input_files = ['project/i18n/messages.pot'] + concat_cmd.finalize_options() # output_file not required; defaults to stdout + + +def test_unique_exclusive_with_less_than(concat_cmd, po_files): + temp1, temp2 = po_files + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.unique = True + concat_cmd.less_than = 3 + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def test_unique_exclusive_with_more_than(concat_cmd, po_files): + temp1, temp2 = po_files + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.unique = True + concat_cmd.more_than = 1 + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def test_default(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + + assert 'msgid "other1"' in content + assert 'msgstr "Other 1"' in content + assert 'msgid "other3"' in content + + assert 'msgid "same"' in content + assert 'msgstr "Same"' in content + assert content.count('#-#-#-#-# msgcat_temp1.po') == 0 or 'msgid "same"' not in [ + block for block in content.split('\n\n') if '#-#-#-#-#' in block + ] + + almost_same_block = next(b for b in content.split('\n\n') if 'msgid "almost_same"' in b) + assert 'fuzzy' in almost_same_block + assert '#-#-#-#-#' in almost_same_block + assert 'Almost same' in almost_same_block + assert 'A bit same' in almost_same_block + + plural_block = next(b for b in content.split('\n\n') if 'msgid "plural"' in b) + assert 'fuzzy' in plural_block + assert '#-#-#-#-#' in plural_block + + +def test_use_first(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + 
concat_cmd.use_first = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + + assert '#-#-#-#-#' not in content + + almost_same_block = next(b for b in content.split('\n\n') if 'msgid "almost_same"' in b) + assert 'fuzzy' not in almost_same_block + assert 'msgstr "Almost same"' in almost_same_block + + plural_block = next(b for b in content.split('\n\n') if 'msgid "plural"' in b) + assert 'fuzzy' not in plural_block + assert 'msgstr[0] "Plural"' in plural_block + assert 'msgstr[1] "Plurals"' in plural_block + + +def test_unique(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.unique = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + + assert 'msgid "other1"' in content + assert 'msgid "other2"' in content + assert 'msgid "other3"' in content + assert 'msgid "other4"' in content + assert 'msgid "same"' not in content + assert 'msgid "almost_same"' not in content + + +def test_less_than_equivalent_to_unique(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.less_than = 2 + concat_cmd.finalize_options() + concat_cmd.run() + less_than_content = output_file.read_text() + + concat_cmd.less_than = None + concat_cmd.unique = True + concat_cmd.finalize_options() + concat_cmd.run() + unique_content = output_file.read_text() + + assert less_than_content == unique_content + + +def test_more_than(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.more_than = 1 + concat_cmd.finalize_options() + concat_cmd.run() + + content = 
output_file.read_text() + + assert 'msgid "other1"' not in content + assert 'msgid "other3"' not in content + assert 'msgid "same"' in content + assert 'msgid "almost_same"' in content + assert 'msgid "plural"' in content + + almost_same_block = next(b for b in content.split('\n\n') if 'msgid "almost_same"' in b) + assert 'fuzzy' in almost_same_block + + +def test_no_wrap_width_exclusive(concat_cmd, po_files): + temp1, _ = po_files + concat_cmd.input_files = [str(temp1)] + concat_cmd.no_wrap = True + concat_cmd.width = 80 + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def test_stdout_output(concat_cmd, po_files, monkeypatch): + temp1, _ = po_files + concat_cmd.input_files = [str(temp1)] + concat_cmd.finalize_options() + + buf = io.BytesIO() + monkeypatch.setattr(sys, 'stdout', type('FakeStdout', (), {'buffer': buf})()) + concat_cmd.run() + + content = buf.getvalue().decode('utf-8') + assert 'msgid "other1"' in content + assert 'msgstr "Other 1"' in content + assert 'msgid "same"' in content + + +def test_stdout_dash(concat_cmd, po_files, monkeypatch): + temp1, _ = po_files + concat_cmd.input_files = [str(temp1)] + concat_cmd.output_file = '-' + concat_cmd.finalize_options() + + buf = io.BytesIO() + monkeypatch.setattr(sys, 'stdout', type('FakeStdout', (), {'buffer': buf})()) + concat_cmd.run() + + content = buf.getvalue().decode('utf-8') + assert 'msgid "other1"' in content + + +def test_same_string_no_conflict(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + same_block = next(b for b in content.split('\n\n') if 'msgid "same"' in b) + assert 'fuzzy' not in same_block + assert '#-#-#-#-#' not in same_block + assert 'msgstr "Same"' in same_block + + +def test_no_location(concat_cmd, po_files, tmp_path): + 
temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.no_location = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + assert '#: ' not in content + assert 'msgid "other1"' in content + + +def test_sort_output(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.sort_output = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + msgid_positions = { + 'almost_same': content.index('msgid "almost_same"'), + 'other1': content.index('msgid "other1"'), + 'other2': content.index('msgid "other2"'), + 'other3': content.index('msgid "other3"'), + 'other4': content.index('msgid "other4"'), + 'same': content.index('msgid "same"'), + } + ordered = sorted(msgid_positions, key=msgid_positions.get) + assert ordered == ['almost_same', 'other1', 'other2', 'other3', 'other4', 'same'] + + +def test_single_input_file(concat_cmd, po_files, tmp_path): + temp1, _ = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1)] + concat_cmd.output_file = str(output_file) + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + assert 'msgid "other1"' in content + assert 'msgid "other2"' in content + assert 'msgid "same"' in content + assert '#-#-#-#-#' not in content + assert 'fuzzy' not in content + + +def test_unique_exclusive_with_more_than_nonzero(concat_cmd, po_files): + temp1, temp2 = po_files + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.unique = True + concat_cmd.more_than = 0 + concat_cmd.finalize_options() + + +def test_conflicted_po_raises_on_read(tmp_path): + from babel.messages.pofile import PoFileError, read_po + + conflicted = tmp_path / 
'conflicted.po' + conflicted.write_text( + 'msgid "hello"\n' + '#-#-#-#-# file1.po (PROJECT 1.0) #-#-#-#-#\n' + 'msgstr "Hello"\n' + ) + with pytest.raises(PoFileError): + with open(conflicted) as f: + read_po(f, abort_invalid=True) diff --git a/tests/messages/frontend/test_merge.py b/tests/messages/frontend/test_merge.py new file mode 100644 index 000000000..6e6c2f45f --- /dev/null +++ b/tests/messages/frontend/test_merge.py @@ -0,0 +1,329 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. + +from __future__ import annotations + +import pathlib +import shutil + +import pytest +from freezegun import freeze_time + +from babel.messages import Catalog, frontend, pofile +from babel.messages.frontend import OptionError +from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA +from tests.messages.utils import Distribution + + +@pytest.fixture(autouse=True) +def frozen_time(): + with freeze_time("1994-11-11"): + yield + + +@pytest.fixture +def merge_cmd(): + dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + cmd = frontend.MergeCatalog(dist) + cmd.initialize_options() + return cmd + + +@pytest.fixture +def merge_files(tmp_path: pathlib.Path): + temp_def = tmp_path / 'msgmerge_def.po' + temp_ref = tmp_path / 'msgmerge_ref.pot' + compendium = tmp_path / 'compendium.po' + + with open(temp_ref, 'wb') as file: + catalog = Catalog() + for word in ['word1', 'word2', 'word3', 'word4']: + catalog.add(word) + pofile.write_po(file, catalog) + + with open(temp_def, 'wb') as file: 
+ catalog = Catalog() + catalog.add('word1', string='Word 1') + catalog.add('word2', string='Word 2') + catalog.add('word3') + pofile.write_po(file, catalog) + + with open(compendium, 'wb') as file: + catalog = Catalog() + catalog.add('word1', string='Comp Word 1') + catalog.add('word2', string='Comp Word 2') + catalog.add('word4', string='Word 4') + catalog.add('word5', string='Word 5') + pofile.write_po(file, catalog) + + return temp_def, temp_ref, compendium + + +def test_no_input_files(merge_cmd): + with pytest.raises(OptionError): + merge_cmd.finalize_options() + + with pytest.raises(OptionError): + merge_cmd.input_files = ['1'] + merge_cmd.finalize_options() + + with pytest.raises(OptionError): + merge_cmd.input_files = ['1', '2', '3'] + merge_cmd.finalize_options() + + +def test_no_output_file(merge_cmd, merge_files): + temp_def, temp_ref, _ = merge_files + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + with pytest.raises(OptionError): + merge_cmd.finalize_options() + + merge_cmd.output_file = str(temp_ref) + merge_cmd.finalize_options() + + merge_cmd.output_file = None + merge_cmd.update = True + merge_cmd.finalize_options() + + +def test_default(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + + assert 'msgid "word1"' in content + assert 'msgstr "Word 1"' in content + assert 'msgid "word2"' in content + assert 'msgstr "Word 2"' in content + + assert 'msgid "word4"' in content + word4_block = next(b for b in content.split('\n\n') if 'msgid "word4"' in b) + assert 'msgstr ""' in word4_block + + +def test_compendium(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = 
[str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + + assert 'msgstr "Word 4"' in content + + word1_block = next(b for b in content.split('\n\n') if 'msgid "word1"' in b) + assert 'msgstr "Word 1"' in word1_block + assert 'Comp Word 1' not in word1_block + + +def test_compendium_overwrite(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.compendium_overwrite = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + + word1_block = next(b for b in content.split('\n\n') if 'msgid "word1"' in b and '#~' not in b) + assert 'msgstr "Comp Word 1"' in word1_block + + assert '#~ msgid "word1"' in content + assert '#~ msgstr "Word 1"' in content + + +def test_update(merge_cmd, merge_files): + temp_def, temp_ref, _ = merge_files + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.update = True + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = temp_def.read_text() + assert 'msgstr "Word 1"' in content + assert 'msgid "word4"' in content + + +def test_update_backup(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + before_content = temp_def.read_text() + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.update = True + merge_cmd.backup = True + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + backup = pathlib.Path(str(temp_def) + '~') + assert backup.exists() + 
assert backup.read_text() == before_content + + temp_def.unlink() + shutil.move(str(backup), str(temp_def)) + merge_cmd.suffix = '.bac' + merge_cmd.run() + + bac = pathlib.Path(str(temp_def) + '.bac') + assert bac.exists() + assert bac.read_text() == before_content + + +def test_no_wrap_width_exclusive(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.no_wrap = True + merge_cmd.width = 80 + with pytest.raises(OptionError): + merge_cmd.finalize_options() + + +def test_compendium_with_comment(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert f'#. 
{compendium}' in content + assert 'msgid "word4"' in content + assert 'msgstr "Word 4"' in content + + +def test_compendium_does_not_overwrite_existing(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + word1_block = next(b for b in content.split('\n\n') if 'msgid "word1"' in b) + assert 'msgstr "Word 1"' in word1_block + assert 'Comp Word 1' not in word1_block + + +def test_multiple_compendiums(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + compendium2 = tmp_path / 'compendium2.po' + output_file = tmp_path / 'msgmerge.po' + + with open(compendium2, 'wb') as f: + cat = Catalog() + cat.add('word3', string='Word 3 from comp2') + pofile.write_po(f, cat) + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium), str(compendium2)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert 'msgstr "Word 4"' in content + assert 'msgstr "Word 3 from comp2"' in content + + +def test_compendium_fills_empty_translation(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + compendium_with_word3 = tmp_path / 'comp_word3.po' + output_file = tmp_path / 'msgmerge.po' + + with open(compendium_with_word3, 'wb') as f: + cat = Catalog() + cat.add('word3', string='Word 3 comp') + pofile.write_po(f, cat) + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium_with_word3)] 
+ merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert 'msgstr "Word 3 comp"' in content + + +def test_obsolete_messages(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + output_file = tmp_path / 'msgmerge.po' + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert '#~ msgid' not in content + + extra_def = tmp_path / 'extra_def.po' + with open(extra_def, 'wb') as f: + cat = Catalog() + cat.add('word1', string='Word 1') + cat.add('old_word', string='Old Word') + pofile.write_po(f, cat) + + merge_cmd.input_files = [str(extra_def), str(temp_ref)] + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert '#~ msgid "old_word"' in content + assert '#~ msgstr "Old Word"' in content + + +def test_compendium_not_applied_for_absent_messages(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + active_section = content.split('#~')[0] + assert 'word5' not in active_section