From 6e908d31f2641f45534617cfde973988fae0820a Mon Sep 17 00:00:00 2001 From: nec Date: Wed, 22 Apr 2026 10:38:43 +0200 Subject: [PATCH 1/4] Fix locale bundling for JDK>23 --- pyproject.toml | 2 +- src/jdk4py/_included_locales.py | 17 ++++- tests/resources/PrintLocaleNumberFormats.jar | Bin 0 -> 1271 bytes tests/resources/PrintLocaleNumberFormats.java | 19 ++++++ tests/test_jdk4py.py | 59 ++++++++++++++++++ uv.lock | 2 +- 6 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 tests/resources/PrintLocaleNumberFormats.jar create mode 100644 tests/resources/PrintLocaleNumberFormats.java diff --git a/pyproject.toml b/pyproject.toml index 2984326b..a8e8b48b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ keywords = ["jdk", "java", "jvm", "jre"] name = "jdk4py" readme = "README.md" requires-python = ">=3.10" -version = "25.0.2.0" +version = "25.0.2.1" [project.urls] Repository = "https://github.com/activeviam/jdk4py" diff --git a/src/jdk4py/_included_locales.py b/src/jdk4py/_included_locales.py index 97ab8d58..a40365f5 100644 --- a/src/jdk4py/_included_locales.py +++ b/src/jdk4py/_included_locales.py @@ -1,17 +1,32 @@ +# Each region-specific tag is paired with its bare-language parent so that +# `jlink --include-locales` retains the corresponding parent resource bundle +# (e.g. `FormatData_fr.class`). +# See https://github.com/openjdk/jdk/blob/jdk-25%2B35/src/jdk.jlink/share/classes/jdk/tools/jlink/internal/plugins/IncludeLocalesPlugin.java INCLUDED_LOCALES = frozenset( [ + "bn", "bn-IN", + "da", "da-DK", + "de", "de-DE", - "en-US", + "en", "en-GB", + "en-US", + "es", "es-ES", "es-MX", + "fr", "fr-FR", + "it", "it-IT", + "ja", "ja-JP", + "pt", "pt-BR", + "ru", "ru-RU", + "zh", "zh-CN", ], ) diff --git a/tests/resources/PrintLocaleNumberFormats.jar b/tests/resources/PrintLocaleNumberFormats.jar new file mode 100644 index 0000000000000000000000000000000000000000..2907656785a9fa446882a8156a8734a177c800da GIT binary patch literal 1271 zcmWIWW@h1HVBlb2unw3O!+-=h8CV#6T|*poJ^kGD|D9rB2mmS-Vc_84z)&gz)CO1T z>*(j{<{BKL=j+CMVj~xmArHfay%*|FtZ?qO@-(+ScT!mW`;BgajM>VW?EmBL{}9Xh zqxkxvYnG1gK|{Gcv!`shEVb@F%kuw6jNd$qsW5Ijtrh(|@cTX86S_XEXN5Fo@|;ci zxbn`gSR=8jo?mKvJHy^Qn_F%l;LXmlyChRJ8|ah*Aja)j4WMjMYH@yPQF3aren3%X zUWreBa$-)ZUukYqYLQ!hQEp;Mv0idcVsUYA$mt+qM-f|JIp;$k>LgVZIA$~mxz-3e zD4tlXwIXz3!NFC+yADk_S(3HPdFI}y<~zzBONo1Ie83#@%XHlZYh%ZaLOPRYy_@&0 z_`Gdxo`3yr3v2cbEoU{N1TF|%5psDJXFdT zXSljJhqz0Zv0uj7l~}=YD%=b*x{G zbNnK`ljj$9taG41b$Ylz(Kcj_ zr)=e!kuo6(b2x4s)lP5-HylJQBL&bNw}c(Zg%F* zr&&up-eq|h_S`-+F?6?rjQtJs=b>BYh;NJMyCiz%M6J;pPdGCA|GGZoLQ>8a@y)3E%%#yHtAkzyi*?K zD55B~us&VFJ5cBG&NlIN_qBP1>z7Dgo2b*d%V|H$>pc!%R~pvr$qJHv@j1UJdw$Dn`P;|^WBT+X{v8}PRhDlNL`aq`|rJ~ z?dxg(KT#TY`Ryc+W=f!Qd_WE&CQ&i`g7Z&?#x!al2=3B9n#VquPer ze?R};IQ?F2hEmpr%iS7nFLp__`kg4V`X#&NURz~Uw1fHA1vjU0e~+s^a)Uoe=zwtl zGtGH{_q|?;y^K7dwbwOs%0cfOg>T%TT(j`VyV3_t3=D3ppj^Yq#KnNSJOE}LBmmFy zkkWyXNrVAWJ|JgCQ2BrW+CU~;D^h6zvK-k2WQTz=EdtyCGKtN?0p6@^Af3!Wco|6Z Hv4D60#3AtY literal 0 HcmV?d00001 diff --git a/tests/resources/PrintLocaleNumberFormats.java b/tests/resources/PrintLocaleNumberFormats.java new file mode 100644 index 00000000..8cebb931 --- /dev/null +++ b/tests/resources/PrintLocaleNumberFormats.java @@ -0,0 +1,19 @@ +package resources; + +import java.text.DecimalFormatSymbols; +import java.util.Locale; + +public class PrintLocaleNumberFormats { + + public static void main(String[] args) { + for (final Locale locale: DecimalFormatSymbols.getAvailableLocales()) { + final DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); + System.out.println( + locale.toLanguageTag() + + "\t" + ((int) dfs.getDecimalSeparator()) + + "\t" + ((int) dfs.getGroupingSeparator()) + ); + } + } + +} diff --git a/tests/test_jdk4py.py b/tests/test_jdk4py.py index a89f77bf..f3200101 100644 --- a/tests/test_jdk4py.py +++ b/tests/test_jdk4py.py @@ -1,4 +1,5 @@ import re +from dataclasses import dataclass from pathlib import Path from subprocess import run @@ -44,3 +45,61 @@ def test_included_locales() -> None: for locale in completed_process.stdout.strip().splitlines() } assert locales.issuperset(INCLUDED_LOCALES) + + +@dataclass(frozen=True) +class NumberFormattingSeparators: + decimal_separator: str + grouping_separator: str + + +COMMA = "," +DOT = "." +NARROW_NO_BREAK_SPACE = chr(0x202F) +NO_BREAK_SPACE = chr(0x00A0) + + +_NUMBER_FORMATTING_EXPECTED_SEPARATORS = { + "bn-IN": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), + "da-DK": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), + "de-DE": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), + "en-GB": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), + "en-US": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), + "es-ES": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), + "es-MX": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), + "fr-FR": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=NARROW_NO_BREAK_SPACE), + "it-IT": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), + "ja-JP": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), + "pt-BR": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), + "ru-RU": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=NO_BREAK_SPACE), +} + + +def test_locale_data_is_loaded() -> None: + """ + `jlink --include-locales=fr-FR` does not retain `FormatData_fr.class`, + so `DecimalFormatSymbols.getInstance(Locale.forLanguageTag("fr-FR"))` + silently returns ROOT symbols. + + `INCLUDED_LOCALES` must therefore list each region tag alongside its bare-language parent. + """ + completed_process = run( # noqa: S603 + [JAVA, "-jar", _TEST_RESOURCES_DIRECTORY / "PrintLocaleNumberFormats.jar"], + capture_output=True, + check=True, + text=True, + ) + actual = {} + for line in completed_process.stdout.strip().splitlines(): + tag, decimal_cp, grouping_cp = line.split("\t") + actual[tag] = (chr(int(decimal_cp)), chr(int(grouping_cp))) + + mismatches = { + tag: {"expected": expected, "actual": actual.get(tag)} + for tag, expected in _NUMBER_FORMATTING_EXPECTED_SEPARATORS.items() + if actual.get(tag) != expected + } + assert not mismatches, ( + f"CLDR data not loaded for these locales (likely a missing " + f"parent-language bundle): {mismatches}" + ) diff --git a/uv.lock b/uv.lock index 2f3d3793..7f6a0583 100644 --- a/uv.lock +++ b/uv.lock @@ -34,7 +34,7 @@ wheels = [ [[package]] name = "jdk4py" -version = "21.0.8.2" +version = "25.0.2.1" source = { editable = "." } [package.dev-dependencies] From 8905d36fbca51420e55566f40433ef9f680fd5af Mon Sep 17 00:00:00 2001 From: nec Date: Wed, 22 Apr 2026 10:46:52 +0200 Subject: [PATCH 2/4] Ruff --- tests/test_jdk4py.py | 53 +++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/tests/test_jdk4py.py b/tests/test_jdk4py.py index f3200101..00f4135a 100644 --- a/tests/test_jdk4py.py +++ b/tests/test_jdk4py.py @@ -60,28 +60,55 @@ class NumberFormattingSeparators: _NUMBER_FORMATTING_EXPECTED_SEPARATORS = { - "bn-IN": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), - "da-DK": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), - "de-DE": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), - "en-GB": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), - "en-US": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), - "es-ES": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), - "es-MX": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), - "fr-FR": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=NARROW_NO_BREAK_SPACE), - "it-IT": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), - "ja-JP": NumberFormattingSeparators(decimal_separator=DOT, grouping_separator=COMMA), - "pt-BR": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=DOT), - "ru-RU": NumberFormattingSeparators(decimal_separator=COMMA, grouping_separator=NO_BREAK_SPACE), + "bn-IN": NumberFormattingSeparators( + decimal_separator=DOT, grouping_separator=COMMA + ), + "da-DK": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=DOT + ), + "de-DE": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=DOT + ), + "en-GB": NumberFormattingSeparators( + decimal_separator=DOT, grouping_separator=COMMA + ), + "en-US": NumberFormattingSeparators( + decimal_separator=DOT, grouping_separator=COMMA + ), + "es-ES": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=DOT + ), + "es-MX": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=DOT + ), + "fr-FR": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=NARROW_NO_BREAK_SPACE + ), + "it-IT": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=DOT + ), + "ja-JP": NumberFormattingSeparators( + decimal_separator=DOT, grouping_separator=COMMA + ), + "pt-BR": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=DOT + ), + "ru-RU": NumberFormattingSeparators( + decimal_separator=COMMA, grouping_separator=NO_BREAK_SPACE + ), } def test_locale_data_is_loaded() -> None: """ + Check that each included locale uses its expected number formatting separators. + `jlink --include-locales=fr-FR` does not retain `FormatData_fr.class`, so `DecimalFormatSymbols.getInstance(Locale.forLanguageTag("fr-FR"))` silently returns ROOT symbols. - `INCLUDED_LOCALES` must therefore list each region tag alongside its bare-language parent. + `INCLUDED_LOCALES` must therefore list each region tag alongside its + bare-language parent. """ completed_process = run( # noqa: S603 [JAVA, "-jar", _TEST_RESOURCES_DIRECTORY / "PrintLocaleNumberFormats.jar"], From 7657963fabf517a85ad1a29735de1e31bde5fdb5 Mon Sep 17 00:00:00 2001 From: nec Date: Wed, 22 Apr 2026 10:54:21 +0200 Subject: [PATCH 3/4] Fix test --- tests/test_jdk4py.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_jdk4py.py b/tests/test_jdk4py.py index 00f4135a..46971e02 100644 --- a/tests/test_jdk4py.py +++ b/tests/test_jdk4py.py @@ -79,7 +79,7 @@ class NumberFormattingSeparators: decimal_separator=COMMA, grouping_separator=DOT ), "es-MX": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=DOT + decimal_separator=DOT, grouping_separator=COMMA ), "fr-FR": NumberFormattingSeparators( decimal_separator=COMMA, grouping_separator=NARROW_NO_BREAK_SPACE @@ -119,7 +119,10 @@ def test_locale_data_is_loaded() -> None: actual = {} for line in completed_process.stdout.strip().splitlines(): tag, decimal_cp, grouping_cp = line.split("\t") - actual[tag] = (chr(int(decimal_cp)), chr(int(grouping_cp))) + actual[tag] = NumberFormattingSeparators( + decimal_separator=chr(int(decimal_cp)), + grouping_separator=chr(int(grouping_cp)), + ) mismatches = { tag: {"expected": expected, "actual": actual.get(tag)} From 66fe90c414e4ffdd3484146fe1713b861849e4b9 Mon Sep 17 00:00:00 2001 From: Thibault Derousseaux <6574550+tibdex@users.noreply.github.com> Date: Wed, 22 Apr 2026 15:59:53 -0400 Subject: [PATCH 4/4] Polish --- pyproject.toml | 1 + src/jdk4py/_included_locales.py | 57 ++++++++-------- tests/test_jdk4py.py | 116 ++++++++++++++------------------ 3 files changed, 78 insertions(+), 96 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a8e8b48b..6e7a727c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ ignore = [ "D203", "D211", "D212", + "E501", "EM102", "ISC001", "S101", diff --git a/src/jdk4py/_included_locales.py b/src/jdk4py/_included_locales.py index a40365f5..f1aa0aae 100644 --- a/src/jdk4py/_included_locales.py +++ b/src/jdk4py/_included_locales.py @@ -1,32 +1,29 @@ -# Each region-specific tag is paired with its bare-language parent so that -# `jlink --include-locales` retains the corresponding parent resource bundle -# (e.g. `FormatData_fr.class`). -# See https://github.com/openjdk/jdk/blob/jdk-25%2B35/src/jdk.jlink/share/classes/jdk/tools/jlink/internal/plugins/IncludeLocalesPlugin.java +from collections.abc import Collection +from typing import Literal, TypeAlias, get_args + +_RegionSpecificLocale: TypeAlias = Literal[ + "bn-IN", + "da-DK", + "de-DE", + "en-GB", + "en-US", + "es-ES", + "es-MX", + "fr-FR", + "it-IT", + "ja-JP", + "pt-BR", + "ru-RU", + "zh-CN", +] +_REGION_SPECIFIC_LOCALES: Collection[str] = get_args(_RegionSpecificLocale) + INCLUDED_LOCALES = frozenset( - [ - "bn", - "bn-IN", - "da", - "da-DK", - "de", - "de-DE", - "en", - "en-GB", - "en-US", - "es", - "es-ES", - "es-MX", - "fr", - "fr-FR", - "it", - "it-IT", - "ja", - "ja-JP", - "pt", - "pt-BR", - "ru", - "ru-RU", - "zh", - "zh-CN", - ], + { + locale + for tag in _REGION_SPECIFIC_LOCALES + # Expand each region-specific locale to also include its language-only locale so that `jlink --include-locales` retains the language-only locale bundles (e.g. `FormatData_fr.class`). + # See https://github.com/openjdk/jdk/blob/jdk-25%2B35/src/jdk.jlink/share/classes/jdk/tools/jlink/internal/plugins/IncludeLocalesPlugin.java. + for locale in (tag, tag.split("-", maxsplit=1)[0]) + }, ) diff --git a/tests/test_jdk4py.py b/tests/test_jdk4py.py index 46971e02..4bd5e2a3 100644 --- a/tests/test_jdk4py.py +++ b/tests/test_jdk4py.py @@ -2,9 +2,10 @@ from dataclasses import dataclass from pathlib import Path from subprocess import run +from typing import get_args from jdk4py import JAVA, JAVA_VERSION -from jdk4py._included_locales import INCLUDED_LOCALES +from jdk4py._included_locales import INCLUDED_LOCALES, _RegionSpecificLocale _TEST_RESOURCES_DIRECTORY = Path(__file__).parent / "resources" @@ -44,92 +45,75 @@ def test_included_locales() -> None: locale.replace("_", "-") for locale in completed_process.stdout.strip().splitlines() } - assert locales.issuperset(INCLUDED_LOCALES) + assert locales >= INCLUDED_LOCALES @dataclass(frozen=True) -class NumberFormattingSeparators: +class _NumberFormattingSeparators: decimal_separator: str grouping_separator: str -COMMA = "," -DOT = "." -NARROW_NO_BREAK_SPACE = chr(0x202F) -NO_BREAK_SPACE = chr(0x00A0) - - -_NUMBER_FORMATTING_EXPECTED_SEPARATORS = { - "bn-IN": NumberFormattingSeparators( - decimal_separator=DOT, grouping_separator=COMMA - ), - "da-DK": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=DOT - ), - "de-DE": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=DOT - ), - "en-GB": NumberFormattingSeparators( - decimal_separator=DOT, grouping_separator=COMMA - ), - "en-US": NumberFormattingSeparators( - decimal_separator=DOT, grouping_separator=COMMA - ), - "es-ES": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=DOT - ), - "es-MX": NumberFormattingSeparators( - decimal_separator=DOT, grouping_separator=COMMA - ), - "fr-FR": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=NARROW_NO_BREAK_SPACE - ), - "it-IT": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=DOT - ), - "ja-JP": NumberFormattingSeparators( - decimal_separator=DOT, grouping_separator=COMMA - ), - "pt-BR": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=DOT - ), - "ru-RU": NumberFormattingSeparators( - decimal_separator=COMMA, grouping_separator=NO_BREAK_SPACE - ), -} - - -def test_locale_data_is_loaded() -> None: +_COMMA = "," +_DOT = "." +_NARROW_NO_BREAK_SPACE = chr(0x202F) +_NO_BREAK_SPACE = chr(0x00A0) + + +def _get_number_formatting_separators( + locale: _RegionSpecificLocale, / +) -> _NumberFormattingSeparators: + match locale: + case "bn-IN" | "en-GB" | "en-US" | "es-MX" | "ja-JP" | "zh-CN": + return _NumberFormattingSeparators( + decimal_separator=_DOT, grouping_separator=_COMMA + ) + case "da-DK" | "de-DE" | "es-ES" | "it-IT" | "pt-BR": + return _NumberFormattingSeparators( + decimal_separator=_COMMA, grouping_separator=_DOT + ) + case "fr-FR": + return _NumberFormattingSeparators( + decimal_separator=_COMMA, + grouping_separator=_NARROW_NO_BREAK_SPACE, + ) + case "ru-RU": + return _NumberFormattingSeparators( + decimal_separator=_COMMA, grouping_separator=_NO_BREAK_SPACE + ) + + +def test_locale_data_inclusion() -> None: """ Check that each included locale uses its expected number formatting separators. - `jlink --include-locales=fr-FR` does not retain `FormatData_fr.class`, - so `DecimalFormatSymbols.getInstance(Locale.forLanguageTag("fr-FR"))` + ``jlink --include-locales=fr-FR`` does not retain ``FormatData_fr.class``, + so ``DecimalFormatSymbols.getInstance(Locale.forLanguageTag("fr-FR"))`` silently returns ROOT symbols. - `INCLUDED_LOCALES` must therefore list each region tag alongside its - bare-language parent. + ``INCLUDED_LOCALES`` must therefore list each region-specific locale alongside its language-only parent. """ + expected = { + locale: _get_number_formatting_separators(locale) + for locale in get_args(_RegionSpecificLocale) + } + completed_process = run( # noqa: S603 [JAVA, "-jar", _TEST_RESOURCES_DIRECTORY / "PrintLocaleNumberFormats.jar"], capture_output=True, check=True, text=True, ) - actual = {} - for line in completed_process.stdout.strip().splitlines(): - tag, decimal_cp, grouping_cp = line.split("\t") - actual[tag] = NumberFormattingSeparators( + actual = { + locale: _NumberFormattingSeparators( decimal_separator=chr(int(decimal_cp)), grouping_separator=chr(int(grouping_cp)), ) - - mismatches = { - tag: {"expected": expected, "actual": actual.get(tag)} - for tag, expected in _NUMBER_FORMATTING_EXPECTED_SEPARATORS.items() - if actual.get(tag) != expected + for line in completed_process.stdout.strip().splitlines() + for locale, decimal_cp, grouping_cp in [line.split("\t")] + if locale in expected } - assert not mismatches, ( - f"CLDR data not loaded for these locales (likely a missing " - f"parent-language bundle): {mismatches}" + + assert actual == expected, ( + "CLDR data not loaded for some locales (likely a missing language-only bundle)" )