From 19be55d3303d3d97ab8fd4c4418eb97700a06dd3 Mon Sep 17 00:00:00 2001 From: ubparviz Date: Sun, 25 May 2025 01:58:48 +0500 Subject: [PATCH 1/2] Add Uzbek language support --- README.rst | 1 + num2words/__init__.py | 3 +- num2words/lang_UZ.py | 133 ++++++++++++++++++++++++++++++++++++++++++ tests/test_uz.py | 129 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 num2words/lang_UZ.py create mode 100644 tests/test_uz.py diff --git a/README.rst b/README.rst index 953d5b77..335a8287 100644 --- a/README.rst +++ b/README.rst @@ -132,6 +132,7 @@ Besides the numerical argument, there are two main optional arguments, ``to:`` a * ``tr`` (Turkish) * ``th`` (Thai) * ``uk`` (Ukrainian) +* ``uz`` (Uzbek) * ``vi`` (Vietnamese) * ``zh`` (Chinese - Traditional) * ``zh_CN`` (Chinese - Simplified / Mainland China) diff --git a/num2words/__init__.py b/num2words/__init__.py index 1f660eff..82f58009 100644 --- a/num2words/__init__.py +++ b/num2words/__init__.py @@ -25,7 +25,7 @@ lang_ID, lang_IS, lang_IT, lang_JA, lang_KN, lang_KO, lang_KZ, lang_LT, lang_LV, lang_MN, lang_NL, lang_NO, lang_PL, lang_PT, lang_PT_BR, lang_RO, lang_RU, lang_SK, lang_SL, lang_SR, - lang_SV, lang_TE, lang_TET, lang_TG, lang_TH, lang_TR, lang_UK, + lang_SV, lang_TE, lang_TET, lang_TG, lang_TH, lang_TR, lang_UK, lang_UZ, lang_VI, lang_ZH, lang_ZH_CN, lang_ZH_HK, lang_ZH_TW) CONVERTER_CLASSES = { @@ -82,6 +82,7 @@ 'tr': lang_TR.Num2Word_TR(), 'nl': lang_NL.Num2Word_NL(), 'uk': lang_UK.Num2Word_UK(), + 'uz': lang_UZ.Num2Word_UZ(), 'te': lang_TE.Num2Word_TE(), 'tet': lang_TET.Num2Word_TET(), 'hu': lang_HU.Num2Word_HU(), diff --git a/num2words/lang_UZ.py b/num2words/lang_UZ.py new file mode 100644 index 00000000..eee289cd --- /dev/null +++ b/num2words/lang_UZ.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2003, Taro Ogawa. All Rights Reserved. +# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. 
+ +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA + +from __future__ import unicode_literals + +from .base import Num2Word_Base +from .utils import get_digits, splitbyx + +ZERO = 'nol' + +ONES = { + 1: 'bir', + 2: 'ikki', + 3: 'uch', + 4: 'toʻrt', + 5: 'besh', + 6: 'olti', + 7: 'yetti', + 8: 'sakkiz', + 9: 'toʻqqiz', +} + +TEN = 'oʻn' + +TWENTIES = { + 2: 'yigirma', + 3: 'oʻttiz', + 4: 'qirq', + 5: 'ellik', + 6: 'oltmish', + 7: 'yetmish', + 8: 'sakson', + 9: 'toʻqson', +} +HUNDRED = 'bir yuz' + +THOUSANDS = { + 1: 'ming', + 2: 'million', + 3: 'milliard', + 4: 'trillion', + 5: 'kvadrillion', + 6: 'kvintillion', + 7: 'sextillion', + 8: 'septillion', + 9: 'oktilion', + 10: 'nonillion', +} + + +class Num2Word_UZ(Num2Word_Base): + CURRENCY_FORMS = { + 'USD': ('dollar', 'sent'), + 'UZS': ('soʻm', 'tiyin'), + 'RUB': ('rubl', 'kopek'), + 'EUR': ('yevro', 'sent'), + 'GBP': ('funt', 'pens'), + } + + def setup(self): + self.negword = "minus" + self.pointword = "butun" + + def to_cardinal(self, number): + n = str(number).replace(',', '.') + if '.' 
in n: + left, right = n.split('.') + leading_zero_count = len(right) - len(right.lstrip('0')) + return u'%s %s %s' % ( + self._int2word(int(left)), + self.pointword, + (ZERO + ' ') * leading_zero_count + self._int2word(int(right)) + ) + else: + return self._int2word(int(n)) + + def pluralize(self, n, form): + return form + + def _cents_verbose(self, number, currency): + return self._int2word(number, currency == 'UZS') + + def _int2word(self, n, feminine=False): + if n < 0: + return ' '.join([self.negword, self._int2word(abs(n))]) + if n == 0: + return ZERO + + words = [] + chunks = list(splitbyx(str(n), 3)) + i = len(chunks) + for x in chunks: + i -= 1 + if x == 0: + continue + + n1, n2, n3 = get_digits(x) + + if n3 > 0: + if n3 == 1: + words.append(HUNDRED) + if n3 > 1: + words.append(ONES[n3] + ' yuz') + + if n2 == 1: + words.append(TEN) + elif n2 > 1: + words.append(TWENTIES[n2]) + + if n1 > 0: + words.append(ONES[n1]) + + if i > 0: + words.append(THOUSANDS[i]) + + return ' '.join(words) + + def to_ordinal(self, number): + raise NotImplementedError() \ No newline at end of file diff --git a/tests/test_uz.py b/tests/test_uz.py new file mode 100644 index 00000000..fc3aea60 --- /dev/null +++ b/tests/test_uz.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2003, Taro Ogawa. All Rights Reserved. +# Copyright (c) 2013, Savoir-faire Linux inc. All Rights Reserved. + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301 USA + +from __future__ import unicode_literals + +from unittest import TestCase + +from num2words import num2words + +TEST_CASES_CARDINAL = ( + (1, "bir"), + (2, "ikki"), + (3, "uch"), + (4, "toʻrt"), + (5, "besh"), + (6, "olti"), + (7, "yetti"), + (8, "sakkiz"), + (9, "toʻqqiz"), + (10, "oʻn"), + (10.01, "oʻn butun nol bir"), + (11, "oʻn bir"), + (12, "oʻn ikki"), + (12.50, "oʻn ikki butun besh"), + (13, "oʻn uch"), + (14, "oʻn toʻrt"), + (14.13, "oʻn toʻrt butun oʻn uch"), + (15, "oʻn besh"), + (16, "oʻn olti"), + (17, "oʻn yetti"), + (17.31, "oʻn yetti butun oʻttiz bir"), + (18, "oʻn sakkiz"), + (19, "oʻn toʻqqiz"), + (20, "yigirma"), + (21, "yigirma bir"), + (21.20, "yigirma bir butun ikki"), + (30, "oʻttiz"), + (32, "oʻttiz ikki"), + (40, "qirq"), + (43, "qirq uch"), + (43.007, "qirq uch butun nol nol yetti"), + (50, "ellik"), + (54, "ellik toʻrt"), + (60, "oltmish"), + (60.059, "oltmish butun nol ellik toʻqqiz"), + (65, "oltmish besh"), + (70, "yetmish"), + (76, "yetmish olti"), + (80, "sakson"), + (87, "sakson yetti"), + (90, "toʻqson"), + (98, "toʻqson sakkiz"), + (99, "toʻqson toʻqqiz"), + (100, "bir yuz"), + (101, "bir yuz bir"), + (199, "bir yuz toʻqson toʻqqiz"), + (200, "ikki yuz"), + (203, "ikki yuz uch"), + (300, "uch yuz"), + (356, "uch yuz ellik olti"), + (400, "toʻrt yuz"), + (434, "toʻrt yuz oʻttiz toʻrt"), + (500, "besh yuz"), + (578, "besh yuz yetmish sakkiz"), + (600, "olti yuz"), + (689, "olti yuz sakson toʻqqiz"), + (700, "yetti yuz"), + (729, "yetti yuz yigirma toʻqqiz"), + (800, "sakkiz yuz"), + (894, "sakkiz yuz toʻqson toʻrt"), + (900, "toʻqqiz yuz"), + (999, "toʻqqiz yuz toʻqson toʻqqiz"), + (1000, "bir ming"), + (1001, "bir ming bir"), + (2012, "ikki ming oʻn ikki"), + (2025, "ikki ming yigirma besh"), + (1234, "bir ming 
ikki yuz oʻttiz toʻrt"), + (12345.65, "oʻn ikki ming uch yuz qirq besh butun oltmish besh"), + (-260000, "minus ikki yuz oltmish ming"), + (777777, "yetti yuz yetmish yetti ming yetti yuz yetmish yetti"), + (999999, "toʻqqiz yuz toʻqson toʻqqiz ming toʻqqiz yuz toʻqson toʻqqiz"), + (1000000, "bir million"), + (1000000000, "bir milliard"), + (1234567890, "bir milliard ikki yuz oʻttiz toʻrt million besh yuz oltmish yetti ming sakkiz yuz toʻqson"), + (1000000000000000, "bir kvadrillion"), + (1000000000000000000, "bir kvintillion"), + (1000000000000000000000, "bir sextillion"), + (1000000000000000000000000, "bir septillion"), + (1000000000000000000000000000, "bir oktilion"), + (1000000000000000000000000000000, "bir nonillion"), + (215461407892039002157189883901676, + "ikki yuz oʻn besh nonillion toʻrt yuz oltmish bir oktilion toʻrt yuz " + "yetti septillion sakkiz yuz toʻqson ikki sextillion oʻttiz toʻqqiz kvintillion " + "ikki kvadrillion bir yuz ellik yetti trillion bir yuz sakson toʻqqiz milliard sakkiz " + "yuz sakson uch million toʻqqiz yuz bir ming olti yuz yetmish olti"), + (719094234693663034822824384220291, + "yetti yuz oʻn toʻqqiz nonillion toʻqson toʻrt oktilion ikki yuz oʻttiz toʻrt septillion " + "olti yuz toʻqson uch sextillion olti yuz oltmish uch kvintillion oʻttiz toʻrt kvadrillion " + "sakkiz yuz yigirma ikki trillion sakkiz yuz yigirma toʻrt milliard uch yuz sakson toʻrt " + "million ikki yuz yigirma ming ikki yuz toʻqson bir"), +) +TEST_CASES_TO_CURRENCY_UZS = ( + (0.00, "nol soʻm, nol tiyin"), + (1.00, "bir soʻm, nol tiyin"), + (2.00, "ikki soʻm, nol tiyin"), + (5.05, "besh soʻm, nol besh tiyin"), + (7.77, "yetti soʻm, yetmish yetti tiyin"), + (10.01, "oʻn soʻm, bir tiyin"), + (100.0, "bir yuz soʻm, nol tiyin"), + (1000.0, "ming soʻm, nol tiyin"), + (10000.99, "oʻn ming soʻm, toʻqson toʻqqiz tiyin"), + (200_000.0, "ikki yuz ming soʻm, nol tiyin"), + (10_000_000.0, "oʻn million soʻm, nol tiyin"), + (10222.0, "oʻn ming ikki yuz yigirma ikki soʻm, nol 
tiyin"), +) \ No newline at end of file From 63cf6e97c7afeff1be7e92b94fe9015c0db6a178 Mon Sep 17 00:00:00 2001 From: ubparviz Date: Sun, 25 May 2025 05:09:37 +0500 Subject: [PATCH 2/2] Add ordinal number support for Uzbek language --- num2words/lang_UZ.py | 81 ++++++++++++++++++++++++++++++++++++++++---- tests/test_uz.py | 80 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 153 insertions(+), 8 deletions(-) diff --git a/num2words/lang_UZ.py b/num2words/lang_UZ.py index eee289cd..2ce658e1 100644 --- a/num2words/lang_UZ.py +++ b/num2words/lang_UZ.py @@ -64,11 +64,29 @@ class Num2Word_UZ(Num2Word_Base): CURRENCY_FORMS = { - 'USD': ('dollar', 'sent'), - 'UZS': ('soʻm', 'tiyin'), - 'RUB': ('rubl', 'kopek'), - 'EUR': ('yevro', 'sent'), - 'GBP': ('funt', 'pens'), + 'USD': ('dollar', 'sent'), # AQSh (Amerika Qo‘shma Shtatlari) + 'UZS': ('soʻm', 'tiyin'), # Oʻzbekiston + 'RUB': ('rubl', 'kopek'), # Rossiya + 'EUR': ('yevro', 'sent'), # Yevro hududi (Yevropa Ittifoqi davlatlari) + 'GBP': ('funt', 'pens'), # Buyuk Britaniya + 'KZT': ('tenge', 'tiyin'), # Qozogʻiston + 'TJS': ('somoniy', 'diram'), # Tojikiston + 'CNY': ('yuan', 'feni'), # Xitoy + 'JPY': ('yen', 'sen'), # Yaponiya + 'KRW': ('von', 'chon'), # Janubiy Koreya + 'TRY': ('lira', 'kuruş'), # Turkiya + 'INR': ('rupiy', 'paysa'), # Hindiston + 'SAR': ('riyal', 'halala'), # Saudiya Arabistoni + 'CHF': ('frank', 'rappen'), # Shveytsariya + 'AFN': ('afgʻoniy', 'pul'), # Afgʻoniston + 'BRL': ('real', 'sentavo'), # Braziliya + 'IRR': ('rial', 'dinor'), # Eron + 'IDR': ('rupiya', 'sen'), # Indoneziya + 'TMT': ('manat', 'tenge'), # Turkmaniston + 'KGS': ('som', 'tiyin'), # Qirgʻiziston + 'EGP': ('misr funti', 'piastr'), # Misr + 'AED': ('dirham', 'fils'), # Birlashgan Arab Amirliklari (BAA) + 'KWD': ('quvayt dinori', 'fils'), # Quvayt } def setup(self): @@ -130,4 +148,55 @@ def _int2word(self, n, feminine=False): return ' '.join(words) def to_ordinal(self, number): - raise NotImplementedError() \ No newline at end 
of file + """ + Convert number to ordinal word in Uzbek. + For example: + 1 -> birinchi + 2 -> ikkinchi + 3 -> uchinchi + 4 -> toʻrtinchi + 10 -> oʻninchi + 21 -> yigirma birinchi (only the last word takes the ordinal suffix) + """ + if not isinstance(number, int): + raise ValueError("Ordinal conversion supports integers only") + + if number == 0: + return ZERO # nol is zero, ordinal form is not common + + # Get the cardinal form as list of words + cardinal_words = self._int2word(number).split() + + # Uzbek ordinal suffixes vary depending on last digit. + # We'll handle common base words with special forms: + special_ordinals = { + 'bir': 'birinchi', + 'ikki': 'ikkinchi', + 'uch': 'uchinchi', + 'toʻrt': 'toʻrtinchi', + 'besh': 'beshinchi', + 'olti': 'oltinchi', + 'yetti': 'yettinchi', + 'sakkiz': 'sakkizinchi', + 'toʻqqiz': 'toʻqqizinchi', + 'oʻn': 'oʻninchi', + 'yigirma': 'yigirmanchi', + 'oʻttiz': 'oʻttizinchi', + 'qirq': 'qirqinchi', + 'ellik': 'ellikinchi', + 'oltmish': 'oltmishinchi', + 'yetmish': 'yetmishinchi', + 'sakson': 'saksoninchi', + 'toʻqson': 'toʻqsoninchi', + 'bir yuz': 'bir yuzinchi', + } + + last_word = cardinal_words[-1] + + if last_word in special_ordinals: + cardinal_words[-1] = special_ordinals[last_word] + else: + # For compound words or words not in dictionary, just add 'inchi' + cardinal_words[-1] = last_word + 'inchi' + + return ' '.join(cardinal_words) \ No newline at end of file diff --git a/tests/test_uz.py b/tests/test_uz.py index fc3aea60..e2562aa0 100644 --- a/tests/test_uz.py +++ b/tests/test_uz.py @@ -113,6 +113,7 @@ "sakkiz yuz yigirma ikki trillion sakkiz yuz yigirma toʻrt milliard uch yuz sakson toʻrt " "million ikki yuz yigirma ming ikki yuz toʻqson bir"), ) + TEST_CASES_TO_CURRENCY_UZS = ( (0.00, "nol soʻm, nol tiyin"), (1.00, "bir soʻm, nol tiyin"), @@ -125,5 +126,80 @@ (10000.99, "oʻn ming soʻm, toʻqson toʻqqiz tiyin"), (200_000.0, "ikki yuz ming soʻm, nol tiyin"), (10_000_000.0, "oʻn million soʻm, nol tiyin"), - 
(10222.0, "oʻn ming ikki yuz yigirma ikki soʻm, nol tiyin"), -) \ No newline at end of file + (1_000_000_000.0, "bir milliard soʻm, nol tiyin"), + (10_000_000_000.0, "oʻn milliard soʻm, nol tiyin"), + (10_101_101_101.101, "oʻn milliard bir yuz bir million bir yuz bir ming bir yuz bir soʻm, oʻn tiyin"), + (77_777_777.77, "yetmish yetti million yetti yuz yetmish yetti ming yetti yuz yetmish yetti soʻm, yetmish yetti tiyin"), +) + +TEST_CASES_ORDINAL = ( + (1, "birinchi"), + (2, "ikkinchi"), + (3, "uchinchi"), + (4, "toʻrtinchi"), + (5, "beshinchi"), + (6, "oltinchi"), + (7, "yettinchi"), + (8, "sakkizinchi"), + (9, "toʻqqizinchi"), + (10, "oʻninchi"), + (11, "oʻn birinchi"), + (12, "oʻn ikkinchi"), + (20, "yigirmanchi"), + (21, "yigirma birinchi"), + (30, "oʻttizinchi"), + (40, "qirqinchi"), + (50, "ellikinchi"), + (60, "oltmishinchi"), + (70, "yetmishinchi"), + (80, "saksoninchi"), + (90, "toʻqsoninchi"), + (100, "bir yuzinchi"), + (101, "bir yuz birinchi"), + (200, "ikki yuzinchi"), + (1000, "bir minginchi"), + (1001, "bir ming birinchi"), + (1945, "bir ming toʻqqiz yuz qirq beshinchi"), + (1990, "bir ming toʻqqiz yuz toʻqsoninchi"), + (1991, "bir ming toʻqqiz yuz toʻqson birinchi"), + (2019, "ikki ming oʻn toʻqqizinchi"), + (2025, "ikki ming yigirma beshinchi"), + (3333, "uch ming uch yuz oʻttiz uchinchi"), + (3456, "uch ming toʻrt yuz ellik oltinchi"), + (11111, "oʻn bir ming bir yuz oʻn birinchi"), + (222222, "ikki yuz yigirma ikki ming ikki yuz yigirma ikkinchi"), + (1000000, "bir millioninchi"), + (1000001, "bir million birinchi"), + (1001001, "bir million bir ming birinchi"), + (1101010, "bir million bir yuz bir ming oʻninchi"), + (2002002, "ikki million ikki ming ikkinchi"), + (70707070, "yetmish million yetti yuz yetti ming yetmishinchi"), + (777777777, + "yetti yuz yetmish yetti million yetti yuz " + "yetmish yetti ming yetti yuz yetmish yettinchi"), + (1234567890, + "bir milliard ikki yuz oʻttiz toʻrt million " + "besh yuz oltmish yetti ming sakkiz yuz 
toʻqsoninchi"), + (999_999_999_999_999, + "toʻqqiz yuz toʻqson toʻqqiz trillion toʻqqiz yuz " + "toʻqson toʻqqiz milliard toʻqqiz yuz toʻqson toʻqqiz million " + "toʻqqiz yuz toʻqson toʻqqiz ming toʻqqiz yuz toʻqson toʻqqizinchi"), + (12345678900987654321, + "oʻn ikki kvintillion uch yuz qirq besh kvadrillion " + "olti yuz yetmish sakkiz trillion toʻqqiz yuz milliard toʻqqiz yuz sakson yetti million " + "olti yuz ellik toʻrt ming uch yuz yigirma birinchi"), +) + +class TestNum2WordsUZ(TestCase): + + def test_cardinal(self): + for number, expected in TEST_CASES_CARDINAL: + self.assertEqual(num2words(number, lang='uz'), expected) + + def test_currency(self): + for number, expected in TEST_CASES_TO_CURRENCY_UZS: + self.assertEqual(num2words(number, lang='uz', to='currency', currency='UZS'), expected) + + def test_ordinal(self): + for number, expected in TEST_CASES_ORDINAL: + self.assertEqual(num2words(number, lang='uz', to='ordinal'), expected) \ No newline at end of file