From cc912ccd0f94341c95ce3964a1ef71596f92606e Mon Sep 17 00:00:00 2001 From: Jason Trill Date: Wed, 6 May 2026 20:24:44 -0700 Subject: [PATCH] Print chars from Unicode 15.1-17.0 Go 1.26 (current stable) still ships Unicode 15.0.0, so unicode.IsPrint() does not recognize blocks added in later Unicode releases. Whitelist them explicitly so they render rather than show as '?'. Go's master branch has bumped to Unicode 17.0, so we can delete this table when that's released. 15.1: CJK Ext I. 16.0: Todhri, Garay, Tulu-Tigalari, Sunuwar, Egyptian Hieroglyphs Ext-A, Gurung Khema, Kirat Rai, Symbols for Legacy Computing Supplement, Ol Onal. 17.0: Sidetic, Sharada Supplement, Tolong Siki, Beria Erfe, Tangut Components Supplement, Misc Symbols Supplement, Tai Yo, CJK Ext J. --- twin/styledRune.go | 38 +++++++++++++++++ twin/styledRune_test.go | 95 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/twin/styledRune.go b/twin/styledRune.go index 87106646..617b8e7f 100644 --- a/twin/styledRune.go +++ b/twin/styledRune.go @@ -2,6 +2,7 @@ package twin import ( "fmt" + "sort" "unicode" "github.com/rivo/uniseg" @@ -66,6 +67,36 @@ func TrimSpaceLeft(runes []StyledRune) []StyledRune { return []StyledRune{} } +// Blocks added in Unicode 15.1 (2023), 16.0 (2024), and 17.0 (2025). Go's +// unicode package lags behind the latest Unicode release (15.0.0 as of Go +// 1.26), so unicode.IsPrint() does not yet recognize these. We let the +// terminal render any unassigned code points within these blocks as tofu +// rather than mask real characters with '?'. +// +// Must be sorted by `lo` ascending; entries must not overlap. Binary search +// (sort.Search below) relies on this invariant. 
+var unicodePost15PrintableRanges = []struct { + lo, hi rune +}{ + {0x105C0, 0x105FF}, // Todhri (16.0) + {0x10940, 0x1095F}, // Sidetic (17.0) + {0x10D40, 0x10D8F}, // Garay (16.0) + {0x11380, 0x113FF}, // Tulu-Tigalari (16.0) + {0x11B60, 0x11B7F}, // Sharada Supplement (17.0) + {0x11BC0, 0x11BFF}, // Sunuwar (16.0) + {0x11DB0, 0x11DEF}, // Tolong Siki (17.0) + {0x13460, 0x143FF}, // Egyptian Hieroglyphs Extended-A (16.0) + {0x16100, 0x1613F}, // Gurung Khema (16.0) + {0x16D40, 0x16D7F}, // Kirat Rai (16.0) + {0x16EA0, 0x16EDF}, // Beria Erfe (17.0) + {0x18D80, 0x18DFF}, // Tangut Components Supplement (17.0) + {0x1CC00, 0x1CEFF}, // Symbols for Legacy Computing Supplement (16.0) + Misc Symbols Supplement (17.0) + {0x1E5D0, 0x1E5FF}, // Ol Onal (16.0) + {0x1E6C0, 0x1E6FF}, // Tai Yo (17.0) + {0x2EBF0, 0x2EE5F}, // CJK Unified Ideographs Extension I (15.1) + {0x323B0, 0x3347F}, // CJK Unified Ideographs Extension J (17.0) +} + func Printable(char rune) bool { if unicode.IsPrint(char) { return true @@ -88,5 +119,12 @@ func Printable(char rune) bool { return true } + i := sort.Search(len(unicodePost15PrintableRanges), func(i int) bool { + return unicodePost15PrintableRanges[i].lo > char + }) + if i > 0 && char <= unicodePost15PrintableRanges[i-1].hi { + return true + } + return false } diff --git a/twin/styledRune_test.go b/twin/styledRune_test.go index 43befc5e..54d6fe0f 100644 --- a/twin/styledRune_test.go +++ b/twin/styledRune_test.go @@ -41,3 +41,98 @@ func TestRuneWidth(t *testing.T) { assert.Equal(t, NewStyledRune('x', Style{}).Width(), 1) assert.Equal(t, NewStyledRune('午', Style{}).Width(), 2) } + +// Go's unicode tables (15.0.0 as of Go 1.26) lag behind the latest Unicode +// release. These are blocks added in Unicode 15.1, 16.0, and 17.0 that +// unicode.IsPrint() does not yet recognize. 
+func TestPrintableUnicodePost15(t *testing.T) { + cases := []struct { + name string + r rune + }{ + // Unicode 15.1 (2023) + {"CJK Ext I start", 0x2EBF0}, + {"CJK Ext I end", 0x2EE5F}, + + // Unicode 16.0 (2024) + {"Todhri start", 0x105C0}, + {"Todhri end", 0x105F3}, + {"Garay start", 0x10D40}, + {"Garay end", 0x10D8E}, + {"Tulu-Tigalari start", 0x11380}, + {"Tulu-Tigalari end", 0x113D5}, + {"Sunuwar start", 0x11BC0}, + {"Sunuwar end", 0x11BF2}, + {"Egyptian Hieroglyphs Ext-A start", 0x13460}, + {"Egyptian Hieroglyphs Ext-A end", 0x143FA}, + {"Gurung Khema start", 0x16100}, + {"Gurung Khema end", 0x16139}, + {"Kirat Rai start", 0x16D40}, + {"Kirat Rai end", 0x16D79}, + {"Legacy Computing Supplement start", 0x1CC00}, + {"Large Type Piece (used by jj)", 0x1CE1A}, + {"Large Type Piece end", 0x1CE50}, + {"Legacy Computing Supplement end", 0x1CEBF}, + {"Ol Onal start", 0x1E5D0}, + {"Ol Onal end", 0x1E5FA}, + + // Unicode 17.0 (2025) + {"Sidetic start", 0x10940}, + {"Sidetic end", 0x1095F}, + {"Sharada Supplement start", 0x11B60}, + {"Sharada Supplement end", 0x11B7F}, + {"Tolong Siki start", 0x11DB0}, + {"Tolong Siki end", 0x11DEF}, + {"Beria Erfe start", 0x16EA0}, + {"Beria Erfe end", 0x16EDF}, + {"Tangut Components Supplement start", 0x18D80}, + {"Tangut Components Supplement end", 0x18DFF}, + {"Misc Symbols Supplement start", 0x1CEC0}, + {"Misc Symbols Supplement end", 0x1CEFF}, + {"Tai Yo start", 0x1E6C0}, + {"Tai Yo end", 0x1E6FF}, + {"CJK Ext J start", 0x323B0}, + {"CJK Ext J end", 0x3347F}, + } + + for _, tc := range cases { + assert.Assert(t, Printable(tc.r), + "expected U+%04X (%s) to be printable", tc.r, tc.name) + } +} + +// Binary search in Printable() depends on the table being sorted by `lo` +// with no overlaps. 
Catch ordering mistakes that the existing membership +// tests can miss (sort.Search returns 0 for an out-of-place leading entry, +// which silently misses lookups). +func TestUnicodePost15PrintableRangesSorted(t *testing.T) { + prevHi := rune(-1) + for _, r := range unicodePost15PrintableRanges { + assert.Assert(t, r.lo > prevHi, + "range %X..%X overlaps or is out of order with previous (hi=%X)", + r.lo, r.hi, prevHi) + assert.Assert(t, r.lo <= r.hi, + "range %X..%X has lo > hi", r.lo, r.hi) + prevHi = r.hi + } +} + +// Mix of ASCII (the dominant case in real input), CJK, an emoji, an +// unprintable control char, and a Unicode 16+ rune that exercises the new +// range table. +var benchPrintableInput = []rune{ + 'a', 'b', 'c', ' ', '1', '\t', '\n', // ASCII / common + '午', // CJK + '🚀', // emoji + 0x07, // BEL — unprintable + 0xa0, // NBSP + 0x1CE1A, // Large Type Piece (Unicode 16, only printable via the new table) +} + +func BenchmarkPrintable(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, r := range benchPrintableInput { + _ = Printable(r) + } + } +}