Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions twin/styledRune.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package twin

import (
"fmt"
"sort"
"unicode"

"github.com/rivo/uniseg"
Expand Down Expand Up @@ -66,6 +67,36 @@ func TrimSpaceLeft(runes []StyledRune) []StyledRune {
return []StyledRune{}
}

// unicodePost15PrintableRanges lists the blocks added in Unicode 15.1 (2023),
// 16.0 (2024), and 17.0 (2025). Go's unicode package lags behind the latest
// Unicode release (15.0.0 as of Go 1.25), so unicode.IsPrint() does not yet
// recognize these. We let the terminal render any unassigned code points
// within these blocks as tofu rather than mask real characters with '?'.
//
// Each entry is an inclusive [lo, hi] code point range.
//
// Must be sorted by `lo` ascending; entries must not overlap. The binary
// search (sort.Search) in Printable() relies on this invariant.
var unicodePost15PrintableRanges = []struct {
	lo, hi rune
}{
	{0x105C0, 0x105FF}, // Todhri (16.0)
	{0x10940, 0x1095F}, // Sidetic (17.0)
	{0x10D40, 0x10D8F}, // Garay (16.0)
	{0x11380, 0x113FF}, // Tulu-Tigalari (16.0)
	{0x116D0, 0x116FF}, // Myanmar Extended-C (16.0)
	{0x11B60, 0x11B7F}, // Sharada Supplement (17.0)
	{0x11BC0, 0x11BFF}, // Sunuwar (16.0)
	{0x11DB0, 0x11DEF}, // Tolong Siki (17.0)
	{0x13460, 0x143FF}, // Egyptian Hieroglyphs Extended-A (16.0)
	{0x16100, 0x1613F}, // Gurung Khema (16.0)
	{0x16D40, 0x16D7F}, // Kirat Rai (16.0)
	{0x16EA0, 0x16EDF}, // Beria Erfe (17.0)
	{0x18D80, 0x18DFF}, // Tangut Components Supplement (17.0)
	{0x1CC00, 0x1CEFF}, // Symbols for Legacy Computing Supplement (16.0) + Misc Symbols Supplement (17.0)
	{0x1E5D0, 0x1E5FF}, // Ol Onal (16.0)
	{0x1E6C0, 0x1E6FF}, // Tai Yo (17.0)
	{0x2EBF0, 0x2EE5F}, // CJK Unified Ideographs Extension I (15.1)
	{0x323B0, 0x3347F}, // CJK Unified Ideographs Extension J (17.0)
}

func Printable(char rune) bool {
if unicode.IsPrint(char) {
return true
Expand All @@ -88,5 +119,12 @@ func Printable(char rune) bool {
return true
}

i := sort.Search(len(unicodePost15PrintableRanges), func(i int) bool {
return unicodePost15PrintableRanges[i].lo > char
})
if i > 0 && char <= unicodePost15PrintableRanges[i-1].hi {
return true
}

return false
}
95 changes: 95 additions & 0 deletions twin/styledRune_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,98 @@ func TestRuneWidth(t *testing.T) {
assert.Equal(t, NewStyledRune('x', Style{}).Width(), 1)
assert.Equal(t, NewStyledRune('午', Style{}).Width(), 2)
}

// TestPrintableUnicodePost15 checks that Printable() accepts sample code
// points from blocks added in Unicode 15.1, 16.0, and 17.0. Go's unicode
// tables (15.0.0 as of Go 1.25) lag behind the latest Unicode release, so
// unicode.IsPrint() does not yet recognize these on its own.
func TestPrintableUnicodePost15(t *testing.T) {
	// One labelled code point that Printable() is expected to accept.
	type sample struct {
		label     string
		codePoint rune
	}

	samples := []sample{
		// Unicode 15.1 (2023)
		{label: "CJK Ext I start", codePoint: 0x2EBF0},
		{label: "CJK Ext I end", codePoint: 0x2EE5F},

		// Unicode 16.0 (2024)
		{label: "Todhri start", codePoint: 0x105C0},
		{label: "Todhri end", codePoint: 0x105F3},
		{label: "Garay start", codePoint: 0x10D40},
		{label: "Garay end", codePoint: 0x10D8E},
		{label: "Tulu-Tigalari start", codePoint: 0x11380},
		{label: "Tulu-Tigalari end", codePoint: 0x113D5},
		{label: "Sunuwar start", codePoint: 0x11BC0},
		{label: "Sunuwar end", codePoint: 0x11BF2},
		{label: "Egyptian Hieroglyphs Ext-A start", codePoint: 0x13460},
		{label: "Egyptian Hieroglyphs Ext-A end", codePoint: 0x143FA},
		{label: "Gurung Khema start", codePoint: 0x16100},
		{label: "Gurung Khema end", codePoint: 0x16139},
		{label: "Kirat Rai start", codePoint: 0x16D40},
		{label: "Kirat Rai end", codePoint: 0x16D79},
		{label: "Legacy Computing Supplement start", codePoint: 0x1CC00},
		{label: "Large Type Piece (used by jj)", codePoint: 0x1CE1A},
		{label: "Large Type Piece end", codePoint: 0x1CE50},
		{label: "Legacy Computing Supplement end", codePoint: 0x1CEBF},
		{label: "Ol Onal start", codePoint: 0x1E5D0},
		{label: "Ol Onal end", codePoint: 0x1E5FA},

		// Unicode 17.0 (2025)
		{label: "Sidetic start", codePoint: 0x10940},
		{label: "Sidetic end", codePoint: 0x1095F},
		{label: "Sharada Supplement start", codePoint: 0x11B60},
		{label: "Sharada Supplement end", codePoint: 0x11B7F},
		{label: "Tolong Siki start", codePoint: 0x11DB0},
		{label: "Tolong Siki end", codePoint: 0x11DEF},
		{label: "Beria Erfe start", codePoint: 0x16EA0},
		{label: "Beria Erfe end", codePoint: 0x16EDF},
		{label: "Tangut Components Supplement start", codePoint: 0x18D80},
		{label: "Tangut Components Supplement end", codePoint: 0x18DFF},
		{label: "Misc Symbols Supplement start", codePoint: 0x1CEC0},
		{label: "Misc Symbols Supplement end", codePoint: 0x1CEFF},
		{label: "Tai Yo start", codePoint: 0x1E6C0},
		{label: "Tai Yo end", codePoint: 0x1E6FF},
		{label: "CJK Ext J start", codePoint: 0x323B0},
		{label: "CJK Ext J end", codePoint: 0x3347F},
	}

	// Samples are checked in declaration order.
	for idx := range samples {
		s := samples[idx]
		assert.Assert(t, Printable(s.codePoint),
			"expected U+%04X (%s) to be printable", s.codePoint, s.label)
	}
}

// TestUnicodePost15PrintableRangesSorted guards the invariant that the binary
// search in Printable() depends on: unicodePost15PrintableRanges must be
// sorted by `lo` with no overlaps, and every entry must have lo <= hi.
//
// This catches ordering mistakes that the membership tests can miss: an
// out-of-place entry can make sort.Search land on the wrong slot, which
// silently misses lookups.
func TestUnicodePost15PrintableRangesSorted(t *testing.T) {
	// Start below every valid code point so the first entry always passes
	// the ordering check.
	prevHi := rune(-1)
	for _, r := range unicodePost15PrintableRanges {
		assert.Assert(t, r.lo > prevHi,
			"range %X..%X overlaps or is out of order with previous (hi=%X)",
			r.lo, r.hi, prevHi)
		assert.Assert(t, r.lo <= r.hi,
			"range %X..%X has lo > hi", r.lo, r.hi)
		prevHi = r.hi
	}
}

// benchPrintableInput is the rune mix fed to BenchmarkPrintable: ASCII (the
// dominant case in real input), CJK, an emoji, an unprintable control char,
// a no-break space, and a Unicode 16+ rune that exercises the new range
// table.
var benchPrintableInput = []rune{
	'a', 'b', 'c', ' ', '1', '\t', '\n', // ASCII / common
	'午',     // CJK
	'🚀',     // emoji
	0x07,    // BEL — unprintable
	0xa0,    // NBSP
	0x1CE1A, // Large Type Piece (Unicode 16, only printable via the new table)
}

// BenchmarkPrintable measures Printable() by running it over every rune in
// benchPrintableInput once per benchmark iteration. The results are
// discarded.
func BenchmarkPrintable(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		for idx := range benchPrintableInput {
			_ = Printable(benchPrintableInput[idx])
		}
	}
}