Init
im going to bed -=-
This commit is contained in:
43
lib/wcwidth/__init__.py
Normal file
43
lib/wcwidth/__init__.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""
|
||||
Wcwidth module.
|
||||
|
||||
https://github.com/jquast/wcwidth
|
||||
"""
|
||||
# re-export all functions & definitions, even private ones, from top-level
|
||||
# module path, to allow for 'from wcwidth import _private_func'. Of course,
|
||||
# user beware that any _private functions or variables not exported by __all__
|
||||
# may disappear or change signature at any future version.
|
||||
|
||||
# local
|
||||
from .wcwidth import ZERO_WIDTH # noqa
|
||||
from .wcwidth import (WIDE_EASTASIAN,
|
||||
AMBIGUOUS_EASTASIAN,
|
||||
VS16_NARROW_TO_WIDE,
|
||||
clip,
|
||||
ljust,
|
||||
rjust,
|
||||
width,
|
||||
center,
|
||||
wcwidth,
|
||||
wcswidth,
|
||||
list_versions,
|
||||
iter_sequences,
|
||||
strip_sequences,
|
||||
_wcmatch_version,
|
||||
_wcversion_value)
|
||||
from .bisearch import bisearch as _bisearch
|
||||
from .grapheme import grapheme_boundary_before # noqa
|
||||
from .grapheme import iter_graphemes, iter_graphemes_reverse
|
||||
from .textwrap import SequenceTextWrapper, wrap
|
||||
from .sgr_state import propagate_sgr
|
||||
|
||||
# The __all__ attribute defines the items exported from statement,
|
||||
# 'from wcwidth import *', but also to say, "This is the public API".
|
||||
__all__ = (
    'wcwidth',
    'wcswidth',
    'width',
    'iter_sequences',
    'iter_graphemes',
    'iter_graphemes_reverse',
    'grapheme_boundary_before',
    'ljust',
    'rjust',
    'center',
    'wrap',
    'clip',
    'strip_sequences',
    'list_versions',
    'propagate_sgr',
)

# The 'hatchling' build backend does not appear to offer the pyproject.toml convenience
# "dynamic = ['version']" the way flit_core does. Until a better way is found, the version
# string has to be kept in sync by hand in both places.
__version__ = '0.6.0'
|
||||
BIN
lib/wcwidth/__pycache__/__init__.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/__init__.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/bisearch.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/bisearch.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/control_codes.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/control_codes.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/escape_sequences.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/escape_sequences.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/grapheme.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/grapheme.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/sgr_state.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/sgr_state.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/table_ambiguous.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/table_ambiguous.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/table_grapheme.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/table_grapheme.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/table_mc.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/table_mc.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/table_vs16.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/table_vs16.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/table_wide.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/table_wide.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/table_zero.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/table_zero.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/textwrap.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/textwrap.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/unicode_versions.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/unicode_versions.cpython-314.pyc
Normal file
Binary file not shown.
BIN
lib/wcwidth/__pycache__/wcwidth.cpython-314.pyc
Normal file
BIN
lib/wcwidth/__pycache__/wcwidth.cpython-314.pyc
Normal file
Binary file not shown.
29
lib/wcwidth/bisearch.py
Normal file
29
lib/wcwidth/bisearch.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Binary search function for Unicode interval tables."""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def bisearch(ucs: int, table: tuple[tuple[int, int], ...]) -> int:
    """
    Binary search in interval table.

    :param ucs: Ordinal value of unicode character.
    :param table: Tuple of starting and ending ranges of ordinal values,
        in form of ``((start, end), ...)``. Both bounds are inclusive. The
        ranges must be in ascending order for the binary search to be valid.
        An empty table is accepted and never matches.
    :returns: 1 if ordinal value ucs is found within lookup table, else 0.
    """
    # An empty table contains nothing; return before table[0] is indexed below.
    if not table:
        return 0

    lbound = 0
    ubound = len(table) - 1

    # Fast rejection of values outside the span covered by the whole table.
    if ucs < table[0][0] or ucs > table[ubound][1]:
        return 0

    while ubound >= lbound:
        mid = (lbound + ubound) // 2
        if ucs > table[mid][1]:
            # Value lies above the middle interval: discard the lower half.
            lbound = mid + 1
        elif ucs < table[mid][0]:
            # Value lies below the middle interval: discard the upper half.
            ubound = mid - 1
        else:
            return 1

    return 0
|
||||
46
lib/wcwidth/control_codes.py
Normal file
46
lib/wcwidth/control_codes.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""
|
||||
Control character sets for terminal handling.
|
||||
|
||||
This module provides the control character sets used by the width() function to handle terminal
|
||||
control characters.
|
||||
"""
|
||||
|
||||
# Illegal C0/C1 control characters.
# In 'strict' mode these raise ValueError.
ILLEGAL_CTRL = frozenset(
    chr(code)
    for span in (
        range(0x01, 0x07),  # SOH, STX, ETX (^C), EOT (^D), ENQ, ACK
        range(0x10, 0x1b),  # DLE through SUB (^Z)
        range(0x1c, 0x20),  # FS, GS, RS, US
        (0x7f,),            # DEL
        range(0x80, 0xa0),  # C1 control characters
    )
    for code in span
)

# Vertical movement control characters.
# In 'strict' mode these raise ValueError (indeterminate horizontal position).
VERTICAL_CTRL = frozenset((
    '\x0a',  # LF (line feed)
    '\x0b',  # VT (vertical tab)
    '\x0c',  # FF (form feed)
))

# Horizontal movement control characters.
# They affect cursor position and are tracked in 'strict' and 'parse' modes.
HORIZONTAL_CTRL = frozenset((
    '\x08',  # BS (backspace) - cursor left 1
    '\x09',  # HT (horizontal tab) - advance to next tab stop
    '\x0d',  # CR (carriage return) - cursor to column 0
))

# Terminal-valid zero-width control characters.
# Allowed in all modes (zero-width, no movement).
ZERO_WIDTH_CTRL = frozenset((
    '\x00',  # NUL
    '\x07',  # BEL (bell)
    '\x0e',  # SO (shift out)
    '\x0f',  # SI (shift in)
))

# Every control character that needs special handling (not regular printable): the four
# sets above plus ESC, which introduces escape sequences.
ALL_CTRL = (
    ILLEGAL_CTRL
    | VERTICAL_CTRL
    | HORIZONTAL_CTRL
    | ZERO_WIDTH_CTRL
    | frozenset(('\x1b',))
)
|
||||
69
lib/wcwidth/escape_sequences.py
Normal file
69
lib/wcwidth/escape_sequences.py
Normal file
@@ -0,0 +1,69 @@
|
||||
r"""
|
||||
Terminal escape sequence patterns.
|
||||
|
||||
This module provides regex patterns for matching terminal escape sequences. All patterns match
|
||||
sequences that begin with ESC (``\x1b``). Before calling re.match with these patterns, callers
|
||||
should first check that the character at the current position is ESC for optimal performance.
|
||||
"""
|
||||
# std imports
|
||||
import re
|
||||
|
||||
# Zero-width escape sequences (SGR, OSC, CSI, etc.). This table, like
# INDETERMINATE_EFFECT_SEQUENCE, originated from the 'blessed' library.
ZERO_WIDTH_PATTERN = re.compile('|'.join((
    # CSI sequences
    r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]',
    # OSC sequences
    r'\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)',
    # APC sequences
    r'\x1b_[^\x1b\x07]*(?:\x07|\x1b\\)',
    # DCS sequences
    r'\x1bP[^\x1b\x07]*(?:\x07|\x1b\\)',
    # PM sequences
    r'\x1b\^[^\x1b\x07]*(?:\x07|\x1b\\)',
    # Character set designation
    r'\x1b[()].',
    # Fe sequences
    r'\x1b[\x40-\x5f]',
    # Fp sequences
    r'\x1b[78=>g]',
)))

# Cursor right movement: CSI [n] C. The optional count is captured as group 1 so that
# width() may parse it.
CURSOR_RIGHT_SEQUENCE = re.compile(r'\x1b\[(\d*)C')

# Cursor left movement: CSI [n] D. The optional count is captured as group 1 so that
# width() may parse it.
CURSOR_LEFT_SEQUENCE = re.compile(r'\x1b\[(\d*)D')

# Indeterminate effect sequences - these raise ValueError in 'strict' mode. Their effects are
# likely to be undesirable: they move the cursor vertically or to an unknown position, or are
# otherwise not managed by the 'width' method of this library.
#
# This table was first produced by code generation, extracting the termcap library with the
# techniques used at runtime by the 'blessed' library for 'xterm', 'alacritty', 'kitty',
# 'ghostty', 'screen', 'tmux', and others. The capabilities they have in common were then
# merged into the list below. Each entry is wrapped in its own non-capturing group.
INDETERMINATE_EFFECT_SEQUENCE = re.compile('|'.join([
    '(?:' + _pattern + ')'
    for _pattern in (
        r'\x1b\[\d+;\d+r',     # change_scroll_region
        r'\x1b\[\d*K',         # erase_in_line (clr_eol, clr_bol)
        r'\x1b\[\d*J',         # erase_in_display (clr_eos, erase_display)
        r'\x1b\[\d*G',         # column_address
        r'\x1b\[\d+;\d+H',     # cursor_address
        r'\x1b\[\d*H',         # cursor_home
        r'\x1b\[\d*A',         # cursor_up
        r'\x1b\[\d*B',         # cursor_down
        r'\x1b\[\d*P',         # delete_character
        r'\x1b\[\d*M',         # delete_line
        r'\x1b\[\d*L',         # insert_line
        r'\x1b\[\d*@',         # insert_character
        r'\x1b\[\d+X',         # erase_chars
        r'\x1b\[\d*S',         # scroll_up (parm_index)
        r'\x1b\[\d*T',         # scroll_down (parm_rindex)
        r'\x1b\[\d*d',         # row_address
        r'\x1b\[\?1049[hl]',   # alternate screen buffer
        r'\x1b\[\?47[hl]',     # alternate screen (legacy)
        r'\x1b8',              # restore_cursor
        r'\x1bD',              # scroll_forward (index)
        r'\x1bM',              # scroll_reverse (reverse index)
    )
]))
|
||||
428
lib/wcwidth/grapheme.py
Normal file
428
lib/wcwidth/grapheme.py
Normal file
@@ -0,0 +1,428 @@
|
||||
"""
|
||||
Grapheme cluster segmentation following Unicode Standard Annex #29.
|
||||
|
||||
This module provides pure-Python implementation of the grapheme cluster boundary algorithm as
|
||||
defined in UAX #29: Unicode Text Segmentation.
|
||||
|
||||
https://www.unicode.org/reports/tr29/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# std imports
|
||||
from enum import IntEnum
|
||||
from functools import lru_cache
|
||||
|
||||
from typing import TYPE_CHECKING, NamedTuple
|
||||
|
||||
# local
|
||||
from .bisearch import bisearch as _bisearch
|
||||
from .table_grapheme import (GRAPHEME_L,
|
||||
GRAPHEME_T,
|
||||
GRAPHEME_V,
|
||||
GRAPHEME_LV,
|
||||
INCB_EXTEND,
|
||||
INCB_LINKER,
|
||||
GRAPHEME_LVT,
|
||||
INCB_CONSONANT,
|
||||
GRAPHEME_EXTEND,
|
||||
GRAPHEME_CONTROL,
|
||||
GRAPHEME_PREPEND,
|
||||
GRAPHEME_SPACINGMARK,
|
||||
EXTENDED_PICTOGRAPHIC,
|
||||
GRAPHEME_REGIONAL_INDICATOR)
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
# std imports
|
||||
from collections.abc import Iterator
|
||||
|
||||
# Upper bound on how many codepoints are scanned backward when searching for a grapheme
# cluster boundary. It covers all known Unicode grapheme clusters with margin; anything
# longer is treated as pathological.
MAX_GRAPHEME_SCAN = 32


class GCB(IntEnum):
    """Grapheme Cluster Break property values."""

    # Default class for codepoints matching none of the classes below.
    OTHER = 0

    # Line-ending and control classes.
    CR = 1
    LF = 2
    CONTROL = 3

    # Classes that attach to, join, or pair with neighbouring codepoints.
    EXTEND = 4
    ZWJ = 5
    REGIONAL_INDICATOR = 6
    PREPEND = 7
    SPACING_MARK = 8

    # Hangul syllable classes.
    L = 9
    V = 10
    T = 11
    LV = 12
    LVT = 13
|
||||
|
||||
|
||||
# All lru_cache sizes in this file use maxsize=1024, chosen by benchmarking UDHR data (500+
# languages) and considering typical process-long sessions: western scripts need ~64 unique
# codepoints, while CJK text could in principle reach ~2000, though that is unlikely.
@lru_cache(maxsize=1024)
def _grapheme_cluster_break(ucs: int) -> GCB:
    """
    Return the Grapheme_Cluster_Break property for a codepoint.

    :param ucs: Ordinal value of the codepoint to classify.
    :returns: The matching :class:`GCB` member, or ``GCB.OTHER`` when the codepoint is in
        none of the lookup tables.
    """
    # CR, LF and ZWJ are single codepoints: compare directly, no table search needed.
    if ucs == 0x000d:
        return GCB.CR
    if ucs == 0x000a:
        return GCB.LF
    if ucs == 0x200d:
        return GCB.ZWJ

    # The remaining classes are codepoint ranges, each requiring a binary search. The tables
    # are consulted in this fixed order and the first hit wins.
    range_tables = (
        (GRAPHEME_CONTROL, GCB.CONTROL),
        (GRAPHEME_EXTEND, GCB.EXTEND),
        (GRAPHEME_REGIONAL_INDICATOR, GCB.REGIONAL_INDICATOR),
        (GRAPHEME_PREPEND, GCB.PREPEND),
        (GRAPHEME_SPACINGMARK, GCB.SPACING_MARK),
        (GRAPHEME_L, GCB.L),
        (GRAPHEME_V, GCB.V),
        (GRAPHEME_T, GCB.T),
        (GRAPHEME_LV, GCB.LV),
        (GRAPHEME_LVT, GCB.LVT),
    )
    for table, prop in range_tables:
        if _bisearch(ucs, table):
            return prop

    return GCB.OTHER
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
def _is_extended_pictographic(ucs: int) -> bool:
    """
    Tell whether a codepoint has the Extended_Pictographic property.

    :param ucs: Ordinal value of the codepoint.
    :returns: True when ``ucs`` lies within the EXTENDED_PICTOGRAPHIC table.
    """
    found = _bisearch(ucs, EXTENDED_PICTOGRAPHIC)
    return found != 0
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
def _is_incb_linker(ucs: int) -> bool:
    """
    Tell whether a codepoint has the InCB=Linker property.

    :param ucs: Ordinal value of the codepoint.
    :returns: True when ``ucs`` lies within the INCB_LINKER table.
    """
    found = _bisearch(ucs, INCB_LINKER)
    return found != 0
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
def _is_incb_consonant(ucs: int) -> bool:
    """
    Tell whether a codepoint has the InCB=Consonant property.

    :param ucs: Ordinal value of the codepoint.
    :returns: True when ``ucs`` lies within the INCB_CONSONANT table.
    """
    found = _bisearch(ucs, INCB_CONSONANT)
    return found != 0
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
def _is_incb_extend(ucs: int) -> bool:
    """
    Tell whether a codepoint has the InCB=Extend property.

    :param ucs: Ordinal value of the codepoint.
    :returns: True when ``ucs`` lies within the INCB_EXTEND table.
    """
    found = _bisearch(ucs, INCB_EXTEND)
    return found != 0
|
||||
|
||||
|
||||
class BreakResult(NamedTuple):
    """
    Result of grapheme cluster break decision.

    Carries the verdict for one pair of adjacent codepoints together with the Regional
    Indicator counter to pass to the next decision.
    """

    # True when a cluster boundary falls between the previous and current codepoint.
    should_break: bool
    # Regional Indicator counter after this decision; its parity drives the RI pairing rule.
    ri_count: int
|
||||
|
||||
|
||||
@lru_cache(maxsize=1024)
def _simple_break_check(prev_gcb: GCB, curr_gcb: GCB) -> BreakResult | None:
    """
    Check simple GCB-pair-based break rules (cacheable).

    Returns BreakResult for rules that can be determined from GCB properties alone, or None if
    the caller must evaluate the remaining rules: GB9 for ZWJ, the lookback rules GB9c and
    GB11, the Regional Indicator pairing rules GB12/GB13, and the GB999 fallback.

    The ``ri_count`` of a returned result is 1 when the current codepoint is a Regional
    Indicator and 0 otherwise, so that the next RI x RI decision sees the correct parity.

    :param prev_gcb: Grapheme_Cluster_Break property of the codepoint left of the boundary.
    :param curr_gcb: Grapheme_Cluster_Break property of the codepoint right of the boundary.
    :returns: The break decision, or None when the pair cannot be decided here.
    """
    # Of the rules below, only GB4 and GB9b can fire while the current codepoint is a Regional
    # Indicator. That indicator is then the first of a potential pair, so the counter must
    # restart at 1. Reporting 0 would make the following RI x RI check see an even count and
    # wrongly split a flag that follows a newline, control or Prepend character.
    ri_start = 1 if curr_gcb == GCB.REGIONAL_INDICATOR else 0

    # GB3: CR x LF
    if prev_gcb == GCB.CR and curr_gcb == GCB.LF:
        return BreakResult(should_break=False, ri_count=0)

    # GB4: (Control|CR|LF) ÷
    if prev_gcb in (GCB.CONTROL, GCB.CR, GCB.LF):
        return BreakResult(should_break=True, ri_count=ri_start)

    # GB5: ÷ (Control|CR|LF)
    if curr_gcb in (GCB.CONTROL, GCB.CR, GCB.LF):
        return BreakResult(should_break=True, ri_count=0)

    # GB6: L x (L|V|LV|LVT)
    if prev_gcb == GCB.L and curr_gcb in (GCB.L, GCB.V, GCB.LV, GCB.LVT):
        return BreakResult(should_break=False, ri_count=0)

    # GB7: (LV|V) x (V|T)
    if prev_gcb in (GCB.LV, GCB.V) and curr_gcb in (GCB.V, GCB.T):
        return BreakResult(should_break=False, ri_count=0)

    # GB8: (LVT|T) x T
    if prev_gcb in (GCB.LVT, GCB.T) and curr_gcb == GCB.T:
        return BreakResult(should_break=False, ri_count=0)

    # GB9: x (Extend|ZWJ) - but ZWJ needs GB11 check, so only handle Extend here
    if curr_gcb == GCB.EXTEND:
        return BreakResult(should_break=False, ri_count=0)

    # GB9a: x SpacingMark
    if curr_gcb == GCB.SPACING_MARK:
        return BreakResult(should_break=False, ri_count=0)

    # GB9b: Prepend x
    if prev_gcb == GCB.PREPEND:
        return BreakResult(should_break=False, ri_count=ri_start)

    # GB9c and GB11 need lookback - return None to signal complex check needed
    # GB12/13 (RI pairs) need ri_count state - also handled in main function
    return None
|
||||
|
||||
|
||||
def _should_break(
    prev_gcb: GCB,
    curr_gcb: GCB,
    text: str,
    curr_idx: int,
    ri_count: int,
) -> BreakResult:
    """
    Determine if there should be a grapheme cluster break between prev and curr.

    Implements UAX #29 grapheme cluster boundary rules. The rules that depend only on the two
    properties are delegated to the cached :func:`_simple_break_check`. The rules that need
    lookback into ``text`` or the Regional Indicator counter are evaluated here.

    :param prev_gcb: Grapheme_Cluster_Break property of the codepoint at ``curr_idx - 1``.
    :param curr_gcb: Grapheme_Cluster_Break property of the codepoint at ``curr_idx``.
    :param text: The string being segmented; read backward from ``curr_idx`` for lookback.
    :param curr_idx: Index in ``text`` of the current codepoint.
    :param ri_count: Regional Indicator counter carried over from the previous decision.
    :returns: The break decision along with the updated Regional Indicator counter.
    """
    no_break = BreakResult(should_break=False, ri_count=0)

    # Pair-only rules first; they are cached.
    cached = _simple_break_check(prev_gcb, curr_gcb)
    if cached is not None:
        return cached

    # GB9: x ZWJ (not cached because GB11 needs lookback when prev is ZWJ)
    if curr_gcb == GCB.ZWJ:
        return no_break

    curr_ucs = ord(text[curr_idx])

    # GB9c: Indic conjunct cluster
    #   \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker}
    #   [\p{InCB=Extend}\p{InCB=Linker}]* x \p{InCB=Consonant}
    if _is_incb_consonant(curr_ucs):
        seen_linker = False
        # Walk left over Extend/Linker codepoints until something else is met.
        for back in range(curr_idx - 1, -1, -1):
            cp = ord(text[back])
            if _is_incb_linker(cp):
                seen_linker = True
                continue
            if _is_incb_extend(cp):
                continue
            # The run ended: join only if it started at a consonant and held a linker.
            if seen_linker and _is_incb_consonant(cp):
                return no_break
            break

    # GB11: ExtPict Extend* ZWJ x ExtPict
    if prev_gcb == GCB.ZWJ and _is_extended_pictographic(curr_ucs):
        # Start two back: the codepoint at curr_idx - 1 is the ZWJ itself.
        for back in range(curr_idx - 2, -1, -1):
            cp = ord(text[back])
            if _grapheme_cluster_break(cp) == GCB.EXTEND:
                continue
            if _is_extended_pictographic(cp):
                return no_break
            break

    curr_is_ri = curr_gcb == GCB.REGIONAL_INDICATOR

    # GB12/GB13: RI x RI (pair matching). An odd counter means the previous indicator is
    # still unpaired, so the current one completes the pair.
    if curr_is_ri and prev_gcb == GCB.REGIONAL_INDICATOR:
        if ri_count % 2 == 1:
            return BreakResult(should_break=False, ri_count=ri_count + 1)
        return BreakResult(should_break=True, ri_count=1)

    # GB999: Any ÷ Any
    return BreakResult(should_break=True, ri_count=1 if curr_is_ri else 0)
|
||||
|
||||
|
||||
def iter_graphemes(
    unistr: str,
    start: int = 0,
    end: int | None = None,
) -> Iterator[str]:
    r"""
    Iterate over grapheme clusters in a Unicode string.

    Grapheme clusters are "user-perceived characters" - what a user would
    consider a single character, which may consist of multiple Unicode
    codepoints (e.g., a base character with combining marks, emoji sequences).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0). Negative values are clamped to 0,
        matching :func:`iter_graphemes_reverse`.
    :param end: Ending index (default len(unistr)). Values beyond the end of the
        string are clamped to ``len(unistr)``.
    :yields: Grapheme cluster substrings.

    Example::

        >>> list(iter_graphemes('cafe\u0301'))
        ['c', 'a', 'f', 'e\u0301']
        >>> list(iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467'))
        ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467']
        >>> list(iter_graphemes('ok\U0001F1FA\U0001F1F8'))
        ['o', 'k', '\U0001F1FA\U0001F1F8']

    .. versionadded:: 0.3.0
    """
    if not unistr:
        return

    length = len(unistr)

    # Clamp both bounds into the string. A negative start would otherwise index from the
    # end of the string and make the forward range below produce nonsensical slices.
    start = max(start, 0)
    end = length if end is None else min(end, length)

    if start >= end:
        return

    # Track state for grapheme cluster boundaries
    cluster_start = start
    ri_count = 0

    # Get GCB for first character
    prev_gcb = _grapheme_cluster_break(ord(unistr[start]))

    # A leading Regional Indicator is the first of a potential pair.
    if prev_gcb == GCB.REGIONAL_INDICATOR:
        ri_count = 1

    for idx in range(start + 1, end):
        curr_gcb = _grapheme_cluster_break(ord(unistr[idx]))

        result = _should_break(prev_gcb, curr_gcb, unistr, idx, ri_count)
        ri_count = result.ri_count

        if result.should_break:
            yield unistr[cluster_start:idx]
            cluster_start = idx

        prev_gcb = curr_gcb

    # Yield the final cluster
    yield unistr[cluster_start:end]
|
||||
|
||||
|
||||
def _find_cluster_start(text: str, pos: int) -> int:
    """
    Find the start of the grapheme cluster containing the character before pos.

    Scans backwards from ``pos`` to a safe starting point, then walks forward applying the
    standard break rules; the last boundary seen before ``pos`` is the answer.

    :param text: The Unicode string.
    :param pos: Position to search before (exclusive); callers pass a value of at least 1.
    :returns: Start position of the grapheme cluster.
    """
    last_idx = pos - 1
    last_cp = ord(text[last_idx])

    # GB3: CR x LF - a LF directly after CR belongs to the same cluster.
    if last_cp == 0x0A and pos >= 2 and text[pos - 2] == '\r':
        return pos - 2

    # Fast path: an ASCII character (other than the LF case above) starts its own cluster.
    if last_cp < 0x80:
        # GB9b: unless a PREPEND codepoint precedes it (rare: Arabic/Brahmic), in which case
        # the cluster begins wherever the PREPEND codepoint's cluster begins.
        if last_cp >= 0x20 and pos >= 2:
            before_cp = ord(text[pos - 2])
            if before_cp >= 0x80 and _grapheme_cluster_break(before_cp) == GCB.PREPEND:
                return _find_cluster_start(text, last_idx)
        return last_idx

    # Scan backward for a safe starting point, going no further than MAX_GRAPHEME_SCAN
    # codepoints from pos and never below index 0.
    floor = max(0, pos - MAX_GRAPHEME_SCAN)
    anchor = last_idx
    while anchor > floor:
        cp = ord(text[anchor])
        # Printable ASCII always starts a cluster; a CONTROL codepoint forces a break (GB4).
        if 0x20 <= cp < 0x80 or _grapheme_cluster_break(cp) == GCB.CONTROL:
            break
        anchor -= 1

    # Walk forward from the anchor, remembering the most recent boundary.
    boundary = anchor
    left_gcb = _grapheme_cluster_break(ord(text[anchor]))
    ri_run = 1 if left_gcb == GCB.REGIONAL_INDICATOR else 0

    for idx in range(anchor + 1, pos):
        right_gcb = _grapheme_cluster_break(ord(text[idx]))
        breaks_here, ri_run = _should_break(left_gcb, right_gcb, text, idx, ri_run)
        if breaks_here:
            boundary = idx
        left_gcb = right_gcb

    return boundary
|
||||
|
||||
|
||||
def grapheme_boundary_before(unistr: str, pos: int) -> int:
    r"""
    Find the grapheme cluster boundary immediately before a position.

    :param unistr: The Unicode string to search.
    :param pos: Position in the string (0 < pos <= len(unistr)). Values above
        ``len(unistr)`` are clamped to the end of the string. Values of 0 or less,
        or any position in an empty string, return 0.
    :returns: Start index of the grapheme cluster containing the character at pos-1.

    Example::

        >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8)
        6
        >>> grapheme_boundary_before('a\r\nb', 3)
        1

    .. versionadded:: 0.3.6
    """
    # Clamp before the guard: for an empty string any positive pos clamps to 0, and the
    # cluster search must not run at position 0 because it reads the character at pos - 1.
    pos = min(pos, len(unistr))
    if pos <= 0:
        return 0
    return _find_cluster_start(unistr, pos)
|
||||
|
||||
|
||||
def iter_graphemes_reverse(
    unistr: str,
    start: int = 0,
    end: int | None = None,
) -> Iterator[str]:
    r"""
    Iterate over grapheme clusters in reverse order (last to first).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0); negative values are treated as 0.
    :param end: Ending index (default len(unistr)); values past the end are clamped.
    :yields: Grapheme cluster substrings in reverse order.

    Example::

        >>> list(iter_graphemes_reverse('cafe\u0301'))
        ['e\u0301', 'f', 'a', 'c']

    .. versionadded:: 0.3.6
    """
    total = len(unistr)
    if total == 0:
        return

    lower = max(start, 0)
    upper = total if end is None else min(end, total)

    # An empty or inverted window yields nothing.
    if lower >= upper:
        return

    cursor = upper
    while cursor > lower:
        head = _find_cluster_start(unistr, cursor)
        # Stop rather than yield a partial grapheme that begins before the window.
        if head < lower:
            return
        yield unistr[head:cursor]
        cursor = head
|
||||
0
lib/wcwidth/py.typed
Normal file
0
lib/wcwidth/py.typed
Normal file
338
lib/wcwidth/sgr_state.py
Normal file
338
lib/wcwidth/sgr_state.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""
|
||||
SGR (Select Graphic Rendition) state tracking for terminal escape sequences.
|
||||
|
||||
This module provides functions for tracking and propagating terminal styling (bold, italic, colors,
|
||||
etc.) via public API propagate_sgr(), and its dependent functions, cut() and wrap(). It only has
|
||||
attributes necessary to perform its functions, eg 'RED' and 'BLUE' attributes are not defined.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
# std imports
|
||||
import re
|
||||
from enum import IntEnum
|
||||
|
||||
from typing import TYPE_CHECKING, Iterator, NamedTuple
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from typing import Sequence
|
||||
|
||||
|
||||
class _SGR(IntEnum):
    """
    SGR (Select Graphic Rendition) parameter codes.

    Only the codes this module needs are listed; for the colour ranges just the first and
    last code of each range is named.

    References:
    - https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
    - https://github.com/tehmaze/ansi/tree/master/ansi/colour
    """

    # Reset everything to defaults.
    RESET = 0

    # Attribute "on" codes.
    BOLD = 1
    DIM = 2
    ITALIC = 3
    UNDERLINE = 4
    BLINK = 5
    RAPID_BLINK = 6
    INVERSE = 7
    HIDDEN = 8
    STRIKETHROUGH = 9
    DOUBLE_UNDERLINE = 21

    # Attribute "off" codes.
    BOLD_DIM_OFF = 22
    ITALIC_OFF = 23
    UNDERLINE_OFF = 24
    BLINK_OFF = 25
    INVERSE_OFF = 27
    HIDDEN_OFF = 28
    STRIKETHROUGH_OFF = 29

    # Foreground colours: basic range bounds, extended-colour introducer, default.
    FG_BLACK = 30
    FG_WHITE = 37
    FG_EXTENDED = 38
    FG_DEFAULT = 39

    # Background colours: basic range bounds, extended-colour introducer, default.
    BG_BLACK = 40
    BG_WHITE = 47
    BG_EXTENDED = 48
    BG_DEFAULT = 49

    # Bright colour range bounds.
    FG_BRIGHT_BLACK = 90
    FG_BRIGHT_WHITE = 97
    BG_BRIGHT_BLACK = 100
    BG_BRIGHT_WHITE = 107
|
||||
|
||||
|
||||
# SGR sequence pattern: CSI, then the parameter text (digits, semicolons, colons), then a
# final 'm'. The parameter text is captured as group 1.
# Colons appear in the ITU T.416 (ISO 8613-6) extended colour format, 38:2::R:G:B. That form
# is less common than the semicolon one (38;2;R;G;B) but is supported by kitty, iTerm2, and
# newer VTE-based terminals.
_SGR_PATTERN = re.compile(r'\x1b\[([\d;:]*)m')

# Fast path: the same shape without a capture group, for quickly checking whether any SGR
# sequence exists at all.
_SGR_QUICK_CHECK = re.compile(r'\x1b\[[\d;:]*m')

# Sequence that resets all attributes.
_SGR_RESET = '\x1b[0m'
|
||||
|
||||
|
||||
class _SGRState(NamedTuple):
    """
    Immutable record of the SGR terminal attributes currently in effect, by category.

    Every field defaults to "not set", so ``_SGRState()`` is the state after a reset.

    :param bold: Bold attribute (SGR 1).
    :param dim: Dim/faint attribute (SGR 2).
    :param italic: Italic attribute (SGR 3).
    :param underline: Underline attribute (SGR 4).
    :param blink: Slow blink attribute (SGR 5).
    :param rapid_blink: Rapid blink attribute (SGR 6).
    :param inverse: Inverse/reverse attribute (SGR 7).
    :param hidden: Hidden/invisible attribute (SGR 8).
    :param strikethrough: Strikethrough attribute (SGR 9).
    :param double_underline: Double underline attribute (SGR 21).
    :param foreground: Foreground color as tuple of SGR params, or None for default.
    :param background: Background color as tuple of SGR params, or None for default.
    """

    # On/off attributes.
    bold: bool = False
    dim: bool = False
    italic: bool = False
    underline: bool = False
    blink: bool = False
    rapid_blink: bool = False
    inverse: bool = False
    hidden: bool = False
    strikethrough: bool = False
    double_underline: bool = False

    # Colours, held as the SGR parameters that select them.
    foreground: tuple[int, ...] | None = None
    background: tuple[int, ...] | None = None


# Shared default instance: no attributes set.
_SGR_STATE_DEFAULT = _SGRState()
|
||||
|
||||
|
||||
def _sgr_state_is_active(state: _SGRState) -> bool:
    """
    Report whether the state has any attribute set.

    :param state: The SGR state to check.
    :returns: True if any attribute differs from default.
    """
    toggles = (
        state.bold,
        state.dim,
        state.italic,
        state.underline,
        state.blink,
        state.rapid_blink,
        state.inverse,
        state.hidden,
        state.strikethrough,
        state.double_underline,
    )
    if any(toggles):
        return True
    # No on/off attribute is set; a colour alone also counts as active.
    return state.foreground is not None or state.background is not None
|
||||
|
||||
|
||||
def _sgr_state_to_sequence(state: _SGRState) -> str:
    """
    Generate minimal SGR sequence to restore this state from reset.

    Parameters are joined with semicolons. A direct-colour tuple that came from the
    colon-delimited ITU T.416 form carries a colorspace slot, ``(38, 2, colorspace, R, G, B)``.
    The semicolon form has no such slot (``38;2;R;G;B``), so the colorspace is dropped on
    output. Emitting all six values would select the wrong colour and leave a stray trailing
    parameter, which for ``...;0`` is a full reset.

    :param state: The SGR state to convert.
    :returns: SGR escape sequence string, or empty string if no attributes set.
    """
    def _color_params(color: tuple[int, ...]) -> str:
        # Direct colour (mode 2) with six or more values: index 2 is the colorspace
        # identifier of the colon form; keep only the mode and the R, G, B components.
        if len(color) >= 6 and color[1] == 2:
            color = color[:2] + color[3:6]
        return ';'.join(str(p) for p in color)

    # Map boolean attributes to their SGR codes
    bool_attrs = [
        (state.bold, '1'), (state.dim, '2'), (state.italic, '3'),
        (state.underline, '4'), (state.blink, '5'), (state.rapid_blink, '6'),
        (state.inverse, '7'), (state.hidden, '8'), (state.strikethrough, '9'),
        (state.double_underline, '21'),
    ]
    params = [code for active, code in bool_attrs if active]

    # Add color params (already held as tuples of SGR parameters)
    if state.foreground is not None:
        params.append(_color_params(state.foreground))
    if state.background is not None:
        params.append(_color_params(state.background))

    # Every set attribute contributes exactly one entry, so an empty list means the state
    # is the default one and there is nothing to restore.
    if not params:
        return ''

    return f'\x1b[{";".join(params)}m'
|
||||
|
||||
|
||||
def _parse_sgr_params(sequence: str) -> list[int | tuple[int, ...]]:
    r"""
    Parse SGR sequence and return list of parameter values.

    Compound sequences such as ``\x1b[1;31;4m`` give ``[1, 31, 4]``.
    A sequence with no parameters (``\x1b[m``) gives ``[0]`` (reset), and an empty
    parameter between semicolons is likewise read as 0.
    A colon-separated parameter becomes one tuple of its sub-parameters, with empty
    sub-parameters read as 0: ``\x1b[38:2::255:0:0m`` gives ``[(38, 2, 0, 255, 0, 0)]``.

    :param sequence: SGR escape sequence string.
    :returns: List of integer parameters, with tuples for colon-separated parameters;
        an empty list when ``sequence`` does not start with an SGR sequence.
    """
    def _number(text: str) -> int:
        # An omitted (empty) parameter stands for 0.
        return int(text) if text else 0

    matched = _SGR_PATTERN.match(sequence)
    if matched is None:
        return []

    raw_params = matched.group(1)
    if raw_params == '':
        # \x1b[m is equivalent to \x1b[0m
        return [0]

    parsed: list[int | tuple[int, ...]] = []
    for field in raw_params.split(';'):
        if ':' not in field:
            parsed.append(_number(field))
            continue
        # Colon-separated extended color (ITU T.416 format),
        # e.g., "38:2::255:0:0" or "38:2:1:255:0:0" (with colorspace)
        parsed.append(tuple(_number(sub) for sub in field.split(':')))
    return parsed
|
||||
|
||||
|
||||
def _parse_extended_color(
    params: Iterator[int | tuple[int, ...]], base: int
) -> tuple[int, ...] | None:
    """
    Read an extended color (256-color or RGB) off the front of a parameter iterator.

    The iterator is advanced past every parameter that is read, including when the result
    is None.

    :param params: Iterator of remaining SGR parameters (semicolon-separated format).
    :param base: Base code (38 for foreground, 48 for background).
    :returns: Color tuple like (38, 5, N) or (38, 2, R, G, B), or None if malformed.
    """
    try:
        mode = next(params)
        if isinstance(mode, tuple):
            # Unexpected tuple: the colon format is handled separately.
            return None

        if mode == 5:
            # 256-color: one palette index follows.
            index = next(params)
            return None if isinstance(index, tuple) else (int(base), 5, index)

        if mode == 2:
            # RGB: all three components are consumed before any of them is validated.
            channels = (next(params), next(params), next(params))
            if any(isinstance(channel, tuple) for channel in channels):
                return None
            return (int(base), 2, *channels)
    except StopIteration:
        # Parameters ran out part-way through: malformed.
        return None

    # Any other mode is not an extended color.
    return None
|
||||
|
||||
|
||||
def _sgr_state_update(state: _SGRState, sequence: str) -> _SGRState:
    # pylint: disable=too-many-branches,too-complex,too-many-statements
    # NOTE: When minimum Python version is 3.10+, this can be simplified using match/case.
    """
    Apply each parameter of one SGR sequence to a state and return the result.

    Parameters are handled left to right, so a later parameter overrides an earlier one.
    Parameters that match none of the handled codes are ignored.

    :param state: Current SGR state.
    :param sequence: SGR escape sequence string.
    :returns: SGR state with the updates of ``sequence`` applied.
    """
    # Codes whose whole effect is a fixed set of field assignments. Entries are tested in
    # this order and the first match wins.
    attribute_updates = (
        # Attribute ON codes
        (_SGR.BOLD, {'bold': True}),
        (_SGR.DIM, {'dim': True}),
        (_SGR.ITALIC, {'italic': True}),
        (_SGR.UNDERLINE, {'underline': True}),
        (_SGR.BLINK, {'blink': True}),
        (_SGR.RAPID_BLINK, {'rapid_blink': True}),
        (_SGR.INVERSE, {'inverse': True}),
        (_SGR.HIDDEN, {'hidden': True}),
        (_SGR.STRIKETHROUGH, {'strikethrough': True}),
        (_SGR.DOUBLE_UNDERLINE, {'double_underline': True}),
        # Attribute OFF codes
        (_SGR.BOLD_DIM_OFF, {'bold': False, 'dim': False}),
        (_SGR.ITALIC_OFF, {'italic': False}),
        (_SGR.UNDERLINE_OFF, {'underline': False, 'double_underline': False}),
        (_SGR.BLINK_OFF, {'blink': False, 'rapid_blink': False}),
        (_SGR.INVERSE_OFF, {'inverse': False}),
        (_SGR.HIDDEN_OFF, {'hidden': False}),
        (_SGR.STRIKETHROUGH_OFF, {'strikethrough': False}),
    )

    # A single shared iterator: the extended-color parser below pulls its own arguments
    # from it, so those arguments are not seen again by this loop.
    codes = iter(_parse_sgr_params(sequence))
    for code in codes:
        # Colon-separated parameters (ITU T.416 format) arrive as one tuple.
        if isinstance(code, tuple):
            if len(code) >= 2:
                if code[0] == _SGR.FG_EXTENDED:
                    # Foreground: (38, 2, [colorspace,] R, G, B) or (38, 5, N)
                    state = state._replace(foreground=code)
                elif code[0] == _SGR.BG_EXTENDED:
                    # Background: (48, 2, [colorspace,] R, G, B) or (48, 5, N)
                    state = state._replace(background=code)
            continue

        if code == _SGR.RESET:
            state = _SGR_STATE_DEFAULT
            continue

        changes = next((fields for key, fields in attribute_updates if code == key), None)
        if changes is not None:
            state = state._replace(**changes)
        # Basic colors (30-37, 40-47 standard; 90-97, 100-107 bright)
        elif (_SGR.FG_BLACK <= code <= _SGR.FG_WHITE
              or _SGR.FG_BRIGHT_BLACK <= code <= _SGR.FG_BRIGHT_WHITE):
            state = state._replace(foreground=(code,))
        elif (_SGR.BG_BLACK <= code <= _SGR.BG_WHITE
              or _SGR.BG_BRIGHT_BLACK <= code <= _SGR.BG_BRIGHT_WHITE):
            state = state._replace(background=(code,))
        elif code == _SGR.FG_DEFAULT:
            state = state._replace(foreground=None)
        elif code == _SGR.BG_DEFAULT:
            state = state._replace(background=None)
        # Extended colors (semicolon-separated format); a malformed color leaves the
        # state unchanged.
        elif code == _SGR.FG_EXTENDED:
            color = _parse_extended_color(codes, _SGR.FG_EXTENDED)
            if color:
                state = state._replace(foreground=color)
        elif code == _SGR.BG_EXTENDED:
            color = _parse_extended_color(codes, _SGR.BG_EXTENDED)
            if color:
                state = state._replace(background=color)
    return state
|
||||
|
||||
|
||||
def propagate_sgr(lines: Sequence[str]) -> list[str]:
    r"""
    Carry SGR styling across line boundaries so every line stands alone.

    After text with SGR styling has been split into several lines, a line shown on its own
    may lack the styling set up by an earlier line, or may leak its styling into whatever is
    printed next. For each line this function:

    - prepends the sequence that restores the style still active from the previous lines
    - appends ``\x1b[0m`` when a style is still active at the end of the line

    If no line contains an SGR sequence, the lines are returned unchanged as a new list.

    :param lines: List of text lines, possibly containing SGR sequences.
    :returns: List of lines with SGR codes propagated.

    Example::

        >>> propagate_sgr(['\x1b[31mhello', 'world\x1b[0m'])
        ['\x1b[31mhello\x1b[0m', '\x1b[31mworld\x1b[0m']

    This is useful in cases of making special editors and viewers, and is used for the
    default modes (propagate_sgr=True) of :func:`wcwidth.width` and :func:`wcwidth.clip`.

    For example, when an earlier line switched on the BLUE color and only the following line
    is viewed, that line should still be shown in BLUE, and every line that has a style active
    at its end should finish with the terminating reset (``\x1b[0m``).
    """
    # Fast path: nothing to do unless some line contains an SGR sequence.
    contains_sgr = any(_SGR_QUICK_CHECK.search(text) for text in lines)
    if not (contains_sgr and lines):
        return list(lines)

    propagated: list[str] = []
    active = _SGR_STATE_DEFAULT
    for text in lines:
        # Sequence restoring the style inherited from earlier lines, taken before this
        # line's own sequences change the state.
        restore = _sgr_state_to_sequence(active)

        # Fold every SGR sequence of this line into the running state.
        for found in _SGR_PATTERN.finditer(text):
            active = _sgr_state_update(active, found.group())

        rendered = text if not restore else restore + text
        if _sgr_state_is_active(active):
            # Close the line with a reset so the style does not bleed past it.
            rendered = rendered + _SGR_RESET
        propagated.append(rendered)

    return propagated
|
||||
189
lib/wcwidth/table_ambiguous.py
Normal file
189
lib/wcwidth/table_ambiguous.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""
|
||||
Exports AMBIGUOUS_EASTASIAN table keyed by supporting unicode version level.
|
||||
|
||||
This code generated by wcwidth/bin/update-tables.py on 2026-01-18 23:27:15 UTC.
|
||||
"""
|
||||
# pylint: disable=duplicate-code
|
||||
AMBIGUOUS_EASTASIAN = {
|
||||
'17.0.0': (
|
||||
# Source: EastAsianWidth-17.0.0.txt
|
||||
# Date: 2025-07-24, 00:12:54 GMT
|
||||
#
|
||||
(0x000a1, 0x000a1,), # Inverted Exclamation Mark
|
||||
(0x000a4, 0x000a4,), # Currency Sign
|
||||
(0x000a7, 0x000a8,), # Section Sign ..Diaeresis
|
||||
(0x000aa, 0x000aa,), # Feminine Ordinal Indicator
|
||||
(0x000ad, 0x000ae,), # Soft Hyphen ..Registered Sign
|
||||
(0x000b0, 0x000b4,), # Degree Sign ..Acute Accent
|
||||
(0x000b6, 0x000ba,), # Pilcrow Sign ..Masculine Ordinal Indica
|
||||
(0x000bc, 0x000bf,), # Vulgar Fraction One Quar..Inverted Question Mark
|
||||
(0x000c6, 0x000c6,), # Latin Capital Letter Ae
|
||||
(0x000d0, 0x000d0,), # Latin Capital Letter Eth
|
||||
(0x000d7, 0x000d8,), # Multiplication Sign ..Latin Capital Letter O W
|
||||
(0x000de, 0x000e1,), # Latin Capital Letter Tho..Latin Small Letter A Wit
|
||||
(0x000e6, 0x000e6,), # Latin Small Letter Ae
|
||||
(0x000e8, 0x000ea,), # Latin Small Letter E Wit..Latin Small Letter E Wit
|
||||
(0x000ec, 0x000ed,), # Latin Small Letter I Wit..Latin Small Letter I Wit
|
||||
(0x000f0, 0x000f0,), # Latin Small Letter Eth
|
||||
(0x000f2, 0x000f3,), # Latin Small Letter O Wit..Latin Small Letter O Wit
|
||||
(0x000f7, 0x000fa,), # Division Sign ..Latin Small Letter U Wit
|
||||
(0x000fc, 0x000fc,), # Latin Small Letter U With Diaeresis
|
||||
(0x000fe, 0x000fe,), # Latin Small Letter Thorn
|
||||
(0x00101, 0x00101,), # Latin Small Letter A With Macron
|
||||
(0x00111, 0x00111,), # Latin Small Letter D With Stroke
|
||||
(0x00113, 0x00113,), # Latin Small Letter E With Macron
|
||||
(0x0011b, 0x0011b,), # Latin Small Letter E With Caron
|
||||
(0x00126, 0x00127,), # Latin Capital Letter H W..Latin Small Letter H Wit
|
||||
(0x0012b, 0x0012b,), # Latin Small Letter I With Macron
|
||||
(0x00131, 0x00133,), # Latin Small Letter Dotle..Latin Small Ligature Ij
|
||||
(0x00138, 0x00138,), # Latin Small Letter Kra
|
||||
(0x0013f, 0x00142,), # Latin Capital Letter L W..Latin Small Letter L Wit
|
||||
(0x00144, 0x00144,), # Latin Small Letter N With Acute
|
||||
(0x00148, 0x0014b,), # Latin Small Letter N Wit..Latin Small Letter Eng
|
||||
(0x0014d, 0x0014d,), # Latin Small Letter O With Macron
|
||||
(0x00152, 0x00153,), # Latin Capital Ligature O..Latin Small Ligature Oe
|
||||
(0x00166, 0x00167,), # Latin Capital Letter T W..Latin Small Letter T Wit
|
||||
(0x0016b, 0x0016b,), # Latin Small Letter U With Macron
|
||||
(0x001ce, 0x001ce,), # Latin Small Letter A With Caron
|
||||
(0x001d0, 0x001d0,), # Latin Small Letter I With Caron
|
||||
(0x001d2, 0x001d2,), # Latin Small Letter O With Caron
|
||||
(0x001d4, 0x001d4,), # Latin Small Letter U With Caron
|
||||
(0x001d6, 0x001d6,), # Latin Small Letter U With Diaeresis And Macron
|
||||
(0x001d8, 0x001d8,), # Latin Small Letter U With Diaeresis And Acute
|
||||
(0x001da, 0x001da,), # Latin Small Letter U With Diaeresis And Caron
|
||||
(0x001dc, 0x001dc,), # Latin Small Letter U With Diaeresis And Grave
|
||||
(0x00251, 0x00251,), # Latin Small Letter Alpha
|
||||
(0x00261, 0x00261,), # Latin Small Letter Script G
|
||||
(0x002c4, 0x002c4,), # Modifier Letter Up Arrowhead
|
||||
(0x002c7, 0x002c7,), # Caron
|
||||
(0x002c9, 0x002cb,), # Modifier Letter Macron ..Modifier Letter Grave Ac
|
||||
(0x002cd, 0x002cd,), # Modifier Letter Low Macron
|
||||
(0x002d0, 0x002d0,), # Modifier Letter Triangular Colon
|
||||
(0x002d8, 0x002db,), # Breve ..Ogonek
|
||||
(0x002dd, 0x002dd,), # Double Acute Accent
|
||||
(0x002df, 0x002df,), # Modifier Letter Cross Accent
|
||||
(0x00391, 0x003a1,), # Greek Capital Letter Alp..Greek Capital Letter Rho
|
||||
(0x003a3, 0x003a9,), # Greek Capital Letter Sig..Greek Capital Letter Ome
|
||||
(0x003b1, 0x003c1,), # Greek Small Letter Alpha..Greek Small Letter Rho
|
||||
(0x003c3, 0x003c9,), # Greek Small Letter Sigma..Greek Small Letter Omega
|
||||
(0x00401, 0x00401,), # Cyrillic Capital Letter Io
|
||||
(0x00410, 0x0044f,), # Cyrillic Capital Letter ..Cyrillic Small Letter Ya
|
||||
(0x00451, 0x00451,), # Cyrillic Small Letter Io
|
||||
(0x02010, 0x02010,), # Hyphen
|
||||
(0x02013, 0x02016,), # En Dash ..Double Vertical Line
|
||||
(0x02018, 0x02019,), # Left Single Quotation Ma..Right Single Quotation M
|
||||
(0x0201c, 0x0201d,), # Left Double Quotation Ma..Right Double Quotation M
|
||||
(0x02020, 0x02022,), # Dagger ..Bullet
|
||||
(0x02024, 0x02027,), # One Dot Leader ..Hyphenation Point
|
||||
(0x02030, 0x02030,), # Per Mille Sign
|
||||
(0x02032, 0x02033,), # Prime ..Double Prime
|
||||
(0x02035, 0x02035,), # Reversed Prime
|
||||
(0x0203b, 0x0203b,), # Reference Mark
|
||||
(0x0203e, 0x0203e,), # Overline
|
||||
(0x02074, 0x02074,), # Superscript Four
|
||||
(0x0207f, 0x0207f,), # Superscript Latin Small Letter N
|
||||
(0x02081, 0x02084,), # Subscript One ..Subscript Four
|
||||
(0x020ac, 0x020ac,), # Euro Sign
|
||||
(0x02103, 0x02103,), # Degree Celsius
|
||||
(0x02105, 0x02105,), # Care Of
|
||||
(0x02109, 0x02109,), # Degree Fahrenheit
|
||||
(0x02113, 0x02113,), # Script Small L
|
||||
(0x02116, 0x02116,), # Numero Sign
|
||||
(0x02121, 0x02122,), # Telephone Sign ..Trade Mark Sign
|
||||
(0x02126, 0x02126,), # Ohm Sign
|
||||
(0x0212b, 0x0212b,), # Angstrom Sign
|
||||
(0x02153, 0x02154,), # Vulgar Fraction One Thir..Vulgar Fraction Two Thir
|
||||
(0x0215b, 0x0215e,), # Vulgar Fraction One Eigh..Vulgar Fraction Seven Ei
|
||||
(0x02160, 0x0216b,), # Roman Numeral One ..Roman Numeral Twelve
|
||||
(0x02170, 0x02179,), # Small Roman Numeral One ..Small Roman Numeral Ten
|
||||
(0x02189, 0x02189,), # Vulgar Fraction Zero Thirds
|
||||
(0x02190, 0x02199,), # Leftwards Arrow ..South West Arrow
|
||||
(0x021b8, 0x021b9,), # North West Arrow To Long..Leftwards Arrow To Bar O
|
||||
(0x021d2, 0x021d2,), # Rightwards Double Arrow
|
||||
(0x021d4, 0x021d4,), # Left Right Double Arrow
|
||||
(0x021e7, 0x021e7,), # Upwards White Arrow
|
||||
(0x02200, 0x02200,), # For All
|
||||
(0x02202, 0x02203,), # Partial Differential ..There Exists
|
||||
(0x02207, 0x02208,), # Nabla ..Element Of
|
||||
(0x0220b, 0x0220b,), # Contains As Member
|
||||
(0x0220f, 0x0220f,), # N-ary Product
|
||||
(0x02211, 0x02211,), # N-ary Summation
|
||||
(0x02215, 0x02215,), # Division Slash
|
||||
(0x0221a, 0x0221a,), # Square Root
|
||||
(0x0221d, 0x02220,), # Proportional To ..Angle
|
||||
(0x02223, 0x02223,), # Divides
|
||||
(0x02225, 0x02225,), # Parallel To
|
||||
(0x02227, 0x0222c,), # Logical And ..Double Integral
|
||||
(0x0222e, 0x0222e,), # Contour Integral
|
||||
(0x02234, 0x02237,), # Therefore ..Proportion
|
||||
(0x0223c, 0x0223d,), # Tilde Operator ..Reversed Tilde
|
||||
(0x02248, 0x02248,), # Almost Equal To
|
||||
(0x0224c, 0x0224c,), # All Equal To
|
||||
(0x02252, 0x02252,), # Approximately Equal To Or The Image Of
|
||||
(0x02260, 0x02261,), # Not Equal To ..Identical To
|
||||
(0x02264, 0x02267,), # Less-than Or Equal To ..Greater-than Over Equal
|
||||
(0x0226a, 0x0226b,), # Much Less-than ..Much Greater-than
|
||||
(0x0226e, 0x0226f,), # Not Less-than ..Not Greater-than
|
||||
(0x02282, 0x02283,), # Subset Of ..Superset Of
|
||||
(0x02286, 0x02287,), # Subset Of Or Equal To ..Superset Of Or Equal To
|
||||
(0x02295, 0x02295,), # Circled Plus
|
||||
(0x02299, 0x02299,), # Circled Dot Operator
|
||||
(0x022a5, 0x022a5,), # Up Tack
|
||||
(0x022bf, 0x022bf,), # Right Triangle
|
||||
(0x02312, 0x02312,), # Arc
|
||||
(0x02460, 0x024e9,), # Circled Digit One ..Circled Latin Small Lett
|
||||
(0x024eb, 0x0254b,), # Negative Circled Number ..Box Drawings Heavy Verti
|
||||
(0x02550, 0x02573,), # Box Drawings Double Hori..Box Drawings Light Diago
|
||||
(0x02580, 0x0258f,), # Upper Half Block ..Left One Eighth Block
|
||||
(0x02592, 0x02595,), # Medium Shade ..Right One Eighth Block
|
||||
(0x025a0, 0x025a1,), # Black Square ..White Square
|
||||
(0x025a3, 0x025a9,), # White Square Containing ..Square With Diagonal Cro
|
||||
(0x025b2, 0x025b3,), # Black Up-pointing Triang..White Up-pointing Triang
|
||||
(0x025b6, 0x025b7,), # Black Right-pointing Tri..White Right-pointing Tri
|
||||
(0x025bc, 0x025bd,), # Black Down-pointing Tria..White Down-pointing Tria
|
||||
(0x025c0, 0x025c1,), # Black Left-pointing Tria..White Left-pointing Tria
|
||||
(0x025c6, 0x025c8,), # Black Diamond ..White Diamond Containing
|
||||
(0x025cb, 0x025cb,), # White Circle
|
||||
(0x025ce, 0x025d1,), # Bullseye ..Circle With Right Half B
|
||||
(0x025e2, 0x025e5,), # Black Lower Right Triang..Black Upper Right Triang
|
||||
(0x025ef, 0x025ef,), # Large Circle
|
||||
(0x02605, 0x02606,), # Black Star ..White Star
|
||||
(0x02609, 0x02609,), # Sun
|
||||
(0x0260e, 0x0260f,), # Black Telephone ..White Telephone
|
||||
(0x0261c, 0x0261c,), # White Left Pointing Index
|
||||
(0x0261e, 0x0261e,), # White Right Pointing Index
|
||||
(0x02640, 0x02640,), # Female Sign
|
||||
(0x02642, 0x02642,), # Male Sign
|
||||
(0x02660, 0x02661,), # Black Spade Suit ..White Heart Suit
|
||||
(0x02663, 0x02665,), # Black Club Suit ..Black Heart Suit
|
||||
(0x02667, 0x0266a,), # White Club Suit ..Eighth Note
|
||||
(0x0266c, 0x0266d,), # Beamed Sixteenth Notes ..Music Flat Sign
|
||||
(0x0266f, 0x0266f,), # Music Sharp Sign
|
||||
(0x0269e, 0x0269f,), # Three Lines Converging R..Three Lines Converging L
|
||||
(0x026bf, 0x026bf,), # Squared Key
|
||||
(0x026c6, 0x026cd,), # Rain ..Disabled Car
|
||||
(0x026cf, 0x026d3,), # Pick ..Chains
|
||||
(0x026d5, 0x026e1,), # Alternate One-way Left W..Restricted Left Entry-2
|
||||
(0x026e3, 0x026e3,), # Heavy Circle With Stroke And Two Dots Above
|
||||
(0x026e8, 0x026e9,), # Black Cross On Shield ..Shinto Shrine
|
||||
(0x026eb, 0x026f1,), # Castle ..Umbrella On Ground
|
||||
(0x026f4, 0x026f4,), # Ferry
|
||||
(0x026f6, 0x026f9,), # Square Four Corners ..Person With Ball
|
||||
(0x026fb, 0x026fc,), # Japanese Bank Symbol ..Headstone Graveyard Symb
|
||||
(0x026fe, 0x026ff,), # Cup On Black Square ..White Flag With Horizont
|
||||
(0x0273d, 0x0273d,), # Heavy Teardrop-spoked Asterisk
|
||||
(0x02776, 0x0277f,), # Dingbat Negative Circled..Dingbat Negative Circled
|
||||
(0x02b56, 0x02b59,), # Heavy Oval With Oval Ins..Heavy Circled Saltire
|
||||
(0x03248, 0x0324f,), # Circled Number Ten On Bl..Circled Number Eighty On
|
||||
(0x0e000, 0x0f8ff,), # (nil)
|
||||
(0x0fffd, 0x0fffd,), # Replacement Character
|
||||
(0x1f100, 0x1f10a,), # Digit Zero Full Stop ..Digit Nine Comma
|
||||
(0x1f110, 0x1f12d,), # Parenthesized Latin Capi..Circled Cd
|
||||
(0x1f130, 0x1f169,), # Squared Latin Capital Le..Negative Circled Latin C
|
||||
(0x1f170, 0x1f18d,), # Negative Squared Latin C..Negative Squared Sa
|
||||
(0x1f18f, 0x1f190,), # Negative Squared Wc ..Square Dj
|
||||
(0x1f19b, 0x1f1ac,), # Squared Three D ..Squared Vod
|
||||
(0xf0000, 0xffffd,), # (nil)
|
||||
(0x100000, 0x10fffd,), # (nil)
|
||||
),
|
||||
}
|
||||
2294
lib/wcwidth/table_grapheme.py
Normal file
2294
lib/wcwidth/table_grapheme.py
Normal file
File diff suppressed because it is too large
Load Diff
206
lib/wcwidth/table_mc.py
Normal file
206
lib/wcwidth/table_mc.py
Normal file
@@ -0,0 +1,206 @@
|
||||
"""
|
||||
Exports CATEGORY_MC table keyed by supporting unicode version level.
|
||||
|
||||
This code generated by wcwidth/bin/update-tables.py on 2026-01-29 00:47:54 UTC.
|
||||
"""
|
||||
# pylint: disable=duplicate-code
|
||||
CATEGORY_MC = {
|
||||
'17.0.0': (
|
||||
# Source: DerivedGeneralCategory-17.0.0.txt
|
||||
# Date: 2025-07-24, 00:12:50 GMT
|
||||
#
|
||||
(0x00903, 0x00903,), # Devanagari Sign Visarga
|
||||
(0x0093b, 0x0093b,), # Devanagari Vowel Sign Ooe
|
||||
(0x0093e, 0x00940,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Ii
|
||||
(0x00949, 0x0094c,), # Devanagari Vowel Sign Ca..Devanagari Vowel Sign Au
|
||||
(0x0094e, 0x0094f,), # Devanagari Vowel Sign Pr..Devanagari Vowel Sign Aw
|
||||
(0x00982, 0x00983,), # Bengali Sign Anusvara ..Bengali Sign Visarga
|
||||
(0x009be, 0x009c0,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Ii
|
||||
(0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai
|
||||
(0x009cb, 0x009cc,), # Bengali Vowel Sign O ..Bengali Vowel Sign Au
|
||||
(0x009d7, 0x009d7,), # Bengali Au Length Mark
|
||||
(0x00a03, 0x00a03,), # Gurmukhi Sign Visarga
|
||||
(0x00a3e, 0x00a40,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Ii
|
||||
(0x00a83, 0x00a83,), # Gujarati Sign Visarga
|
||||
(0x00abe, 0x00ac0,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Ii
|
||||
(0x00ac9, 0x00ac9,), # Gujarati Vowel Sign Candra O
|
||||
(0x00acb, 0x00acc,), # Gujarati Vowel Sign O ..Gujarati Vowel Sign Au
|
||||
(0x00b02, 0x00b03,), # Oriya Sign Anusvara ..Oriya Sign Visarga
|
||||
(0x00b3e, 0x00b3e,), # Oriya Vowel Sign Aa
|
||||
(0x00b40, 0x00b40,), # Oriya Vowel Sign Ii
|
||||
(0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai
|
||||
(0x00b4b, 0x00b4c,), # Oriya Vowel Sign O ..Oriya Vowel Sign Au
|
||||
(0x00b57, 0x00b57,), # Oriya Au Length Mark
|
||||
(0x00bbe, 0x00bbf,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign I
|
||||
(0x00bc1, 0x00bc2,), # Tamil Vowel Sign U ..Tamil Vowel Sign Uu
|
||||
(0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai
|
||||
(0x00bca, 0x00bcc,), # Tamil Vowel Sign O ..Tamil Vowel Sign Au
|
||||
(0x00bd7, 0x00bd7,), # Tamil Au Length Mark
|
||||
(0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga
|
||||
(0x00c41, 0x00c44,), # Telugu Vowel Sign U ..Telugu Vowel Sign Vocali
|
||||
(0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga
|
||||
(0x00cbe, 0x00cbe,), # Kannada Vowel Sign Aa
|
||||
(0x00cc0, 0x00cc4,), # Kannada Vowel Sign Ii ..Kannada Vowel Sign Vocal
|
||||
(0x00cc7, 0x00cc8,), # Kannada Vowel Sign Ee ..Kannada Vowel Sign Ai
|
||||
(0x00cca, 0x00ccb,), # Kannada Vowel Sign O ..Kannada Vowel Sign Oo
|
||||
(0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark
|
||||
(0x00cf3, 0x00cf3,), # Kannada Sign Combining Anusvara Above Right
|
||||
(0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga
|
||||
(0x00d3e, 0x00d40,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Ii
|
||||
(0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai
|
||||
(0x00d4a, 0x00d4c,), # Malayalam Vowel Sign O ..Malayalam Vowel Sign Au
|
||||
(0x00d57, 0x00d57,), # Malayalam Au Length Mark
|
||||
(0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya
|
||||
(0x00dcf, 0x00dd1,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Diga
|
||||
(0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan
|
||||
(0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga
|
||||
(0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes
|
||||
(0x00f7f, 0x00f7f,), # Tibetan Sign Rnam Bcad
|
||||
(0x0102b, 0x0102c,), # Myanmar Vowel Sign Tall ..Myanmar Vowel Sign Aa
|
||||
(0x01031, 0x01031,), # Myanmar Vowel Sign E
|
||||
(0x01038, 0x01038,), # Myanmar Sign Visarga
|
||||
(0x0103b, 0x0103c,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M
|
||||
(0x01056, 0x01057,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
|
||||
(0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K
|
||||
(0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo
|
||||
(0x01083, 0x01084,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan
|
||||
(0x01087, 0x0108c,), # Myanmar Sign Shan Tone-2..Myanmar Sign Shan Counci
|
||||
(0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5
|
||||
(0x0109a, 0x0109c,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton
|
||||
(0x01715, 0x01715,), # Tagalog Sign Pamudpod
|
||||
(0x01734, 0x01734,), # Hanunoo Sign Pamudpod
|
||||
(0x017b6, 0x017b6,), # Khmer Vowel Sign Aa
|
||||
(0x017be, 0x017c5,), # Khmer Vowel Sign Oe ..Khmer Vowel Sign Au
|
||||
(0x017c7, 0x017c8,), # Khmer Sign Reahmuk ..Khmer Sign Yuukaleapintu
|
||||
(0x01923, 0x01926,), # Limbu Vowel Sign Ee ..Limbu Vowel Sign Au
|
||||
(0x01929, 0x0192b,), # Limbu Subjoined Letter Y..Limbu Subjoined Letter W
|
||||
(0x01930, 0x01931,), # Limbu Small Letter Ka ..Limbu Small Letter Nga
|
||||
(0x01933, 0x01938,), # Limbu Small Letter Ta ..Limbu Small Letter La
|
||||
(0x01a19, 0x01a1a,), # Buginese Vowel Sign E ..Buginese Vowel Sign O
|
||||
(0x01a55, 0x01a55,), # Tai Tham Consonant Sign Medial Ra
|
||||
(0x01a57, 0x01a57,), # Tai Tham Consonant Sign La Tang Lai
|
||||
(0x01a61, 0x01a61,), # Tai Tham Vowel Sign A
|
||||
(0x01a63, 0x01a64,), # Tai Tham Vowel Sign Aa ..Tai Tham Vowel Sign Tall
|
||||
(0x01a6d, 0x01a72,), # Tai Tham Vowel Sign Oy ..Tai Tham Vowel Sign Tham
|
||||
(0x01b04, 0x01b04,), # Balinese Sign Bisah
|
||||
(0x01b35, 0x01b35,), # Balinese Vowel Sign Tedung
|
||||
(0x01b3b, 0x01b3b,), # Balinese Vowel Sign Ra Repa Tedung
|
||||
(0x01b3d, 0x01b41,), # Balinese Vowel Sign La L..Balinese Vowel Sign Tali
|
||||
(0x01b43, 0x01b44,), # Balinese Vowel Sign Pepe..Balinese Adeg Adeg
|
||||
(0x01b82, 0x01b82,), # Sundanese Sign Pangwisad
|
||||
(0x01ba1, 0x01ba1,), # Sundanese Consonant Sign Pamingkal
|
||||
(0x01ba6, 0x01ba7,), # Sundanese Vowel Sign Pan..Sundanese Vowel Sign Pan
|
||||
(0x01baa, 0x01baa,), # Sundanese Sign Pamaaeh
|
||||
(0x01be7, 0x01be7,), # Batak Vowel Sign E
|
||||
(0x01bea, 0x01bec,), # Batak Vowel Sign I ..Batak Vowel Sign O
|
||||
(0x01bee, 0x01bee,), # Batak Vowel Sign U
|
||||
(0x01bf2, 0x01bf3,), # Batak Pangolat ..Batak Panongonan
|
||||
(0x01c24, 0x01c2b,), # Lepcha Subjoined Letter ..Lepcha Vowel Sign Uu
|
||||
(0x01c34, 0x01c35,), # Lepcha Consonant Sign Ny..Lepcha Consonant Sign Ka
|
||||
(0x01ce1, 0x01ce1,), # Vedic Tone Atharvavedic Independent Svarita
|
||||
(0x01cf7, 0x01cf7,), # Vedic Sign Atikrama
|
||||
(0x0302e, 0x0302f,), # Hangul Single Dot Tone M..Hangul Double Dot Tone M
|
||||
(0x0a823, 0x0a824,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
|
||||
(0x0a827, 0x0a827,), # Syloti Nagri Vowel Sign Oo
|
||||
(0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga
|
||||
(0x0a8b4, 0x0a8c3,), # Saurashtra Consonant Sig..Saurashtra Vowel Sign Au
|
||||
(0x0a952, 0x0a953,), # Rejang Consonant Sign H ..Rejang Virama
|
||||
(0x0a983, 0x0a983,), # Javanese Sign Wignyan
|
||||
(0x0a9b4, 0x0a9b5,), # Javanese Vowel Sign Taru..Javanese Vowel Sign Tolo
|
||||
(0x0a9ba, 0x0a9bb,), # Javanese Vowel Sign Tali..Javanese Vowel Sign Dirg
|
||||
(0x0a9be, 0x0a9c0,), # Javanese Consonant Sign ..Javanese Pangkon
|
||||
(0x0aa2f, 0x0aa30,), # Cham Vowel Sign O ..Cham Vowel Sign Ai
|
||||
(0x0aa33, 0x0aa34,), # Cham Consonant Sign Ya ..Cham Consonant Sign Ra
|
||||
(0x0aa4d, 0x0aa4d,), # Cham Consonant Sign Final H
|
||||
(0x0aa7b, 0x0aa7b,), # Myanmar Sign Pao Karen Tone
|
||||
(0x0aa7d, 0x0aa7d,), # Myanmar Sign Tai Laing Tone-5
|
||||
(0x0aaeb, 0x0aaeb,), # Meetei Mayek Vowel Sign Ii
|
||||
(0x0aaee, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
|
||||
(0x0aaf5, 0x0aaf5,), # Meetei Mayek Vowel Sign Visarga
|
||||
(0x0abe3, 0x0abe4,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
|
||||
(0x0abe6, 0x0abe7,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
|
||||
(0x0abe9, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
|
||||
(0x0abec, 0x0abec,), # Meetei Mayek Lum Iyek
|
||||
(0x11000, 0x11000,), # Brahmi Sign Candrabindu
|
||||
(0x11002, 0x11002,), # Brahmi Sign Visarga
|
||||
(0x11082, 0x11082,), # Kaithi Sign Visarga
|
||||
(0x110b0, 0x110b2,), # Kaithi Vowel Sign Aa ..Kaithi Vowel Sign Ii
|
||||
(0x110b7, 0x110b8,), # Kaithi Vowel Sign O ..Kaithi Vowel Sign Au
|
||||
(0x1112c, 0x1112c,), # Chakma Vowel Sign E
|
||||
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
|
||||
(0x11182, 0x11182,), # Sharada Sign Visarga
|
||||
(0x111b3, 0x111b5,), # Sharada Vowel Sign Aa ..Sharada Vowel Sign Ii
|
||||
(0x111bf, 0x111c0,), # Sharada Vowel Sign Au ..Sharada Sign Virama
|
||||
(0x111ce, 0x111ce,), # Sharada Vowel Sign Prishthamatra E
|
||||
(0x1122c, 0x1122e,), # Khojki Vowel Sign Aa ..Khojki Vowel Sign Ii
|
||||
(0x11232, 0x11233,), # Khojki Vowel Sign O ..Khojki Vowel Sign Au
|
||||
(0x11235, 0x11235,), # Khojki Sign Virama
|
||||
(0x112e0, 0x112e2,), # Khudawadi Vowel Sign Aa ..Khudawadi Vowel Sign Ii
|
||||
(0x11302, 0x11303,), # Grantha Sign Anusvara ..Grantha Sign Visarga
|
||||
(0x1133e, 0x1133f,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign I
|
||||
(0x11341, 0x11344,), # Grantha Vowel Sign U ..Grantha Vowel Sign Vocal
|
||||
(0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai
|
||||
(0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama
|
||||
(0x11357, 0x11357,), # Grantha Au Length Mark
|
||||
(0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal
|
||||
(0x113b8, 0x113ba,), # Tulu-tigalari Vowel Sign..Tulu-tigalari Vowel Sign
|
||||
(0x113c2, 0x113c2,), # Tulu-tigalari Vowel Sign Ee
|
||||
(0x113c5, 0x113c5,), # Tulu-tigalari Vowel Sign Ai
|
||||
(0x113c7, 0x113ca,), # Tulu-tigalari Vowel Sign..Tulu-tigalari Sign Candr
|
||||
(0x113cc, 0x113cd,), # Tulu-tigalari Sign Anusv..Tulu-tigalari Sign Visar
|
||||
(0x113cf, 0x113cf,), # Tulu-tigalari Sign Looped Virama
|
||||
(0x11435, 0x11437,), # Newa Vowel Sign Aa ..Newa Vowel Sign Ii
|
||||
(0x11440, 0x11441,), # Newa Vowel Sign O ..Newa Vowel Sign Au
|
||||
(0x11445, 0x11445,), # Newa Sign Visarga
|
||||
(0x114b0, 0x114b2,), # Tirhuta Vowel Sign Aa ..Tirhuta Vowel Sign Ii
|
||||
(0x114b9, 0x114b9,), # Tirhuta Vowel Sign E
|
||||
(0x114bb, 0x114be,), # Tirhuta Vowel Sign Ai ..Tirhuta Vowel Sign Au
|
||||
(0x114c1, 0x114c1,), # Tirhuta Sign Visarga
|
||||
(0x115af, 0x115b1,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Ii
|
||||
(0x115b8, 0x115bb,), # Siddham Vowel Sign E ..Siddham Vowel Sign Au
|
||||
(0x115be, 0x115be,), # Siddham Sign Visarga
|
||||
(0x11630, 0x11632,), # Modi Vowel Sign Aa ..Modi Vowel Sign Ii
|
||||
(0x1163b, 0x1163c,), # Modi Vowel Sign O ..Modi Vowel Sign Au
|
||||
(0x1163e, 0x1163e,), # Modi Sign Visarga
|
||||
(0x116ac, 0x116ac,), # Takri Sign Visarga
|
||||
(0x116ae, 0x116af,), # Takri Vowel Sign I ..Takri Vowel Sign Ii
|
||||
(0x116b6, 0x116b6,), # Takri Sign Virama
|
||||
(0x1171e, 0x1171e,), # Ahom Consonant Sign Medial Ra
|
||||
(0x11720, 0x11721,), # Ahom Vowel Sign A ..Ahom Vowel Sign Aa
|
||||
(0x11726, 0x11726,), # Ahom Vowel Sign E
|
||||
(0x1182c, 0x1182e,), # Dogra Vowel Sign Aa ..Dogra Vowel Sign Ii
|
||||
(0x11838, 0x11838,), # Dogra Sign Visarga
|
||||
(0x11930, 0x11935,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E
|
||||
(0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O
|
||||
(0x1193d, 0x1193d,), # Dives Akuru Sign Halanta
|
||||
(0x11940, 0x11940,), # Dives Akuru Medial Ya
|
||||
(0x11942, 0x11942,), # Dives Akuru Medial Ra
|
||||
(0x119d1, 0x119d3,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign I
|
||||
(0x119dc, 0x119df,), # Nandinagari Vowel Sign O..Nandinagari Sign Visarga
|
||||
(0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E
|
||||
(0x11a39, 0x11a39,), # Zanabazar Square Sign Visarga
|
||||
(0x11a57, 0x11a58,), # Soyombo Vowel Sign Ai ..Soyombo Vowel Sign Au
|
||||
(0x11a97, 0x11a97,), # Soyombo Sign Visarga
|
||||
(0x11b61, 0x11b61,), # Sharada Vowel Sign Ooe
|
||||
(0x11b65, 0x11b65,), # Sharada Vowel Sign Short O
|
||||
(0x11b67, 0x11b67,), # Sharada Vowel Sign Candra O
|
||||
(0x11c2f, 0x11c2f,), # Bhaiksuki Vowel Sign Aa
|
||||
(0x11c3e, 0x11c3e,), # Bhaiksuki Sign Visarga
|
||||
(0x11ca9, 0x11ca9,), # Marchen Subjoined Letter Ya
|
||||
(0x11cb1, 0x11cb1,), # Marchen Vowel Sign I
|
||||
(0x11cb4, 0x11cb4,), # Marchen Vowel Sign O
|
||||
(0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
|
||||
(0x11d93, 0x11d94,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
|
||||
(0x11d96, 0x11d96,), # Gunjala Gondi Sign Visarga
|
||||
(0x11ef5, 0x11ef6,), # Makasar Vowel Sign E ..Makasar Vowel Sign O
|
||||
(0x11f03, 0x11f03,), # Kawi Sign Visarga
|
||||
(0x11f34, 0x11f35,), # Kawi Vowel Sign Aa ..Kawi Vowel Sign Alternat
|
||||
(0x11f3e, 0x11f3f,), # Kawi Vowel Sign E ..Kawi Vowel Sign Ai
|
||||
(0x11f41, 0x11f41,), # Kawi Sign Killer
|
||||
(0x1612a, 0x1612c,), # Gurung Khema Consonant S..Gurung Khema Consonant S
|
||||
(0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui
|
||||
(0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea
|
||||
(0x1d165, 0x1d166,), # Musical Symbol Combining..Musical Symbol Combining
|
||||
(0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining
|
||||
),
|
||||
}
|
||||
126
lib/wcwidth/table_vs16.py
Normal file
126
lib/wcwidth/table_vs16.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
Exports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level.
|
||||
|
||||
This code generated by wcwidth/bin/update-tables.py on 2025-09-15 16:57:50 UTC.
|
||||
"""
|
||||
# pylint: disable=duplicate-code
|
||||
VS16_NARROW_TO_WIDE = {
|
||||
'9.0.0': (
|
||||
# Source: 9.0.0
|
||||
# Date: 2025-01-30, 21:48:29 GMT
|
||||
#
|
||||
(0x00023, 0x00023,), # Number Sign
|
||||
(0x0002a, 0x0002a,), # Asterisk
|
||||
(0x00030, 0x00039,), # Digit Zero ..Digit Nine
|
||||
(0x000a9, 0x000a9,), # Copyright Sign
|
||||
(0x000ae, 0x000ae,), # Registered Sign
|
||||
(0x0203c, 0x0203c,), # Double Exclamation Mark
|
||||
(0x02049, 0x02049,), # Exclamation Question Mark
|
||||
(0x02122, 0x02122,), # Trade Mark Sign
|
||||
(0x02139, 0x02139,), # Information Source
|
||||
(0x02194, 0x02199,), # Left Right Arrow ..South West Arrow
|
||||
(0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho
|
||||
(0x02328, 0x02328,), # Keyboard
|
||||
(0x023cf, 0x023cf,), # Eject Symbol
|
||||
(0x023ed, 0x023ef,), # Black Right-pointing Dou..Black Right-pointing Tri
|
||||
(0x023f1, 0x023f2,), # Stopwatch ..Timer Clock
|
||||
(0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record
|
||||
(0x024c2, 0x024c2,), # Circled Latin Capital Letter M
|
||||
(0x025aa, 0x025ab,), # Black Small Square ..White Small Square
|
||||
(0x025b6, 0x025b6,), # Black Right-pointing Triangle
|
||||
(0x025c0, 0x025c0,), # Black Left-pointing Triangle
|
||||
(0x025fb, 0x025fc,), # White Medium Square ..Black Medium Square
|
||||
(0x02600, 0x02604,), # Black Sun With Rays ..Comet
|
||||
(0x0260e, 0x0260e,), # Black Telephone
|
||||
(0x02611, 0x02611,), # Ballot Box With Check
|
||||
(0x02618, 0x02618,), # Shamrock
|
||||
(0x0261d, 0x0261d,), # White Up Pointing Index
|
||||
(0x02620, 0x02620,), # Skull And Crossbones
|
||||
(0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign
|
||||
(0x02626, 0x02626,), # Orthodox Cross
|
||||
(0x0262a, 0x0262a,), # Star And Crescent
|
||||
(0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang
|
||||
(0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face
|
||||
(0x02640, 0x02640,), # Female Sign
|
||||
(0x02642, 0x02642,), # Male Sign
|
||||
(0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit
|
||||
(0x02663, 0x02663,), # Black Club Suit
|
||||
(0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit
|
||||
(0x02668, 0x02668,), # Hot Springs
|
||||
(0x0267b, 0x0267b,), # Black Universal Recycling Symbol
|
||||
(0x0267e, 0x0267e,), # Permanent Paper Sign
|
||||
(0x02692, 0x02692,), # Hammer And Pick
|
||||
(0x02694, 0x02697,), # Crossed Swords ..Alembic
|
||||
(0x02699, 0x02699,), # Gear
|
||||
(0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis
|
||||
(0x026a0, 0x026a0,), # Warning Sign
|
||||
(0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign
|
||||
(0x026b0, 0x026b1,), # Coffin ..Funeral Urn
|
||||
(0x026c8, 0x026c8,), # Thunder Cloud And Rain
|
||||
(0x026cf, 0x026cf,), # Pick
|
||||
(0x026d1, 0x026d1,), # Helmet With White Cross
|
||||
(0x026d3, 0x026d3,), # Chains
|
||||
(0x026e9, 0x026e9,), # Shinto Shrine
|
||||
(0x026f0, 0x026f1,), # Mountain ..Umbrella On Ground
|
||||
(0x026f4, 0x026f4,), # Ferry
|
||||
(0x026f7, 0x026f9,), # Skier ..Person With Ball
|
||||
(0x02702, 0x02702,), # Black Scissors
|
||||
(0x02708, 0x02709,), # Airplane ..Envelope
|
||||
(0x0270c, 0x0270d,), # Victory Hand ..Writing Hand
|
||||
(0x0270f, 0x0270f,), # Pencil
|
||||
(0x02712, 0x02712,), # Black Nib
|
||||
(0x02714, 0x02714,), # Heavy Check Mark
|
||||
(0x02716, 0x02716,), # Heavy Multiplication X
|
||||
(0x0271d, 0x0271d,), # Latin Cross
|
||||
(0x02721, 0x02721,), # Star Of David
|
||||
(0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black Star
|
||||
(0x02744, 0x02744,), # Snowflake
|
||||
(0x02747, 0x02747,), # Sparkle
|
||||
(0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart
|
||||
(0x027a1, 0x027a1,), # Black Rightwards Arrow
|
||||
(0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward
|
||||
(0x02b05, 0x02b07,), # Leftwards Black Arrow ..Downwards Black Arrow
|
||||
(0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C
|
||||
(0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C
|
||||
(0x1f321, 0x1f321,), # Thermometer
|
||||
(0x1f324, 0x1f32c,), # White Sun With Small Clo..Wind Blowing Face
|
||||
(0x1f336, 0x1f336,), # Hot Pepper
|
||||
(0x1f37d, 0x1f37d,), # Fork And Knife With Plate
|
||||
(0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon
|
||||
(0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs
|
||||
(0x1f39e, 0x1f39f,), # Film Frames ..Admission Tickets
|
||||
(0x1f3cb, 0x1f3ce,), # Weight Lifter ..Racing Car
|
||||
(0x1f3d4, 0x1f3df,), # Snow Capped Mountain ..Stadium
|
||||
(0x1f3f3, 0x1f3f3,), # Waving White Flag
|
||||
(0x1f3f5, 0x1f3f5,), # Rosette
|
||||
(0x1f3f7, 0x1f3f7,), # Label
|
||||
(0x1f43f, 0x1f43f,), # Chipmunk
|
||||
(0x1f441, 0x1f441,), # Eye
|
||||
(0x1f4fd, 0x1f4fd,), # Film Projector
|
||||
(0x1f549, 0x1f54a,), # Om Symbol ..Dove Of Peace
|
||||
(0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock
|
||||
(0x1f573, 0x1f579,), # Hole ..Joystick
|
||||
(0x1f587, 0x1f587,), # Linked Paperclips
|
||||
(0x1f58a, 0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon
|
||||
(0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed
|
||||
(0x1f5a5, 0x1f5a5,), # Desktop Computer
|
||||
(0x1f5a8, 0x1f5a8,), # Printer
|
||||
(0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball
|
||||
(0x1f5bc, 0x1f5bc,), # Frame With Picture
|
||||
(0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet
|
||||
(0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad
|
||||
(0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper
|
||||
(0x1f5e1, 0x1f5e1,), # Dagger Knife
|
||||
(0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette
|
||||
(0x1f5e8, 0x1f5e8,), # Left Speech Bubble
|
||||
(0x1f5ef, 0x1f5ef,), # Right Anger Bubble
|
||||
(0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot
|
||||
(0x1f5fa, 0x1f5fa,), # World Map
|
||||
(0x1f6cb, 0x1f6cb,), # Couch And Lamp
|
||||
(0x1f6cd, 0x1f6cf,), # Shopping Bags ..Bed
|
||||
(0x1f6e0, 0x1f6e5,), # Hammer And Wrench ..Motor Boat
|
||||
(0x1f6e9, 0x1f6e9,), # Small Airplane
|
||||
(0x1f6f0, 0x1f6f0,), # Satellite
|
||||
(0x1f6f3, 0x1f6f3,), # Passenger Ship
|
||||
),
|
||||
}
|
||||
138
lib/wcwidth/table_wide.py
Normal file
138
lib/wcwidth/table_wide.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""
|
||||
Exports WIDE_EASTASIAN table keyed by supporting unicode version level.
|
||||
|
||||
This code generated by wcwidth/bin/update-tables.py on 2026-01-30 00:58:17 UTC.
|
||||
"""
|
||||
# pylint: disable=duplicate-code
|
||||
WIDE_EASTASIAN = {
|
||||
'17.0.0': (
|
||||
# Source: EastAsianWidth-17.0.0.txt
|
||||
# Date: 2025-07-24, 00:12:54 GMT
|
||||
#
|
||||
(0x01100, 0x0115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler
|
||||
(0x0231a, 0x0231b,), # Watch ..Hourglass
|
||||
(0x02329, 0x0232a,), # Left-pointing Angle Brac..Right-pointing Angle Bra
|
||||
(0x023e9, 0x023ec,), # Black Right-pointing Dou..Black Down-pointing Doub
|
||||
(0x023f0, 0x023f0,), # Alarm Clock
|
||||
(0x023f3, 0x023f3,), # Hourglass With Flowing Sand
|
||||
(0x025fd, 0x025fe,), # White Medium Small Squar..Black Medium Small Squar
|
||||
(0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage
|
||||
(0x02630, 0x02637,), # Trigram For Heaven ..Trigram For Earth
|
||||
(0x02648, 0x02653,), # Aries ..Pisces
|
||||
(0x0267f, 0x0267f,), # Wheelchair Symbol
|
||||
(0x0268a, 0x0268f,), # Monogram For Yang ..Digram For Greater Yin
|
||||
(0x02693, 0x02693,), # Anchor
|
||||
(0x026a1, 0x026a1,), # High Voltage Sign
|
||||
(0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle
|
||||
(0x026bd, 0x026be,), # Soccer Ball ..Baseball
|
||||
(0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud
|
||||
(0x026ce, 0x026ce,), # Ophiuchus
|
||||
(0x026d4, 0x026d4,), # No Entry
|
||||
(0x026ea, 0x026ea,), # Church
|
||||
(0x026f2, 0x026f3,), # Fountain ..Flag In Hole
|
||||
(0x026f5, 0x026f5,), # Sailboat
|
||||
(0x026fa, 0x026fa,), # Tent
|
||||
(0x026fd, 0x026fd,), # Fuel Pump
|
||||
(0x02705, 0x02705,), # White Heavy Check Mark
|
||||
(0x0270a, 0x0270b,), # Raised Fist ..Raised Hand
|
||||
(0x02728, 0x02728,), # Sparkles
|
||||
(0x0274c, 0x0274c,), # Cross Mark
|
||||
(0x0274e, 0x0274e,), # Negative Squared Cross Mark
|
||||
(0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O
|
||||
(0x02757, 0x02757,), # Heavy Exclamation Mark Symbol
|
||||
(0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign
|
||||
(0x027b0, 0x027b0,), # Curly Loop
|
||||
(0x027bf, 0x027bf,), # Double Curly Loop
|
||||
(0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square
|
||||
(0x02b50, 0x02b50,), # White Medium Star
|
||||
(0x02b55, 0x02b55,), # Heavy Large Circle
|
||||
(0x02e80, 0x02e99,), # Cjk Radical Repeat ..Cjk Radical Rap
|
||||
(0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified
|
||||
(0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute
|
||||
(0x02ff0, 0x03029,), # Ideographic Description ..Hangzhou Numeral Nine
|
||||
(0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In
|
||||
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
|
||||
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
|
||||
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
|
||||
(0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
|
||||
(0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
|
||||
(0x03190, 0x031e5,), # Ideographic Annotation L..Cjk Stroke Szp
|
||||
(0x031ef, 0x0321e,), # Ideographic Description ..Parenthesized Korean Cha
|
||||
(0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto
|
||||
(0x03250, 0x0a48c,), # Partnership Sign ..Yi Syllable Yyr
|
||||
(0x0a490, 0x0a4c6,), # Yi Radical Qot ..Yi Radical Ke
|
||||
(0x0a960, 0x0a97c,), # Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo
|
||||
(0x0ac00, 0x0d7a3,), # Hangul Syllable Ga ..Hangul Syllable Hih
|
||||
(0x0f900, 0x0faff,), # Cjk Compatibility Ideogr..(nil)
|
||||
(0x0fe10, 0x0fe19,), # Presentation Form For Ve..Presentation Form For Ve
|
||||
(0x0fe30, 0x0fe52,), # Presentation Form For Ve..Small Full Stop
|
||||
(0x0fe54, 0x0fe66,), # Small Semicolon ..Small Equals Sign
|
||||
(0x0fe68, 0x0fe6b,), # Small Reverse Solidus ..Small Commercial At
|
||||
(0x0ff01, 0x0ff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa
|
||||
(0x0ffe0, 0x0ffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign
|
||||
(0x16fe0, 0x16fe3,), # Tangut Iteration Mark ..Old Chinese Iteration Ma
|
||||
(0x16ff2, 0x16ff6,), # Chinese Small Simplified..Yangqin Sign Slow Two Be
|
||||
(0x17000, 0x18cd5,), # (nil) ..Khitan Small Script Char
|
||||
(0x18cff, 0x18d1e,), # Khitan Small Script Char..(nil)
|
||||
(0x18d80, 0x18df2,), # Tangut Component-769 ..Tangut Component-883
|
||||
(0x1aff0, 0x1aff3,), # Katakana Letter Minnan T..Katakana Letter Minnan T
|
||||
(0x1aff5, 0x1affb,), # Katakana Letter Minnan T..Katakana Letter Minnan N
|
||||
(0x1affd, 0x1affe,), # Katakana Letter Minnan N..Katakana Letter Minnan N
|
||||
(0x1b000, 0x1b122,), # Katakana Letter Archaic ..Katakana Letter Archaic
|
||||
(0x1b132, 0x1b132,), # Hiragana Letter Small Ko
|
||||
(0x1b150, 0x1b152,), # Hiragana Letter Small Wi..Hiragana Letter Small Wo
|
||||
(0x1b155, 0x1b155,), # Katakana Letter Small Ko
|
||||
(0x1b164, 0x1b167,), # Katakana Letter Small Wi..Katakana Letter Small N
|
||||
(0x1b170, 0x1b2fb,), # Nushu Character-1b170 ..Nushu Character-1b2fb
|
||||
(0x1d300, 0x1d356,), # Monogram For Earth ..Tetragram For Fostering
|
||||
(0x1d360, 0x1d376,), # Counting Rod Unit Digit ..Ideographic Tally Mark F
|
||||
(0x1f004, 0x1f004,), # Mahjong Tile Red Dragon
|
||||
(0x1f0cf, 0x1f0cf,), # Playing Card Black Joker
|
||||
(0x1f18e, 0x1f18e,), # Negative Squared Ab
|
||||
(0x1f191, 0x1f19a,), # Squared Cl ..Squared Vs
|
||||
(0x1f1e6, 0x1f202,), # Regional Indicator Symbo..Squared Katakana Sa
|
||||
(0x1f210, 0x1f23b,), # Squared Cjk Unified Ideo..Squared Cjk Unified Ideo
|
||||
(0x1f240, 0x1f248,), # Tortoise Shell Bracketed..Tortoise Shell Bracketed
|
||||
(0x1f250, 0x1f251,), # Circled Ideograph Advant..Circled Ideograph Accept
|
||||
(0x1f260, 0x1f265,), # Rounded Symbol For Fu ..Rounded Symbol For Cai
|
||||
(0x1f300, 0x1f320,), # Cyclone ..Shooting Star
|
||||
(0x1f32d, 0x1f335,), # Hot Dog ..Cactus
|
||||
(0x1f337, 0x1f37c,), # Tulip ..Baby Bottle
|
||||
(0x1f37e, 0x1f393,), # Bottle With Popping Cork..Graduation Cap
|
||||
(0x1f3a0, 0x1f3ca,), # Carousel Horse ..Swimmer
|
||||
(0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And
|
||||
(0x1f3e0, 0x1f3f0,), # House Building ..European Castle
|
||||
(0x1f3f4, 0x1f3f4,), # Waving Black Flag
|
||||
(0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints
|
||||
(0x1f440, 0x1f440,), # Eyes
|
||||
(0x1f442, 0x1f4fc,), # Ear ..Videocassette
|
||||
(0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red
|
||||
(0x1f54b, 0x1f54e,), # Kaaba ..Menorah With Nine Branch
|
||||
(0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty
|
||||
(0x1f57a, 0x1f57a,), # Man Dancing
|
||||
(0x1f595, 0x1f596,), # Reversed Hand With Middl..Raised Hand With Part Be
|
||||
(0x1f5a4, 0x1f5a4,), # Black Heart
|
||||
(0x1f5fb, 0x1f64f,), # Mount Fuji ..Person With Folded Hands
|
||||
(0x1f680, 0x1f6c5,), # Rocket ..Left Luggage
|
||||
(0x1f6cc, 0x1f6cc,), # Sleeping Accommodation
|
||||
(0x1f6d0, 0x1f6d2,), # Place Of Worship ..Shopping Trolley
|
||||
(0x1f6d5, 0x1f6d8,), # Hindu Temple ..Landslide
|
||||
(0x1f6dc, 0x1f6df,), # Wireless ..Ring Buoy
|
||||
(0x1f6eb, 0x1f6ec,), # Airplane Departure ..Airplane Arriving
|
||||
(0x1f6f4, 0x1f6fc,), # Scooter ..Roller Skate
|
||||
(0x1f7e0, 0x1f7eb,), # Large Orange Circle ..Large Brown Square
|
||||
(0x1f7f0, 0x1f7f0,), # Heavy Equals Sign
|
||||
(0x1f90c, 0x1f93a,), # Pinched Fingers ..Fencer
|
||||
(0x1f93c, 0x1f945,), # Wrestlers ..Goal Net
|
||||
(0x1f947, 0x1f9ff,), # First Place Medal ..Nazar Amulet
|
||||
(0x1fa70, 0x1fa7c,), # Ballet Shoes ..Crutch
|
||||
(0x1fa80, 0x1fa8a,), # Yo-yo ..Trombone
|
||||
(0x1fa8e, 0x1fac6,), # Treasure Chest ..Fingerprint
|
||||
(0x1fac8, 0x1fac8,), # Hairy Creature
|
||||
(0x1facd, 0x1fadc,), # Orca ..Root Vegetable
|
||||
(0x1fadf, 0x1faea,), # Splatter ..Distorted Face
|
||||
(0x1faef, 0x1faf8,), # Fight Cloud ..Rightwards Pushing Hand
|
||||
(0x20000, 0x2fffd,), # Cjk Unified Ideograph-20..(nil)
|
||||
(0x30000, 0x3fffd,), # Cjk Unified Ideograph-30..(nil)
|
||||
),
|
||||
}
|
||||
350
lib/wcwidth/table_zero.py
Normal file
350
lib/wcwidth/table_zero.py
Normal file
@@ -0,0 +1,350 @@
|
||||
"""
|
||||
Exports ZERO_WIDTH table keyed by supporting unicode version level.
|
||||
|
||||
This code generated by wcwidth/bin/update-tables.py on 2026-01-30 00:48:24 UTC.
|
||||
"""
|
||||
# pylint: disable=duplicate-code
|
||||
ZERO_WIDTH = {
|
||||
'17.0.0': (
|
||||
# Source: DerivedGeneralCategory-17.0.0.txt
|
||||
# Date: 2025-07-24, 00:12:50 GMT
|
||||
#
|
||||
(0x00000, 0x00000,), # (nil)
|
||||
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
|
||||
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
|
||||
(0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
|
||||
(0x005bf, 0x005bf,), # Hebrew Point Rafe
|
||||
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
|
||||
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
|
||||
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
|
||||
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
|
||||
(0x0061c, 0x0061c,), # Arabic Letter Mark
|
||||
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
|
||||
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
|
||||
(0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
|
||||
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
|
||||
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
|
||||
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
|
||||
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
|
||||
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
|
||||
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
|
||||
(0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot
|
||||
(0x007fd, 0x007fd,), # Nko Dantayalan
|
||||
(0x00816, 0x00819,), # Samaritan Mark In ..Samaritan Mark Dagesh
|
||||
(0x0081b, 0x00823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A
|
||||
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
|
||||
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
|
||||
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
|
||||
(0x00897, 0x0089f,), # Arabic Pepet ..Arabic Half Madda Over M
|
||||
(0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
|
||||
(0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
|
||||
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
|
||||
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
|
||||
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
|
||||
(0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo
|
||||
(0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga
|
||||
(0x009bc, 0x009bc,), # Bengali Sign Nukta
|
||||
(0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal
|
||||
(0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai
|
||||
(0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama
|
||||
(0x009d7, 0x009d7,), # Bengali Au Length Mark
|
||||
(0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal
|
||||
(0x009fe, 0x009fe,), # Bengali Sandhi Mark
|
||||
(0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga
|
||||
(0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta
|
||||
(0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu
|
||||
(0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai
|
||||
(0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama
|
||||
(0x00a51, 0x00a51,), # Gurmukhi Sign Udaat
|
||||
(0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak
|
||||
(0x00a75, 0x00a75,), # Gurmukhi Sign Yakash
|
||||
(0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga
|
||||
(0x00abc, 0x00abc,), # Gujarati Sign Nukta
|
||||
(0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand
|
||||
(0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand
|
||||
(0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama
|
||||
(0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca
|
||||
(0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle
|
||||
(0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga
|
||||
(0x00b3c, 0x00b3c,), # Oriya Sign Nukta
|
||||
(0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic
|
||||
(0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai
|
||||
(0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama
|
||||
(0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark
|
||||
(0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic
|
||||
(0x00b82, 0x00b82,), # Tamil Sign Anusvara
|
||||
(0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu
|
||||
(0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai
|
||||
(0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama
|
||||
(0x00bd7, 0x00bd7,), # Tamil Au Length Mark
|
||||
(0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An
|
||||
(0x00c3c, 0x00c3c,), # Telugu Sign Nukta
|
||||
(0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali
|
||||
(0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai
|
||||
(0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama
|
||||
(0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark
|
||||
(0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali
|
||||
(0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga
|
||||
(0x00cbc, 0x00cbc,), # Kannada Sign Nukta
|
||||
(0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal
|
||||
(0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai
|
||||
(0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama
|
||||
(0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark
|
||||
(0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal
|
||||
(0x00cf3, 0x00cf3,), # Kannada Sign Combining Anusvara Above Right
|
||||
(0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga
|
||||
(0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular
|
||||
(0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc
|
||||
(0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai
|
||||
(0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama
|
||||
(0x00d57, 0x00d57,), # Malayalam Au Length Mark
|
||||
(0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc
|
||||
(0x00d81, 0x00d83,), # Sinhala Sign Candrabindu..Sinhala Sign Visargaya
|
||||
(0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna
|
||||
(0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti
|
||||
(0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla
|
||||
(0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan
|
||||
(0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga
|
||||
(0x00e31, 0x00e31,), # Thai Character Mai Han-akat
|
||||
(0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu
|
||||
(0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan
|
||||
(0x00eb1, 0x00eb1,), # Lao Vowel Sign Mai Kan
|
||||
(0x00eb4, 0x00ebc,), # Lao Vowel Sign I ..Lao Semivowel Sign Lo
|
||||
(0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..Lao Yamakkan
|
||||
(0x00f18, 0x00f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig
|
||||
(0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla
|
||||
(0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags
|
||||
(0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru
|
||||
(0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes
|
||||
(0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta
|
||||
(0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags
|
||||
(0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter
|
||||
(0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter
|
||||
(0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan
|
||||
(0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M
|
||||
(0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
|
||||
(0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M
|
||||
(0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K
|
||||
(0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo
|
||||
(0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah
|
||||
(0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci
|
||||
(0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5
|
||||
(0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton
|
||||
(0x01160, 0x011ff,), # Hangul Jungseong Filler ..Hangul Jongseong Ssangni
|
||||
(0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin
|
||||
(0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog Sign Pamudpod
|
||||
(0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod
|
||||
(0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U
|
||||
(0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U
|
||||
(0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat
|
||||
(0x017dd, 0x017dd,), # Khmer Sign Atthacan
|
||||
(0x0180b, 0x0180f,), # Mongolian Free Variation..Mongolian Free Variation
|
||||
(0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
|
||||
(0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga
|
||||
(0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W
|
||||
(0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i
|
||||
(0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae
|
||||
(0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign
|
||||
(0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue
|
||||
(0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot
|
||||
(0x01ab0, 0x01add,), # Combining Doubled Circum..Combining Dot-and-ring B
|
||||
(0x01ae0, 0x01aeb,), # Combining Left Tack Abov..Combining Double Rightwa
|
||||
(0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah
|
||||
(0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg
|
||||
(0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol
|
||||
(0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad
|
||||
(0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign
|
||||
(0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan
|
||||
(0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta
|
||||
(0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha
|
||||
(0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda
|
||||
(0x01ced, 0x01ced,), # Vedic Sign Tiryak
|
||||
(0x01cf4, 0x01cf4,), # Vedic Tone Candra Above
|
||||
(0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A
|
||||
(0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
|
||||
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
|
||||
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
|
||||
(0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
|
||||
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
|
||||
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
|
||||
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
|
||||
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
|
||||
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
|
||||
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
|
||||
(0x03164, 0x03164,), # Hangul Filler
|
||||
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
|
||||
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
|
||||
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
|
||||
(0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk
|
||||
(0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara
|
||||
(0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta
|
||||
(0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara
|
||||
(0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
|
||||
(0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta
|
||||
(0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga
|
||||
(0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi
|
||||
(0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig
|
||||
(0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay
|
||||
(0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop
|
||||
(0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama
|
||||
(0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan
|
||||
(0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon
|
||||
(0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw
|
||||
(0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa
|
||||
(0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng
|
||||
(0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina
|
||||
(0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T
|
||||
(0x0aab0, 0x0aab0,), # Tai Viet Mai Kang
|
||||
(0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U
|
||||
(0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia
|
||||
(0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek
|
||||
(0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho
|
||||
(0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
|
||||
(0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama
|
||||
(0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
|
||||
(0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek
|
||||
(0x0d7b0, 0x0d7ff,), # Hangul Jungseong O-yeo ..(nil)
|
||||
(0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika
|
||||
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
|
||||
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
|
||||
(0x0feff, 0x0feff,), # Zero Width No-break Space
|
||||
(0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
|
||||
(0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
|
||||
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
|
||||
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
|
||||
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
|
||||
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
|
||||
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
|
||||
(0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
|
||||
(0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo
|
||||
(0x10a3f, 0x10a3f,), # Kharoshthi Virama
|
||||
(0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation
|
||||
(0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas
|
||||
(0x10d69, 0x10d6d,), # Garay Vowel Sign E ..Garay Consonant Nasaliza
|
||||
(0x10eab, 0x10eac,), # Yezidi Combining Hamza M..Yezidi Combining Madda M
|
||||
(0x10efa, 0x10eff,), # Arabic Double Vertical B..Arabic Small Low Word Ma
|
||||
(0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke
|
||||
(0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two
|
||||
(0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga
|
||||
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
|
||||
(0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama
|
||||
(0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
|
||||
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
|
||||
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
|
||||
(0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
|
||||
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
|
||||
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
|
||||
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
|
||||
(0x11173, 0x11173,), # Mahajani Sign Nukta
|
||||
(0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga
|
||||
(0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama
|
||||
(0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe
|
||||
(0x111ce, 0x111cf,), # Sharada Vowel Sign Prish..Sharada Sign Inverted Ca
|
||||
(0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda
|
||||
(0x1123e, 0x1123e,), # Khojki Sign Sukun
|
||||
(0x11241, 0x11241,), # Khojki Vowel Sign Vocalic R
|
||||
(0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama
|
||||
(0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga
|
||||
(0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta
|
||||
(0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal
|
||||
(0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai
|
||||
(0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama
|
||||
(0x11357, 0x11357,), # Grantha Au Length Mark
|
||||
(0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal
|
||||
(0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit
|
||||
(0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter
|
||||
(0x113b8, 0x113c0,), # Tulu-tigalari Vowel Sign..Tulu-tigalari Vowel Sign
|
||||
(0x113c2, 0x113c2,), # Tulu-tigalari Vowel Sign Ee
|
||||
(0x113c5, 0x113c5,), # Tulu-tigalari Vowel Sign Ai
|
||||
(0x113c7, 0x113ca,), # Tulu-tigalari Vowel Sign..Tulu-tigalari Sign Candr
|
||||
(0x113cc, 0x113d0,), # Tulu-tigalari Sign Anusv..Tulu-tigalari Conjoiner
|
||||
(0x113d2, 0x113d2,), # Tulu-tigalari Gemination Mark
|
||||
(0x113e1, 0x113e2,), # Tulu-tigalari Vedic Tone..Tulu-tigalari Vedic Tone
|
||||
(0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta
|
||||
(0x1145e, 0x1145e,), # Newa Sandhi Mark
|
||||
(0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta
|
||||
(0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal
|
||||
(0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta
|
||||
(0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter
|
||||
(0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra
|
||||
(0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta
|
||||
(0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer
|
||||
(0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta
|
||||
(0x11930, 0x11935,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E
|
||||
(0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O
|
||||
(0x1193b, 0x1193e,), # Dives Akuru Sign Anusvar..Dives Akuru Virama
|
||||
(0x11940, 0x11940,), # Dives Akuru Medial Ya
|
||||
(0x11942, 0x11943,), # Dives Akuru Medial Ra ..Dives Akuru Sign Nukta
|
||||
(0x119d1, 0x119d7,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign V
|
||||
(0x119da, 0x119e0,), # Nandinagari Vowel Sign E..Nandinagari Sign Virama
|
||||
(0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E
|
||||
(0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L
|
||||
(0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi
|
||||
(0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster
|
||||
(0x11a47, 0x11a47,), # Zanabazar Square Subjoiner
|
||||
(0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar
|
||||
(0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner
|
||||
(0x11b60, 0x11b67,), # Sharada Vowel Sign Oe ..Sharada Vowel Sign Candr
|
||||
(0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc
|
||||
(0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama
|
||||
(0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter
|
||||
(0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu
|
||||
(0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
|
||||
(0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E
|
||||
(0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
|
||||
(0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama
|
||||
(0x11d47, 0x11d47,), # Masaram Gondi Ra-kara
|
||||
(0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
|
||||
(0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
|
||||
(0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama
|
||||
(0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O
|
||||
(0x11f00, 0x11f01,), # Kawi Sign Candrabindu ..Kawi Sign Anusvara
|
||||
(0x11f03, 0x11f03,), # Kawi Sign Visarga
|
||||
(0x11f34, 0x11f3a,), # Kawi Vowel Sign Aa ..Kawi Vowel Sign Vocalic
|
||||
(0x11f3e, 0x11f42,), # Kawi Vowel Sign E ..Kawi Conjoiner
|
||||
(0x11f5a, 0x11f5a,), # Kawi Sign Nukta
|
||||
(0x13430, 0x13440,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph Mirr
|
||||
(0x13447, 0x13455,), # Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi
|
||||
(0x1611e, 0x1612f,), # Gurung Khema Vowel Sign ..Gurung Khema Sign Tholho
|
||||
(0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High
|
||||
(0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta
|
||||
(0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar
|
||||
(0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui
|
||||
(0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below
|
||||
(0x16fe4, 0x16fe4,), # Khitan Small Script Filler
|
||||
(0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea
|
||||
(0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark
|
||||
(0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step
|
||||
(0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark
|
||||
(0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie
|
||||
(0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining
|
||||
(0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining
|
||||
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
|
||||
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
|
||||
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
|
||||
(0x1da00, 0x1da36,), # Signwriting Head Rim ..Signwriting Air Sucking
|
||||
(0x1da3b, 0x1da6c,), # Signwriting Mouth Closed..Signwriting Excitement
|
||||
(0x1da75, 0x1da75,), # Signwriting Upper Body Tilting From Hip Joints
|
||||
(0x1da84, 0x1da84,), # Signwriting Location Head Neck
|
||||
(0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie
|
||||
(0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod
|
||||
(0x1e000, 0x1e006,), # Combining Glagolitic Let..Combining Glagolitic Let
|
||||
(0x1e008, 0x1e018,), # Combining Glagolitic Let..Combining Glagolitic Let
|
||||
(0x1e01b, 0x1e021,), # Combining Glagolitic Let..Combining Glagolitic Let
|
||||
(0x1e023, 0x1e024,), # Combining Glagolitic Let..Combining Glagolitic Let
|
||||
(0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let
|
||||
(0x1e08f, 0x1e08f,), # Combining Cyrillic Small Letter Byelorussian-ukr
|
||||
(0x1e130, 0x1e136,), # Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T
|
||||
(0x1e2ae, 0x1e2ae,), # Toto Sign Rising Tone
|
||||
(0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini
|
||||
(0x1e4ec, 0x1e4ef,), # Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh
|
||||
(0x1e5ee, 0x1e5ef,), # Ol Onal Sign Mu ..Ol Onal Sign Ikir
|
||||
(0x1e6e3, 0x1e6e3,), # Tai Yo Sign Ue
|
||||
(0x1e6e6, 0x1e6e6,), # Tai Yo Sign Au
|
||||
(0x1e6ee, 0x1e6ef,), # Tai Yo Sign Ay ..Tai Yo Sign Ang
|
||||
(0x1e6f5, 0x1e6f5,), # Tai Yo Sign Om
|
||||
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
|
||||
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
|
||||
(0xe0000, 0xe0fff,), # (nil)
|
||||
),
|
||||
}
|
||||
656
lib/wcwidth/textwrap.py
Normal file
656
lib/wcwidth/textwrap.py
Normal file
@@ -0,0 +1,656 @@
|
||||
"""
|
||||
Sequence-aware text wrapping functions.
|
||||
|
||||
This module provides functions for wrapping text that may contain terminal escape sequences, with
|
||||
proper handling of Unicode grapheme clusters and character display widths.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
# std imports
|
||||
import re
|
||||
import secrets
|
||||
import textwrap
|
||||
|
||||
from typing import TYPE_CHECKING, NamedTuple
|
||||
|
||||
# local
|
||||
from .wcwidth import width as _width
|
||||
from .wcwidth import iter_sequences
|
||||
from .grapheme import iter_graphemes
|
||||
from .sgr_state import propagate_sgr as _propagate_sgr
|
||||
from .escape_sequences import ZERO_WIDTH_PATTERN
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from typing import Any, Literal
|
||||
|
||||
|
||||
class _HyperlinkState(NamedTuple):
|
||||
"""State for tracking an open OSC 8 hyperlink across line breaks."""
|
||||
|
||||
url: str # hyperlink target URL
|
||||
params: str # id=xxx and other key=value pairs separated by :
|
||||
terminator: str # BEL (\x07) or ST (\x1b\\)
|
||||
|
||||
|
||||
# Hyperlink parsing: captures (params, url, terminator)
|
||||
_HYPERLINK_OPEN_RE = re.compile(r'\x1b]8;([^;]*);([^\x07\x1b]*)(\x07|\x1b\\)')
|
||||
|
||||
|
||||
def _parse_hyperlink_open(seq: str) -> _HyperlinkState | None:
|
||||
"""Parse OSC 8 open sequence, return state or None."""
|
||||
if (m := _HYPERLINK_OPEN_RE.match(seq)):
|
||||
return _HyperlinkState(url=m.group(2), params=m.group(1), terminator=m.group(3))
|
||||
return None
|
||||
|
||||
|
||||
def _make_hyperlink_open(url: str, params: str, terminator: str) -> str:
|
||||
"""Generate OSC 8 open sequence."""
|
||||
return f'\x1b]8;{params};{url}{terminator}'
|
||||
|
||||
|
||||
def _make_hyperlink_close(terminator: str) -> str:
|
||||
"""Generate OSC 8 close sequence."""
|
||||
return f'\x1b]8;;{terminator}'
|
||||
|
||||
|
||||
class SequenceTextWrapper(textwrap.TextWrapper):
    """
    Sequence-aware text wrapper extending :class:`textwrap.TextWrapper`.

    This wrapper properly handles terminal escape sequences and Unicode grapheme clusters when
    calculating text width for wrapping.

    This implementation is based on the SequenceTextWrapper from the 'blessed' library, with
    contributions from Avram Lubkin and grayjk.

    The key difference from the blessed implementation is the addition of grapheme cluster support
    via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis
    and variations, regional indicator flags, and combining characters.

    OSC 8 hyperlinks are handled specially: when a hyperlink must span multiple lines, each line
    receives complete open/close sequences with a shared ``id`` parameter, ensuring terminals
    treat the fragments as a single hyperlink for hover underlining. If the original hyperlink
    already has an ``id`` parameter, it is preserved; otherwise, one is generated.
    """

    def __init__(self, width: int = 70, *,
                 control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
                 tabsize: int = 8,
                 ambiguous_width: int = 1,
                 **kwargs: Any) -> None:
        """
        Initialize the wrapper.

        :param width: Maximum line width in display cells.
        :param control_codes: How to handle control sequences (see :func:`~.width`).
        :param tabsize: Tab stop width for tab expansion.
        :param ambiguous_width: Width to use for East Asian Ambiguous (A) characters.
        :param kwargs: Additional arguments passed to :class:`textwrap.TextWrapper`.
        """
        super().__init__(width=width, **kwargs)
        self.control_codes = control_codes
        # Assigned after super().__init__() so the explicit argument wins over
        # whatever the parent initializer stored.
        self.tabsize = tabsize
        self.ambiguous_width = ambiguous_width

    @staticmethod
    def _next_hyperlink_id() -> str:
        """Generate unique hyperlink id as 8-character hex string."""
        return secrets.token_hex(4)

    def _width(self, text: str) -> int:
        """
        Measure text width accounting for sequences.

        :param text: Text to measure, may contain terminal sequences.
        :returns: Result of the module-level width function using this wrapper's
            ``control_codes``, ``tabsize`` and ``ambiguous_width`` settings.
        """
        return _width(text, control_codes=self.control_codes, tabsize=self.tabsize,
                      ambiguous_width=self.ambiguous_width)

    def _strip_sequences(self, text: str) -> str:
        """Return *text* with every segment reported as a sequence removed."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if not is_seq)

    def _extract_sequences(self, text: str) -> str:
        """Return only the segments of *text* reported as sequences, concatenated in order."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if is_seq)

    def _split(self, text: str) -> list[str]:  # pylint: disable=too-many-locals
        r"""
        Sequence-aware variant of :meth:`textwrap.TextWrapper._split`.

        This method ensures that terminal escape sequences don't interfere with the text splitting
        logic, particularly for hyphen-based word breaking. It builds a position mapping from
        stripped text to original text, calls the parent's _split on stripped text, then maps chunks
        back.

        OSC hyperlink sequences are treated as word boundaries::

            >>> wrap('foo \x1b]8;;https://example.com\x07link\x1b]8;;\x07 bar', 6)
            ['foo', '\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 'bar']

        Both BEL (``\x07``) and ST (``\x1b\\``) terminators are supported.

        :param text: Text to split, may contain terminal sequences.
        :returns: Chunks of the original text (sequences included).
        """
        # pylint: disable=too-many-locals,too-many-branches
        # Build a mapping from stripped text positions to original text positions.
        #
        # Track where each character ENDS so that sequences between characters
        # attach to the following text (not preceding text). This ensures sequences
        # aren't lost when whitespace is dropped.
        #
        # char_end[i] = position in original text right after the i-th stripped char
        char_end: list[int] = []
        # Pieces of the stripped text, joined once below instead of growing a
        # string one character at a time.  Only non-empty pieces are stored, so
        # "stripped_parts is non-empty" means "stripped text is non-empty" and
        # stripped_parts[-1][-1] is always the last stripped character.
        stripped_parts: list[str] = []
        original_pos = 0
        prev_was_hyperlink_close = False

        for segment, is_seq in iter_sequences(text):
            if not is_seq:
                # Conditionally insert space after hyperlink close to force word boundary
                if prev_was_hyperlink_close and segment and not segment[0].isspace():
                    stripped_parts.append(' ')
                    char_end.append(original_pos)
                if segment:
                    # One end-position per character of the segment.
                    char_end.extend(range(original_pos + 1, original_pos + len(segment) + 1))
                    original_pos += len(segment)
                    stripped_parts.append(segment)
                prev_was_hyperlink_close = False
            else:
                is_hyperlink_close = segment.startswith(('\x1b]8;;\x1b\\', '\x1b]8;;\x07'))

                # Conditionally insert space before OSC sequences to artificially create word
                # boundary, but *not* before hyperlink close sequences, to ensure hyperlink is
                # terminated on the same line.
                if (segment.startswith('\x1b]') and not is_hyperlink_close
                        and stripped_parts and not stripped_parts[-1][-1].isspace()):
                    stripped_parts.append(' ')
                    char_end.append(original_pos)

                # Escape sequences advance position but don't add to stripped text
                original_pos += len(segment)
                prev_was_hyperlink_close = is_hyperlink_close

        # Add sentinel for final position
        char_end.append(original_pos)
        stripped_text = ''.join(stripped_parts)

        # Use parent's _split on the stripped text
        # pylint: disable-next=protected-access
        stripped_chunks = textwrap.TextWrapper._split(self, stripped_text)

        # Handle text that contains only sequences (no visible characters).
        # Return the sequences as a single chunk to preserve them.
        if not stripped_chunks and text:
            return [text]

        # Map the chunks back to the original text with sequences
        result: list[str] = []
        stripped_pos = 0
        num_chunks = len(stripped_chunks)

        for idx, chunk in enumerate(stripped_chunks):
            chunk_len = len(chunk)

            # Start is where previous character ended (or 0 for first chunk)
            start_orig = 0 if stripped_pos == 0 else char_end[stripped_pos - 1]

            # End is where next character starts. For last chunk, use sentinel
            # to include any trailing sequences.
            if idx == num_chunks - 1:
                end_orig = char_end[-1]  # sentinel includes trailing sequences
            else:
                end_orig = char_end[stripped_pos + chunk_len - 1]

            # Extract the corresponding portion from the original text
            # Skip empty chunks (from virtual spaces inserted at OSC boundaries)
            if start_orig != end_orig:
                result.append(text[start_orig:end_orig])
            stripped_pos += chunk_len

        return result

    def _wrap_chunks(self, chunks: list[str]) -> list[str]:  # pylint: disable=too-many-branches
        """
        Wrap chunks into lines using sequence-aware width.

        Override TextWrapper._wrap_chunks to use _width instead of len. Follows stdlib's algorithm:
        greedily fill lines, handle long words. Also handle OSC hyperlink processing. When
        hyperlinks span multiple lines, each line gets complete open/close sequences with matching
        id parameters for hover underlining continuity per OSC 8 spec.

        :param chunks: Chunks as produced by :meth:`_split`.
        :returns: Wrapped lines, each prefixed with the applicable indent.
        :raises ValueError: If ``max_lines`` is set and the indent plus the placeholder cannot
            fit within ``width``.
        """
        # pylint: disable=too-many-branches,too-many-statements,too-complex,too-many-locals
        # pylint: disable=too-many-nested-blocks
        # the hyperlink code in particular really pushes the complexity rating of this method.
        # preferring to keep it "all in one method" because of so much local state and manipulation.
        if not chunks:
            return []

        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if (self._width(indent)
                    + self._width(self.placeholder.lstrip())
                    > self.width):
                raise ValueError("placeholder too large for max width")

        lines: list[str] = []
        is_first_line = True

        hyperlink_state: _HyperlinkState | None = None
        # Track the id we're using for the current hyperlink continuation
        current_hyperlink_id: str | None = None

        # Arrange in reverse order so items can be efficiently popped
        chunks = list(reversed(chunks))

        while chunks:
            current_line: list[str] = []
            current_width = 0

            # Get the indent and available width for current line
            indent = self.initial_indent if is_first_line else self.subsequent_indent
            line_width = self.width - self._width(indent)

            # If continuing a hyperlink from previous line, prepend open sequence
            if hyperlink_state is not None:
                open_seq = _make_hyperlink_open(
                    hyperlink_state.url, hyperlink_state.params, hyperlink_state.terminator)
                chunks[-1] = open_seq + chunks[-1]

            # Drop leading whitespace (except at very start)
            # When dropping, transfer any sequences to the next chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped = self._strip_sequences(chunks[-1])
            if self.drop_whitespace and lines and stripped and not stripped.strip():
                sequences = self._extract_sequences(chunks[-1])
                del chunks[-1]
                if sequences and chunks:
                    chunks[-1] = sequences + chunks[-1]

            # Greedily add chunks that fit
            while chunks:
                chunk = chunks[-1]
                chunk_width = self._width(chunk)

                if current_width + chunk_width <= line_width:
                    current_line.append(chunks.pop())
                    current_width += chunk_width
                else:
                    break

            # Handle chunk that's too long for any line
            if chunks and self._width(chunks[-1]) > line_width:
                self._handle_long_word(
                    chunks, current_line, current_width, line_width
                )
                current_width = self._width(''.join(current_line))
                # Remove any empty chunks left by _handle_long_word
                while chunks and not chunks[-1]:
                    del chunks[-1]

            # Drop trailing whitespace
            # When dropping, transfer any sequences to the previous chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped_last = self._strip_sequences(current_line[-1]) if current_line else ''
            if (self.drop_whitespace and current_line and
                    stripped_last and not stripped_last.strip()):
                sequences = self._extract_sequences(current_line[-1])
                current_width -= self._width(current_line[-1])
                del current_line[-1]
                if sequences and current_line:
                    current_line[-1] = current_line[-1] + sequences

            if current_line:
                # Check whether this is a normal append or max_lines
                # truncation. Matches stdlib textwrap precedence:
                # normal if max_lines not set, not yet reached, or no
                # remaining visible content that would need truncation.
                no_more_content = (
                    not chunks or
                    self.drop_whitespace and
                    len(chunks) == 1 and
                    not self._strip_sequences(chunks[0]).strip()
                )
                if (self.max_lines is None or
                        len(lines) + 1 < self.max_lines or
                        no_more_content
                        and current_width <= line_width):
                    line_content = ''.join(current_line)

                    # Track hyperlink state through this line's content
                    new_state = self._track_hyperlink_state(line_content, hyperlink_state)

                    # If we end inside a hyperlink, append close sequence
                    if new_state is not None:
                        # Ensure we have an id for continuation.  A continued link
                        # re-parses to exactly the carried-in state (its open sequence
                        # was prepended above); anything else is a different link that
                        # opened on this line and must not inherit the previous id.
                        if current_hyperlink_id is None or new_state != hyperlink_state:
                            if 'id=' in new_state.params:
                                current_hyperlink_id = new_state.params
                            elif new_state.params:
                                # Prepend id to existing params (per OSC 8 spec, params can have
                                # multiple key=value pairs separated by :)
                                current_hyperlink_id = (
                                    f'id={self._next_hyperlink_id()}:{new_state.params}')
                            else:
                                current_hyperlink_id = f'id={self._next_hyperlink_id()}'
                        line_content += _make_hyperlink_close(new_state.terminator)

                        # Also need to inject the id into the opening
                        # sequence if it didn't have one
                        if 'id=' not in new_state.params:
                            # Find and replace the original open sequence with one that has id
                            old_open = _make_hyperlink_open(
                                new_state.url, new_state.params, new_state.terminator)
                            new_open = _make_hyperlink_open(
                                new_state.url, current_hyperlink_id, new_state.terminator)
                            line_content = line_content.replace(old_open, new_open, 1)

                        # Update state for next line, using computed id
                        hyperlink_state = _HyperlinkState(
                            new_state.url, current_hyperlink_id, new_state.terminator)
                    else:
                        hyperlink_state = None
                        current_hyperlink_id = None  # Reset id when hyperlink closes

                    # Strip trailing whitespace when drop_whitespace is enabled
                    # (matches CPython #140627 fix behavior)
                    if self.drop_whitespace:
                        line_content = line_content.rstrip()
                    lines.append(indent + line_content)
                    is_first_line = False
                else:
                    # max_lines reached with remaining content —
                    # pop chunks until placeholder fits, then break.
                    placeholder_w = self._width(self.placeholder)
                    while current_line:
                        last_text = self._strip_sequences(current_line[-1])
                        if (last_text.strip()
                                and current_width + placeholder_w <= line_width):
                            line_content = ''.join(current_line)
                            new_state = self._track_hyperlink_state(
                                line_content, hyperlink_state)
                            if new_state is not None:
                                line_content += _make_hyperlink_close(
                                    new_state.terminator)
                            lines.append(indent + line_content + self.placeholder)
                            break
                        current_width -= self._width(current_line[-1])
                        del current_line[-1]
                    else:
                        # Nothing on this line can carry the placeholder: try to
                        # attach it to the previous line, else emit it alone.
                        if lines:
                            prev_line = self._rstrip_visible(lines[-1])
                            if (self._width(prev_line) + placeholder_w
                                    <= self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _track_hyperlink_state(
            self, text: str,
            state: _HyperlinkState | None) -> _HyperlinkState | None:
        """
        Track hyperlink state through text.

        :param text: Text to scan for hyperlink sequences.
        :param state: Current state or None if outside hyperlink.
        :returns: Updated state after processing text.
        """
        for segment, is_seq in iter_sequences(text):
            if is_seq:
                parsed_link = _parse_hyperlink_open(segment)
                if parsed_link is not None and parsed_link.url:  # has URL = open
                    state = parsed_link
                elif segment.startswith(('\x1b]8;;\x1b\\', '\x1b]8;;\x07')):  # close
                    state = None
        return state

    def _handle_long_word(self, reversed_chunks: list[str],
                          cur_line: list[str], cur_len: int,
                          width: int) -> None:
        """
        Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`.

        This method ensures that word boundaries are not broken mid-sequence, and respects grapheme
        cluster boundaries when breaking long words.

        :param reversed_chunks: Remaining chunks in reverse order; the last item is the chunk
            that does not fit. Modified in place.
        :param cur_line: Chunks already placed on the current line. Modified in place.
        :param cur_len: Display width already used on the current line.
        :param width: Display width available for the current line.
        """
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        chunk = reversed_chunks[-1]

        if self.break_long_words:
            # Index just past the longest prefix of chunk that fits in
            # space_left display cells, on a grapheme boundary.
            fit_end = self._find_break_position(chunk, space_left)
            actual_end = fit_end

            # Handle break_on_hyphens: prefer breaking after the last hyphen of
            # the prefix that fits.  The search is limited by display width, not
            # character count, so wide characters before the hyphen cannot push
            # the break beyond the available cells.
            if self.break_on_hyphens and fit_end < len(chunk):
                # Strip sequences to find hyphen in logical text
                fitting = self._strip_sequences(chunk[:fit_end])
                hyphen_pos = fitting.rfind('-')
                if hyphen_pos > 0 and any(c != '-' for c in fitting[:hyphen_pos]):
                    # Map back to original position including sequences
                    actual_end = self._map_stripped_pos_to_original(chunk, hyphen_pos + 1)

            # If no progress possible (e.g., wide char exceeds line width),
            # force at least one grapheme to avoid infinite loop.
            # Only force when cur_line is empty; if line has content,
            # appending nothing is safe and the line will be committed.
            if actual_end == 0 and not cur_line:
                actual_end = self._find_first_grapheme_end(chunk)
            cur_line.append(chunk[:actual_end])
            reversed_chunks[-1] = chunk[actual_end:]

        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

    def _map_stripped_pos_to_original(self, text: str, stripped_pos: int) -> int:
        """
        Map a position in stripped text back to original text position.

        :param text: Original text, may contain terminal sequences.
        :param stripped_pos: Index into the sequence-stripped form of *text*.
        :returns: Corresponding index into *text*; the length of *text* when *stripped_pos*
            is not inside any non-sequence segment.
        """
        stripped_idx = 0
        original_idx = 0

        for segment, is_seq in iter_sequences(text):
            if is_seq:
                original_idx += len(segment)
            elif stripped_idx + len(segment) > stripped_pos:
                # Position is within this segment
                return original_idx + (stripped_pos - stripped_idx)
            else:
                stripped_idx += len(segment)
                original_idx += len(segment)

        # Reached only when stripped_pos is at or beyond the end of the
        # stripped text; original_idx then equals len(text).
        return original_idx  # pragma: no cover

    def _find_break_position(self, text: str, max_width: int) -> int:
        """
        Find string index in text that fits within max_width cells.

        :param text: Text to scan, may contain terminal sequences.
        :param max_width: Number of display cells available.
        :returns: Index of the first grapheme that would exceed *max_width*, or ``len(text)``
            when everything fits.
        """
        idx = 0
        width_so_far = 0

        while idx < len(text):
            char = text[idx]

            # Skip escape sequences (they don't add width)
            if char == '\x1b':
                match = ZERO_WIDTH_PATTERN.match(text, idx)
                if match:
                    idx = match.end()
                    continue

            # Get grapheme (use start= to avoid slice allocation)
            grapheme = next(iter_graphemes(text, start=idx))

            grapheme_width = self._width(grapheme)
            if width_so_far + grapheme_width > max_width:
                return idx  # Found break point

            width_so_far += grapheme_width
            idx += len(grapheme)

        # The whole text fits within max_width.
        return idx  # pragma: no cover

    def _find_first_grapheme_end(self, text: str) -> int:
        """Find the end position of the first grapheme."""
        return len(next(iter_graphemes(text)))

    def _rstrip_visible(self, text: str) -> str:
        """
        Strip trailing visible whitespace, preserving trailing sequences.

        :param text: Text to strip, may contain terminal sequences.
        :returns: *text* with whitespace after the last non-blank text segment removed;
            sequences that follow it are kept. Empty string when no segment has
            non-whitespace text.
        """
        segments = list(iter_sequences(text))
        last_vis = -1
        for i, (segment, is_seq) in enumerate(segments):
            if not is_seq and segment.rstrip():
                last_vis = i
        if last_vis == -1:
            return ''
        result = []
        for i, (segment, is_seq) in enumerate(segments):
            if i < last_vis:
                result.append(segment)
            elif i == last_vis:
                result.append(segment.rstrip())
            elif is_seq:
                # Past the last visible text: keep sequences, drop whitespace.
                result.append(segment)
        return ''.join(result)
|
||||
|
||||
|
||||
def wrap(text: str, width: int = 70, *,
         control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
         tabsize: int = 8,
         expand_tabs: bool = True,
         replace_whitespace: bool = True,
         ambiguous_width: int = 1,
         initial_indent: str = '',
         subsequent_indent: str = '',
         fix_sentence_endings: bool = False,
         break_long_words: bool = True,
         break_on_hyphens: bool = True,
         drop_whitespace: bool = True,
         max_lines: int | None = None,
         placeholder: str = ' [...]',
         propagate_sgr: bool = True) -> list[str]:
    r"""
    Wrap ``text`` into lines no wider than ``width`` display cells.

    A display-cell-aware counterpart of :func:`textwrap.wrap`: width is
    measured in terminal cells instead of characters, so wide characters,
    combining marks, and terminal escape sequences are accounted for.

    :param text: Text to wrap, may contain terminal sequences.
    :param width: Maximum line width in display cells.
    :param control_codes: How to handle terminal sequences (see :func:`~.width`).
    :param tabsize: Tab stop width for tab expansion.
    :param expand_tabs: If True (default), tab characters are expanded
        to spaces using ``tabsize``.
    :param replace_whitespace: If True (default), each whitespace character
        is replaced with a single space after tab expansion. When False,
        control whitespace like ``\n`` has zero display width (unlike
        :func:`textwrap.wrap` which counts ``len()``), so wrap points
        may differ from stdlib for non-space whitespace characters.
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :param initial_indent: String prepended to first line.
    :param subsequent_indent: String prepended to subsequent lines.
    :param fix_sentence_endings: If True, ensure sentences are always
        separated by exactly two spaces.
    :param break_long_words: If True, break words longer than width.
    :param break_on_hyphens: If True, allow breaking at hyphens.
    :param drop_whitespace: If True (default), whitespace at the beginning
        and end of each line (after wrapping but before indenting) is dropped.
        Set to False to preserve whitespace.
    :param max_lines: If set, output contains at most this many lines, with
        ``placeholder`` appended to the last line if the text was truncated.
    :param placeholder: String appended to the last line when text is
        truncated by ``max_lines``. Default is ``' [...]'``.
    :param propagate_sgr: If True (default), SGR (terminal styling) sequences
        are propagated across wrapped lines. Each line ends with a reset
        sequence and the next line begins with the active style restored.
    :returns: List of wrapped lines without trailing newlines.

    With the default ``propagate_sgr=True``, styling carries over line
    breaks: every line is closed with a reset and the following line
    re-opens the style that was active::

        >>> wrap('\x1b[1;34mHello world\x1b[0m', width=6)
        ['\x1b[1;34mHello\x1b[0m', '\x1b[1;34mworld\x1b[0m']

    Pass ``propagate_sgr=False`` to leave the wrapped lines untouched.

    As with :func:`textwrap.wrap`, newlines in the input count as ordinary
    whitespace and are collapsed. Wrap paragraphs one at a time to keep
    paragraph breaks::

        >>> text = 'First line.\nSecond line.'
        >>> wrap(text, 40)  # newline collapsed to space
        ['First line. Second line.']
        >>> [line for para in text.split('\n')
        ...  for line in (wrap(para, 40) if para else [''])]
        ['First line.', 'Second line.']

    .. seealso::

       :func:`textwrap.wrap`, :class:`textwrap.TextWrapper`
           Standard library text wrapping (character-based).

       :class:`.SequenceTextWrapper`
           Class interface for advanced wrapping options.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.5.0
       Added ``propagate_sgr`` parameter (default True).

    .. versionchanged:: 0.6.0
       Added ``expand_tabs``, ``replace_whitespace``, ``fix_sentence_endings``,
       ``drop_whitespace``, ``max_lines``, and ``placeholder`` parameters.

    Example::

        >>> from wcwidth import wrap
        >>> wrap('hello world', 5)
        ['hello', 'world']
        >>> wrap('中文字符', 4)  # CJK characters (2 cells each)
        ['中文', '字符']
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # Every option except propagate_sgr is forwarded to the wrapper class.
    wrapped = SequenceTextWrapper(
        width=width,
        control_codes=control_codes,
        tabsize=tabsize,
        expand_tabs=expand_tabs,
        replace_whitespace=replace_whitespace,
        ambiguous_width=ambiguous_width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        fix_sentence_endings=fix_sentence_endings,
        break_long_words=break_long_words,
        break_on_hyphens=break_on_hyphens,
        drop_whitespace=drop_whitespace,
        max_lines=max_lines,
        placeholder=placeholder,
    ).wrap(text)

    # SGR propagation is a post-processing pass over the finished lines.
    return _propagate_sgr(wrapped) if propagate_sgr else wrapped
|
||||
21
lib/wcwidth/unicode_versions.py
Normal file
21
lib/wcwidth/unicode_versions.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
Exports function list_versions() for unicode version level support.
|
||||
|
||||
This code generated by wcwidth/bin/update-tables.py on 2026-01-27 00:41:01 UTC.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def list_versions() -> tuple[str, ...]:
    """
    Report the Unicode version levels this module release supports.

    .. versionchanged:: 0.5.0
       Now returns a single-element tuple containing only the latest version.

    :returns: Supported Unicode version numbers in ascending sorted order.
    """
    latest = "17.0.0"
    return (latest,)
|
||||
1030
lib/wcwidth/wcwidth.py
Normal file
1030
lib/wcwidth/wcwidth.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user