From 306a6902cba839371f6602cf9ee16532b570d30c Mon Sep 17 00:00:00 2001 From: user Date: Wed, 10 Dec 2025 12:57:27 -0500 Subject: [PATCH 1/9] Fix traceback color output with unicode characters Closes #130273 --- Lib/test/test_traceback.py | 27 +++++++++++++++ Lib/traceback.py | 33 +++++++++++++------ ...-12-10-12-56-47.gh-issue-130273.iCfiY5.rst | 1 + 3 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 96510eeec54640..03b63ab2b81894 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -5273,5 +5273,32 @@ def expected(t, m, fn, l, f, E, e, z): ] self.assertEqual(actual, expected(**colors)) + def test_colorized_traceback_unicode(self): + try: + 啊哈=1; 啊哈/0#### + except Exception as e: + exc = traceback.TracebackException.from_exception(e) + + actual = "".join(exc.format(colorize=True)).splitlines() + def expected(t, m, fn, l, f, E, e, z): + return [ + f" 啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####", + f" {e}~~~~{z}{E}^{z}{e}~{z}", + ] + self.assertEqual(actual[2:4], expected(**colors)) + + try: + ééééé/0 + except Exception as e: + exc = traceback.TracebackException.from_exception(e) + + actual = "".join(exc.format(colorize=True)).splitlines() + def expected(t, m, fn, l, f, E, e, z): + return [ + f" {E}ééééé{z}/0", + f" {E}^^^^^{z}", + ] + self.assertEqual(actual[2:4], expected(**colors)) + if __name__ == "__main__": unittest.main() diff --git a/Lib/traceback.py b/Lib/traceback.py index f95d6bdbd016ac..7239513f436109 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -680,12 +680,12 @@ def output_line(lineno): colorized_line_parts = [] colorized_carets_parts = [] - for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]): + for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]): caret_group = list(group) - if color == "^": + if "^" in color: colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset) colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset) - elif color == "~": + elif "~" in color: colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset) colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset) else: @@ -967,7 +967,24 @@ def setup_positions(expr, force_valid=True): return None -_WIDE_CHAR_SPECIFIERS = "WF" + +def _lookahead(iterator, default): + forked = itertools.tee(iterator, 1)[0] + return next(forked, default) + + +def _zip_display_width(line, carets): + line = itertools.tee(line, 1)[0] + carets = iter(carets) + for char in line: + char_width = _display_width(char) + next_char = _lookahead(line, "") + if next_char and char_width == _display_width(char + next_char): + next(line) + yield char + next_char, "".join(itertools.islice(carets, char_width)) + else: + yield char, "".join(itertools.islice(carets, char_width)) + def _display_width(line, offset=None): """Calculate the extra amount of width space the given source @@ -981,13 +998,9 @@ def _display_width(line, offset=None): if line.isascii(): return offset - import unicodedata - - return sum( - 2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1 - for char in line[:offset] - ) + from _pyrepl.utils import wlen + return wlen(line[:offset]) class _ExceptionPrintContext: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst new file mode 100644 index 00000000000000..981c84a9372d04 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst @@ -0,0 +1 @@ +Fix traceback color output with unicode characters From 8edad110695a22acb6ad9c6266826c4b05a28d1e Mon Sep 17 00:00:00 2001 From: user Date: Wed, 10 Dec 2025 15:15:30 -0500 Subject: [PATCH 2/9] mv news blurb --- .../2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst => Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst} (100%) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst similarity index 100% rename from Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst rename to Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst From 794703397a2490f0cb587df8ce1118ea1fcc0404 Mon Sep 17 00:00:00 2001 From: user Date: Wed, 28 Jan 2026 18:04:33 -0500 Subject: [PATCH 3/9] use unicodedata.iter_graphemes --- Lib/traceback.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/Lib/traceback.py b/Lib/traceback.py index 7239513f436109..b1fd024884c907 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -968,22 +968,13 @@ def setup_positions(expr, force_valid=True): return None -def _lookahead(iterator, default): - forked = itertools.tee(iterator, 1)[0] - return next(forked, default) - - def _zip_display_width(line, carets): - line = itertools.tee(line, 1)[0] + import unicodedata carets = iter(carets) - for char in line: + for char in unicodedata.iter_graphemes(line): + char = str(char) char_width = _display_width(char) - next_char = _lookahead(line, "") - if next_char and char_width == _display_width(char + next_char): - next(line) - yield char + next_char, "".join(itertools.islice(carets, char_width)) - else: - yield char, "".join(itertools.islice(carets, char_width)) + yield char, "".join(itertools.islice(carets, char_width)) def _display_width(line, offset=None): From 467656ee4ad09d73776cb60e635038c9dbf10dfa Mon Sep 17 00:00:00 2001 From: user Date: Thu, 19 Feb 2026 10:45:04 -0500 Subject: [PATCH 4/9] mv wlen/str_width to traceback --- Lib/_pyrepl/utils.py | 29 +------------------ Lib/test/test_pyrepl/support.py | 3 +- Lib/test/test_pyrepl/test_utils.py | 45 +----------------------------- Lib/test/test_traceback.py | 45 ++++++++++++++++++++++++++++++ Lib/traceback.py | 37 ++++++++++++++++++++++-- 5 files changed, 83 insertions(+), 76 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 25d7ac1bd0b14e..d79c451949f946 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -1,6 +1,5 @@ from __future__ import annotations import builtins -import functools import keyword import re import token as T @@ -11,12 +10,12 @@ from collections import deque from io import StringIO from tokenize import TokenInfo as TI +from traceback import _str_width as str_width, _wlen as wlen from typing import Iterable, Iterator, Match, NamedTuple, Self from .types import CharBuffer, CharWidths from .trace import trace -ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) IDENTIFIERS_AFTER = {"def", "class"} @@ -59,32 +58,6 @@ class ColorSpan(NamedTuple): tag: str -@functools.cache -def str_width(c: str) -> int: - if ord(c) < 128: - return 1 - # gh-139246 for zero-width joiner and combining characters - if unicodedata.combining(c): - return 0 - category = unicodedata.category(c) - if category == "Cf" and c != "\u00ad": - return 0 - w = unicodedata.east_asian_width(c) - if w in ("N", "Na", "H", "A"): - return 1 - return 2 - - -def wlen(s: str) -> int: - if len(s) == 1 and s != "\x1a": - return str_width(s) - length = sum(str_width(i) for i in s) - # remove lengths of any escape sequences - sequence = ANSI_ESCAPE_SEQUENCE.findall(s) - ctrl_z_cnt = s.count("\x1a") - return length - sum(len(i) for i in sequence) + ctrl_z_cnt - - def unbracket(s: str, including_content: bool = False) -> str: r"""Return `s` with \001 and \002 characters removed. diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py index 4f7f9d77933336..be9f207ef24c28 100644 --- a/Lib/test/test_pyrepl/support.py +++ b/Lib/test/test_pyrepl/support.py @@ -1,12 +1,13 @@ from code import InteractiveConsole from functools import partial +from traceback import ANSI_ESCAPE_SEQUENCE from typing import Iterable from unittest.mock import MagicMock from _pyrepl.console import Console, Event from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig from _pyrepl.simple_interact import _strip_final_indent -from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE +from _pyrepl.utils import unbracket class ScreenEqualMixin: diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 3c55b6bdaeee9e..eddc81489b4eec 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -1,52 +1,9 @@ from unittest import TestCase -from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors +from _pyrepl.utils import prev_next_window, gen_colors class TestUtils(TestCase): - def test_str_width(self): - characters = [ - 'a', - '1', - '_', - '!', - '\x1a', - '\u263A', - '\uffb9', - '\N{LATIN SMALL LETTER E WITH ACUTE}', # é - '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ - '\u00ad', - ] - for c in characters: - self.assertEqual(str_width(c), 1) - - zero_width_characters = [ - '\N{COMBINING ACUTE ACCENT}', - '\N{ZERO WIDTH JOINER}', - ] - for c in zero_width_characters: - with self.subTest(character=c): - self.assertEqual(str_width(c), 0) - - characters = [chr(99989), chr(99999)] - for c in characters: - self.assertEqual(str_width(c), 2) - - def test_wlen(self): - for c in ['a', 'b', '1', '!', '_']: - self.assertEqual(wlen(c), 1) - self.assertEqual(wlen('\x1a'), 2) - - char_east_asian_width_N = chr(3800) - self.assertEqual(wlen(char_east_asian_width_N), 1) - char_east_asian_width_W = chr(4352) - self.assertEqual(wlen(char_east_asian_width_W), 2) - - self.assertEqual(wlen('hello'), 5) - self.assertEqual(wlen('hello' + '\x1a'), 7) - self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1) - self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2) - def test_prev_next_window(self): def gen_normal(): yield 1 diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index e3172069716834..0c55b200872fd7 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -28,6 +28,7 @@ import traceback from functools import partial from pathlib import Path +from traceback import _str_width, _wlen import _colorize MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else '' @@ -1787,6 +1788,50 @@ def f(): ] self.assertEqual(result_lines, expected) + def test_str_width(self): + characters = [ + 'a', + '1', + '_', + '!', + '\x1a', + '\u263A', + '\uffb9', + '\N{LATIN SMALL LETTER E WITH ACUTE}', # é + '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ + '\u00ad', + ] + for c in characters: + self.assertEqual(_str_width(c), 1) + + zero_width_characters = [ + '\N{COMBINING ACUTE ACCENT}', + '\N{ZERO WIDTH JOINER}', + ] + for c in zero_width_characters: + with self.subTest(character=c): + self.assertEqual(_str_width(c), 0) + + characters = [chr(99989), chr(99999)] + for c in characters: + self.assertEqual(_str_width(c), 2) + + def test_wlen(self): + for c in ['a', 'b', '1', '!', '_']: + self.assertEqual(_wlen(c), 1) + self.assertEqual(_wlen('\x1a'), 2) + + char_east_asian_width_N = chr(3800) + self.assertEqual(_wlen(char_east_asian_width_N), 1) + char_east_asian_width_W = chr(4352) + self.assertEqual(_wlen(char_east_asian_width_W), 2) + + self.assertEqual(_wlen('hello'), 5) + self.assertEqual(_wlen('hello' + '\x1a'), 7) + self.assertEqual(_wlen('e\N{COMBINING ACUTE ACCENT}'), 1) + self.assertEqual(_wlen('a\N{ZERO WIDTH JOINER}b'), 2) + + class TestKeywordTypoSuggestions(unittest.TestCase): TYPO_CASES = [ ("with block ad something:\n pass", "and"), diff --git a/Lib/traceback.py b/Lib/traceback.py index 2f3acec02dc951..05cb77b6cac900 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -1,8 +1,10 @@ """Extract, format and print information about Python stack traces.""" import collections.abc +import functools import itertools import linecache +import re import sys import textwrap import types @@ -978,6 +980,37 @@ def _zip_display_width(line, carets): yield char, "".join(itertools.islice(carets, char_width)) +@functools.cache +def _str_width(c: str) -> int: + import unicodedata + if ord(c) < 128: + return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 + w = unicodedata.east_asian_width(c) + if w in ("N", "Na", "H", "A"): + return 1 + return 2 + + +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") + + +def _wlen(s: str) -> int: + if len(s) == 1 and s != "\x1a": + return _str_width(s) + length = sum(_str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count("\x1a") + return length - sum(len(i) for i in sequence) + ctrl_z_cnt + + + def _display_width(line, offset=None): """Calculate the extra amount of width space the given source code segment might take if it were to be displayed on a fixed @@ -990,9 +1023,7 @@ def _display_width(line, offset=None): if line.isascii(): return offset - from _pyrepl.utils import wlen - - return wlen(line[:offset]) + return _wlen(line[:offset]) class _ExceptionPrintContext: From 37a4fd971333b8ff6425b2d65f01294936a91ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 7 Apr 2026 13:32:57 +0200 Subject: [PATCH 5/9] Restore _pyrepl.utils.str_width and .wlen; respond to code review --- Lib/_pyrepl/utils.py | 30 ++++++++++++- Lib/test/test_pyrepl/support.py | 3 +- Lib/test/test_pyrepl/test_utils.py | 45 ++++++++++++++++++- Lib/test/test_traceback.py | 44 ------------------ Lib/traceback.py | 12 +++-- ...-12-10-15-15-09.gh-issue-130273.iCfiY5.rst | 2 +- 6 files changed, 83 insertions(+), 53 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 97b5a9fbc40fb1..b1760ce41b71cb 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations import builtins +import functools import keyword import re import token as T @@ -10,12 +11,13 @@ from collections import deque from io import StringIO from tokenize import TokenInfo as TI -from traceback import _str_width as str_width, _wlen as wlen from typing import Iterable, Iterator, Match, NamedTuple, Self from .types import CharBuffer, CharWidths from .trace import trace + +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) IDENTIFIERS_AFTER = frozenset({"def", "class"}) @@ -23,6 +25,32 @@ BUILTINS = frozenset({str(name) for name in dir(builtins) if not name.startswith('_')}) +@functools.cache +def str_width(c: str) -> int: + if ord(c) < 128: + return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 + w = unicodedata.east_asian_width(c) + if w in ("N", "Na", "H", "A"): + return 1 + return 2 + + +def wlen(s: str) -> int: + if len(s) == 1 and s != "\x1a": + return str_width(s) + length = sum(str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count("\x1a") + return length - sum(len(i) for i in sequence) + ctrl_z_cnt + + def THEME(**kwargs): # Not cached: the user can modify the theme inside the interactive session. return _colorize.get_theme(**kwargs).syntax diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py index be9f207ef24c28..4f7f9d77933336 100644 --- a/Lib/test/test_pyrepl/support.py +++ b/Lib/test/test_pyrepl/support.py @@ -1,13 +1,12 @@ from code import InteractiveConsole from functools import partial -from traceback import ANSI_ESCAPE_SEQUENCE from typing import Iterable from unittest.mock import MagicMock from _pyrepl.console import Console, Event from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig from _pyrepl.simple_interact import _strip_final_indent -from _pyrepl.utils import unbracket +from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE class ScreenEqualMixin: diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index eddc81489b4eec..3c55b6bdaeee9e 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -1,9 +1,52 @@ from unittest import TestCase -from _pyrepl.utils import prev_next_window, gen_colors +from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors class TestUtils(TestCase): + def test_str_width(self): + characters = [ + 'a', + '1', + '_', + '!', + '\x1a', + '\u263A', + '\uffb9', + '\N{LATIN SMALL LETTER E WITH ACUTE}', # é + '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ + '\u00ad', + ] + for c in characters: + self.assertEqual(str_width(c), 1) + + zero_width_characters = [ + '\N{COMBINING ACUTE ACCENT}', + '\N{ZERO WIDTH JOINER}', + ] + for c in zero_width_characters: + with self.subTest(character=c): + self.assertEqual(str_width(c), 0) + + characters = [chr(99989), chr(99999)] + for c in characters: + self.assertEqual(str_width(c), 2) + + def test_wlen(self): + for c in ['a', 'b', '1', '!', '_']: + self.assertEqual(wlen(c), 1) + self.assertEqual(wlen('\x1a'), 2) + + char_east_asian_width_N = chr(3800) + self.assertEqual(wlen(char_east_asian_width_N), 1) + char_east_asian_width_W = chr(4352) + self.assertEqual(wlen(char_east_asian_width_W), 2) + + self.assertEqual(wlen('hello'), 5) + self.assertEqual(wlen('hello' + '\x1a'), 7) + self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1) + self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2) + def test_prev_next_window(self): def gen_normal(): yield 1 diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index af93c2207beeca..0dbd5c0b888369 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -31,7 +31,6 @@ import traceback from functools import partial from pathlib import Path -from traceback import _str_width, _wlen import _colorize MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else '' @@ -1791,49 +1790,6 @@ def f(): ] self.assertEqual(result_lines, expected) - def test_str_width(self): - characters = [ - 'a', - '1', - '_', - '!', - '\x1a', - '\u263A', - '\uffb9', - '\N{LATIN SMALL LETTER E WITH ACUTE}', # é - '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ - '\u00ad', - ] - for c in characters: - self.assertEqual(_str_width(c), 1) - - zero_width_characters = [ - '\N{COMBINING ACUTE ACCENT}', - '\N{ZERO WIDTH JOINER}', - ] - for c in zero_width_characters: - with self.subTest(character=c): - self.assertEqual(_str_width(c), 0) - - characters = [chr(99989), chr(99999)] - for c in characters: - self.assertEqual(_str_width(c), 2) - - def test_wlen(self): - for c in ['a', 'b', '1', '!', '_']: - self.assertEqual(_wlen(c), 1) - self.assertEqual(_wlen('\x1a'), 2) - - char_east_asian_width_N = chr(3800) - self.assertEqual(_wlen(char_east_asian_width_N), 1) - char_east_asian_width_W = chr(4352) - self.assertEqual(_wlen(char_east_asian_width_W), 2) - - self.assertEqual(_wlen('hello'), 5) - self.assertEqual(_wlen('hello' + '\x1a'), 7) - self.assertEqual(_wlen('e\N{COMBINING ACUTE ACCENT}'), 1) - self.assertEqual(_wlen('a\N{ZERO WIDTH JOINER}b'), 2) - class TestKeywordTypoSuggestions(unittest.TestCase): TYPO_CASES = [ diff --git a/Lib/traceback.py b/Lib/traceback.py index ae521ed327d100..7933031199e724 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -985,9 +985,11 @@ def _zip_display_width(line, carets): @functools.cache def _str_width(c: str) -> int: - import unicodedata + # copied from _pyrepl.utils to fix gh-130273 + if ord(c) < 128: return 1 + import unicodedata # gh-139246 for zero-width joiner and combining characters if unicodedata.combining(c): return 0 @@ -1000,20 +1002,21 @@ def _str_width(c: str) -> int: return 2 -ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") +_ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") def _wlen(s: str) -> int: + # copied from _pyrepl.utils to fix gh-130273 + if len(s) == 1 and s != "\x1a": return _str_width(s) length = sum(_str_width(i) for i in s) # remove lengths of any escape sequences - sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + sequence = _ANSI_ESCAPE_SEQUENCE.findall(s) ctrl_z_cnt = s.count("\x1a") return length - sum(len(i) for i in sequence) + ctrl_z_cnt - def _display_width(line, offset=None): """Calculate the extra amount of width space the given source code segment might take if it were to be displayed on a fixed @@ -1028,6 +1031,7 @@ def _display_width(line, offset=None): return _wlen(line[:offset]) + def _format_note(note, indent, theme): for l in note.split("\n"): yield f"{indent}{theme.note}{l}{theme.reset}\n" diff --git a/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst index 981c84a9372d04..2e0695334fd71e 100644 --- a/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst +++ b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst @@ -1 +1 @@ -Fix traceback color output with unicode characters +Fix traceback color output with Unicode characters. From 1318217f3dd99b1e7ca82dffeea11e99daded8bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 7 Apr 2026 13:47:01 +0200 Subject: [PATCH 6/9] Remove ASCII fast track that was too eager --- Lib/test/test_traceback.py | 60 ++++++++++++++++++++++++++++++++++++++ Lib/traceback.py | 6 +--- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 0dbd5c0b888369..909808825f055e 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -5442,6 +5442,66 @@ def expected(t, m, fn, l, f, E, e, z, n): ] self.assertEqual(actual[2:4], expected(**colors)) + def test_colorized_syntax_error_ascii_display_width(self): + """Caret alignment for ASCII edge cases handled by _wlen. + + The old ASCII fast track in _display_width returned the raw character + offset for ASCII strings, which is wrong for CTRL-Z (display width 2) + and ANSI escape sequences (display width 0). + """ + E = colors["E"] + z = colors["z"] + t = colors["t"] + m = colors["m"] + fn = colors["fn"] + l = colors["l"] + + def _make_syntax_error(text, offset, end_offset): + err = SyntaxError("invalid syntax") + err.filename = "" + err.lineno = 1 + err.end_lineno = 1 + err.text = text + err.offset = offset + err.end_offset = end_offset + return err + + # CTRL-Z (\x1a) is ASCII but displayed as ^Z (2 columns). + # Verify caret aligns when CTRL-Z precedes the error. + err = _make_syntax_error("a\x1a$\n", offset=3, end_offset=4) + exc = traceback.TracebackException.from_exception(err) + actual = "".join(exc.format(colorize=True)) + # 'a' (1 col) + '\x1a' (2 cols) = 3 cols before '$' + self.assertIn( + f' File {fn}""{z}, line {l}1{z}\n' + f' a\x1a{E}${z}\n' + f' {" " * 3}{E}^{z}\n' + f'{t}SyntaxError{z}: {m}invalid syntax{z}\n', + actual, + ) + + # CTRL-Z in the highlighted (error) region counts as 2 columns. + err = _make_syntax_error("$\x1a\n", offset=1, end_offset=3) + exc = traceback.TracebackException.from_exception(err) + actual = "".join(exc.format(colorize=True)) + # '$' (1 col) + '\x1a' (2 cols) = 3 columns of carets + self.assertIn( + f' {E}$\x1a{z}\n' + f' {E}{"^" * 3}{z}\n', + actual, + ) + + # ANSI escape sequences are ASCII but take 0 display columns. + err = _make_syntax_error("a\x1b[1mb$\n", offset=7, end_offset=8) + exc = traceback.TracebackException.from_exception(err) + actual = "".join(exc.format(colorize=True)) + # 'a' (1 col) + '\x1b[1m' (0 cols) + 'b' (1 col) = 2 before '$' + self.assertIn( + f' a\x1b[1mb{E}${z}\n' + f' {" " * 2}{E}^{z}\n', + actual, + ) + class TestLazyImportSuggestions(unittest.TestCase): """Test that lazy imports are not reified when computing AttributeError suggestions.""" diff --git a/Lib/traceback.py b/Lib/traceback.py index 7933031199e724..6c2b14fa494c0e 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -1023,11 +1023,7 @@ def _display_width(line, offset=None): width output device. Supports wide unicode characters and emojis.""" if offset is None: - offset = len(line) - - # Fast track for ASCII-only strings - if line.isascii(): - return offset + return _wlen(line) return _wlen(line[:offset]) From 7b3fa8741fa5be9346e1d94bb7165335fa7f6409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 7 Apr 2026 13:59:21 +0200 Subject: [PATCH 7/9] Add `_zip_display_width` fast track suggested by Victor --- Lib/traceback.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/traceback.py b/Lib/traceback.py index 6c2b14fa494c0e..343d0e5f108c35 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -975,8 +975,13 @@ def setup_positions(expr, force_valid=True): def _zip_display_width(line, carets): - import unicodedata carets = iter(carets) + if line.isascii() and '\x1a' not in line: + for char in line: + yield char, next(carets, "") + return + + import unicodedata for char in unicodedata.iter_graphemes(line): char = str(char) char_width = _display_width(char) From 4ddf024b64901bdf543d7149b96b819e87c7c2ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 7 Apr 2026 14:03:01 +0200 Subject: [PATCH 8/9] Move str_width and wlen to the place in the file where they were before --- Lib/_pyrepl/utils.py | 54 +++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index b1760ce41b71cb..b558b11f5c44fe 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -25,32 +25,6 @@ BUILTINS = frozenset({str(name) for name in dir(builtins) if not name.startswith('_')}) -@functools.cache -def str_width(c: str) -> int: - if ord(c) < 128: - return 1 - # gh-139246 for zero-width joiner and combining characters - if unicodedata.combining(c): - return 0 - category = unicodedata.category(c) - if category == "Cf" and c != "\u00ad": - return 0 - w = unicodedata.east_asian_width(c) - if w in ("N", "Na", "H", "A"): - return 1 - return 2 - - -def wlen(s: str) -> int: - if len(s) == 1 and s != "\x1a": - return str_width(s) - length = sum(str_width(i) for i in s) - # remove lengths of any escape sequences - sequence = ANSI_ESCAPE_SEQUENCE.findall(s) - ctrl_z_cnt = s.count("\x1a") - return length - sum(len(i) for i in sequence) + ctrl_z_cnt - - def THEME(**kwargs): # Not cached: the user can modify the theme inside the interactive session. return _colorize.get_theme(**kwargs).syntax @@ -86,6 +60,34 @@ class ColorSpan(NamedTuple): tag: str +@functools.cache +def str_width(c: str) -> int: + if ord(c) < 128: + return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 + w = unicodedata.east_asian_width(c) + if w in ("N", "Na", "H", "A"): + return 1 + return 2 + + +def wlen(s: str) -> int: + if len(s) == 1 and s != "\x1a": + return str_width(s) + length = sum(str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count("\x1a") + return length - sum(len(i) for i in sequence) + ctrl_z_cnt + + + + def unbracket(s: str, including_content: bool = False) -> str: r"""Return `s` with \001 and \002 characters removed. From f094135bbe8d995b712ef10e4b407457b330e5db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 7 Apr 2026 14:03:44 +0200 Subject: [PATCH 9/9] derp --- Lib/_pyrepl/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index b558b11f5c44fe..d399b4cf53c82a 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -86,8 +86,6 @@ def wlen(s: str) -> int: return length - sum(len(i) for i in sequence) + ctrl_z_cnt - - def unbracket(s: str, including_content: bool = False) -> str: r"""Return `s` with \001 and \002 characters removed.