/*
 * Decode one UTF-8 sequence starting at s, with n (>= 1) bytes available.
 *
 * On success stores the code point in *cp and returns the sequence length
 * (1..4).  Returns 0 when the bytes at s are not a well-formed sequence:
 * invalid lead byte, missing or non-continuation trailing byte, sequence
 * truncated by the end of the buffer, overlong encoding, UTF-16 surrogate,
 * or a value above U+10FFFF.
 */
static size_t dc_utf8_decode(const unsigned char *s, size_t n,
                             unsigned long *cp)
{
    unsigned char lead = s[0];
    unsigned long value;
    unsigned long min;   /* smallest code point that needs this length */
    size_t len;
    size_t k;

    if (lead < 0x80) {
        *cp = lead;
        return 1;
    } else if ((lead & 0xE0) == 0xC0) {
        len = 2; min = 0x80;    value = lead & 0x1Fu;
    } else if ((lead & 0xF0) == 0xE0) {
        len = 3; min = 0x800;   value = lead & 0x0Fu;
    } else if ((lead & 0xF8) == 0xF0) {
        len = 4; min = 0x10000; value = lead & 0x07u;
    } else {
        return 0;               /* stray continuation byte or 0xF8..0xFF */
    }

    if (len > n)
        return 0;               /* truncated by the end of the buffer */

    for (k = 1; k < len; k++) {
        if ((s[k] & 0xC0) != 0x80)
            return 0;           /* not a continuation byte */
        value = (value << 6) | (s[k] & 0x3Fu);
    }

    if (value < min || value > 0x10FFFFul ||
        (value >= 0xD800 && value <= 0xDFFF))
        return 0;

    *cp = value;
    return len;
}

/*
 * Number of terminal columns for one code point: 0, 1 or 2.
 *
 * This is a small heuristic, not a full East Asian Width table:
 *   - U+0300..U+036F (Combining Diacritical Marks) take no column;
 *   - the listed BMP ranges (Hangul Jamo, CJK, Hangul syllables,
 *     compatibility and fullwidth forms) take two columns;
 *   - every code point at or above U+10000 is treated as two columns
 *     (emoji, CJK extensions), which over-counts the narrow ones;
 *   - everything else takes one column.
 */
static size_t dc_codepoint_cols(unsigned long cp)
{
    if (cp >= 0x0300 && cp <= 0x036F)
        return 0;

    if ((cp >= 0x1100 && cp <= 0x115F) ||
        (cp >= 0x2E80 && cp <= 0xA4CF && cp != 0x303F) ||
        (cp >= 0xAC00 && cp <= 0xD7A3) ||
        (cp >= 0xF900 && cp <= 0xFAFF) ||
        (cp >= 0xFE10 && cp <= 0xFE6F) ||
        (cp >= 0xFF01 && cp <= 0xFF60) ||
        (cp >= 0xFFE0 && cp <= 0xFFE6))
        return 2;

    if (cp >= 0x10000)
        return 2;

    return 1;
}

/*
 * Count display columns for a UTF-8 byte string (skips ANSI escapes).
 *
 * buf   - bytes to measure; need not be NUL-terminated.
 * bytes - number of bytes of buf to examine.
 *
 * Returns the estimated column count.  ESC '[' ... <final byte 0x40..0x7E>
 * sequences contribute nothing.  A byte that does not start a well-formed
 * UTF-8 sequence is consumed on its own and counted as one column, so a
 * malformed or truncated sequence can neither hide the bytes that follow it
 * nor cause a read past buf + bytes.
 */
static size_t display_cols(const char *buf, size_t bytes)
{
    const unsigned char *s = (const unsigned char *)buf;
    size_t cols = 0;
    size_t i = 0;

    while (i < bytes) {
        unsigned long cp;
        size_t len;

        /* Skip ANSI CSI escape sequences: ESC '[' params... final byte */
        if (s[i] == 0x1B && i + 1 < bytes && s[i + 1] == '[') {
            i += 2;
            while (i < bytes && !(s[i] >= 0x40 && s[i] <= 0x7E))
                i++;
            if (i < bytes)
                i++;            /* skip final byte */
            continue;
        }

        len = dc_utf8_decode(s + i, bytes - i, &cp);
        if (len == 0) {
            /* Malformed byte: consume only this byte, count one column. */
            i++;
            cols++;
            continue;
        }

        i += len;
        cols += dc_codepoint_cols(cp);
    }
    return cols;
}