#include "charset.h"

#include <string.h>

enum {
    LATIN1_MAX = 0xFF,            /* highest code point ISO-8859-1 can represent */
    LATIN1_SUBSTITUTE = '?',      /* written for every code point above LATIN1_MAX */
    UNICODE_REPLACEMENT = 0xFFFD, /* stands in for ill-formed UTF-8 sequences */
    UTF8_BOM_LEN = 3,
    UTF16_BOM_LEN = 2
};

/* Nonzero when the buffer starts with the UTF-16 little-endian BOM (FF FE). */
static int is_utf16_le(const unsigned char *in, size_t len)
{
    return len >= 2 && in[0] == 0xFF && in[1] == 0xFE;
}

/* Nonzero when the buffer starts with the UTF-16 big-endian BOM (FE FF). */
static int is_utf16_be(const unsigned char *in, size_t len)
{
    return len >= 2 && in[0] == 0xFE && in[1] == 0xFF;
}

/* Nonzero when the buffer starts with the UTF-8 BOM (EF BB BF). */
static int has_utf8_bom(const unsigned char *in, size_t len)
{
    return len >= UTF8_BOM_LEN && in[0] == 0xEF && in[1] == 0xBB && in[2] == 0xBF;
}

/*
 * Length in bytes (1..4) of the UTF-8 sequence introduced by `lead`, or 0
 * when `lead` cannot start a sequence (a continuation byte or 0xF8..0xFF).
 */
static size_t utf8_sequence_length(unsigned char lead)
{
    if (lead < 0x80) {
        return 1;
    }
    if ((lead & 0xE0) == 0xC0) {
        return 2;
    }
    if ((lead & 0xF0) == 0xE0) {
        return 3;
    }
    if ((lead & 0xF8) == 0xF0) {
        return 4;
    }
    return 0;
}

/*
 * Heuristic UTF-8 detector.
 *
 * Returns 1 only when every byte belongs to a structurally valid sequence
 * (lead byte followed by the right number of 10xxxxxx continuation bytes)
 * AND at least one multi-byte sequence is present.  Pure ASCII therefore
 * returns 0, which lets the caller take the plain passthrough path.
 */
static int is_utf8(const unsigned char *in, size_t len)
{
    int has_multibyte = 0;
    size_t i = 0;

    while (i < len) {
        size_t seq_len = utf8_sequence_length(in[i]);
        size_t k;

        if (seq_len == 0 || seq_len > len - i) {
            return 0;
        }
        for (k = 1; k < seq_len; k++) {
            if ((in[i + k] & 0xC0) != 0x80) {
                return 0;
            }
        }
        if (seq_len > 1) {
            has_multibyte = 1;
        }
        i += seq_len;
    }
    return has_multibyte;
}

/*
 * Decode one UTF-8 sequence starting at `in`.
 *
 * in        - first byte of the sequence
 * remaining - number of readable bytes at `in`
 * cp        - receives the decoded code point
 *
 * Returns the number of bytes consumed (1..4), or 0 when the sequence is
 * truncated, starts with an invalid lead byte, or has a malformed
 * continuation byte; `*cp` is left untouched in that case.
 *
 * Structurally valid but ill-formed sequences (overlong encodings, encoded
 * surrogates, values above U+10FFFF) are consumed whole and reported as
 * U+FFFD.  Decoding overlongs literally would let e.g. C0 80 smuggle a NUL
 * terminator into the output.
 */
static int utf8_to_codepoint(const unsigned char *in, size_t remaining, unsigned int *cp)
{
    /* Indexed by sequence length: payload bits of the lead byte. */
    static const unsigned char lead_mask[] = { 0x00, 0x7F, 0x1F, 0x0F, 0x07 };
    /* Indexed by sequence length: smallest code point that needs that length. */
    static const unsigned int shortest_form[] = { 0, 0, 0x80, 0x800, 0x10000 };
    size_t seq_len;
    size_t k;
    unsigned int value;

    if (remaining == 0) {
        return 0;
    }
    seq_len = utf8_sequence_length(in[0]);
    if (seq_len == 0 || seq_len > remaining) {
        return 0;
    }

    value = in[0] & lead_mask[seq_len];
    for (k = 1; k < seq_len; k++) {
        if ((in[k] & 0xC0) != 0x80) {
            return 0;
        }
        value = (value << 6) | (in[k] & 0x3Fu);
    }

    if (value < shortest_form[seq_len] ||
        (value >= 0xD800 && value <= 0xDFFF) ||
        value > 0x10FFFF) {
        value = UNICODE_REPLACEMENT;
    }

    *cp = value;
    return (int)seq_len;
}

/* Map a code point to its ISO-8859-1 byte, or to '?' when it does not fit. */
static char to_latin1_char(unsigned int cp)
{
    return cp <= LATIN1_MAX ? (char)cp : (char)LATIN1_SUBSTITUTE;
}

/* Read one 16-bit code unit from two bytes in the given byte order. */
static unsigned int utf16_unit(const unsigned char *p, int big_endian)
{
    return big_endian ? (((unsigned int)p[0] << 8) | p[1])
                      : (((unsigned int)p[1] << 8) | p[0]);
}

/*
 * Shared UTF-16 -> ISO-8859-1 converter.
 *
 * Skips the 2-byte BOM, then converts code units until the input or the
 * output buffer is exhausted.  A trailing odd byte is ignored.  A high
 * surrogate followed by a low surrogate is one character and yields a
 * single '?', matching what the UTF-8 path does for the same character.
 * `out_size` must be at least 1; the output is always NUL-terminated.
 * Returns the number of characters written, not counting the terminator.
 */
static int utf16_to_iso8859_1(const unsigned char *in, size_t in_len,
                              char *out, size_t out_size, int big_endian)
{
    size_t i = UTF16_BOM_LEN;
    size_t o = 0;

    while (i + 1 < in_len && o + 1 < out_size) {
        unsigned int unit = utf16_unit(in + i, big_endian);

        i += 2;
        if (unit >= 0xD800 && unit <= 0xDBFF && i + 1 < in_len) {
            unsigned int next = utf16_unit(in + i, big_endian);

            if (next >= 0xDC00 && next <= 0xDFFF) {
                i += 2; /* swallow the low half of the pair */
            }
        }
        out[o++] = to_latin1_char(unit);
    }
    out[o] = '\0';
    return (int)o;
}

/* Convert BOM-prefixed UTF-16 little-endian input; see utf16_to_iso8859_1. */
static int utf16le_to_iso8859_1(const unsigned char *in, size_t in_len,
                                char *out, size_t out_size)
{
    return utf16_to_iso8859_1(in, in_len, out, out_size, 0);
}

/* Convert BOM-prefixed UTF-16 big-endian input; see utf16_to_iso8859_1. */
static int utf16be_to_iso8859_1(const unsigned char *in, size_t in_len,
                                char *out, size_t out_size)
{
    return utf16_to_iso8859_1(in, in_len, out, out_size, 1);
}

/*
 * Convert UTF-8 input to ISO-8859-1.
 *
 * A leading UTF-8 BOM is skipped rather than rendered as '?'.  Bytes that
 * cannot be decoded produce one '?' each; code points above U+00FF produce
 * one '?' per character.  `out_size` must be at least 1; the output is
 * always NUL-terminated.  Returns the number of characters written, not
 * counting the terminator.
 */
static int utf8_to_iso8859_1(const unsigned char *in, size_t in_len,
                             char *out, size_t out_size)
{
    size_t i = has_utf8_bom(in, in_len) ? UTF8_BOM_LEN : 0;
    size_t o = 0;

    while (i < in_len && o + 1 < out_size) {
        unsigned int cp;
        int consumed = utf8_to_codepoint(in + i, in_len - i, &cp);

        if (consumed == 0) {
            /* Undecodable byte: substitute it and resynchronise on the next. */
            out[o++] = (char)LATIN1_SUBSTITUTE;
            i++;
            continue;
        }
        out[o++] = to_latin1_char(cp);
        i += (size_t)consumed;
    }
    out[o] = '\0';
    return (int)o;
}

/*
 * Convert `in` to a NUL-terminated ISO-8859-1 string in `out`.
 *
 * The input encoding is guessed in this order: UTF-16LE (BOM FF FE),
 * UTF-16BE (BOM FE FF), UTF-8 (structurally valid with at least one
 * multi-byte sequence); anything else is copied through unchanged.
 * Characters without an ISO-8859-1 representation become '?'.  Output is
 * truncated to fit `out_size`, terminator included.
 *
 * Returns the number of characters written, not counting the terminator.
 * Returns 0 without touching `out` when `in` or `out` is NULL or
 * `out_size` is 0.
 */
int to_iso8859_1(const unsigned char *in, size_t in_len, char *out, size_t out_size)
{
    size_t copy;

    if (!in || !out || out_size == 0) {
        return 0;
    }
    if (in_len == 0) {
        out[0] = '\0';
        return 0;
    }

    if (is_utf16_le(in, in_len)) {
        return utf16le_to_iso8859_1(in, in_len, out, out_size);
    }
    if (is_utf16_be(in, in_len)) {
        return utf16be_to_iso8859_1(in, in_len, out, out_size);
    }
    if (is_utf8(in, in_len)) {
        return utf8_to_iso8859_1(in, in_len, out, out_size);
    }

    /* Already ISO-8859-1 / ASCII — passthrough */
    copy = in_len < out_size - 1 ? in_len : out_size - 1;
    memcpy(out, in, copy);
    out[copy] = '\0';
    return (int)copy;
}