Add client-side language pre-filter to reduce false translations

- Common Swedish/English words checked before API call
- Word boundary matching (non-alpha boundaries)
- Lower threshold for short messages (1 hit vs 2)
- Only filters languages listed in ai_skip_langs
This commit is contained in:
2026-04-30 21:20:05 +02:00
parent 9f5b85ea6f
commit 7ea04be9ee
+39 -1
View File
@@ -188,11 +188,49 @@ static void ai_config_load(void)
/*
 * Return non-zero when byte `c` should be treated as part of a word.
 *
 * ASCII lowercase letters count, and so does every byte >= 0x80: in UTF-8
 * those bytes only occur inside multi-byte sequences, which is how letters
 * such as "ö" or "ß" are encoded. Without this, "en" would match as a whole
 * word inside "grüßen". The caller lowercases ASCII first, so 'A'..'Z' never
 * reach this function.
 */
static int ai_is_word_byte(char c)
{
    unsigned char u = (unsigned char)c;
    return (u >= 'a' && u <= 'z') || u >= 0x80;
}

/*
 * Return 1 if `word` occurs in `haystack` delimited by non-word bytes (or the
 * start/end of the string) on both sides, 0 otherwise.
 */
static int ai_contains_word(const char *haystack, const char *word)
{
    size_t wlen = strlen(word);

    for (const char *p = strstr(haystack, word); p != NULL;
         p = strstr(p + 1, word)) {
        int before_ok = (p == haystack) || !ai_is_word_byte(p[-1]);
        /* p[wlen] may be the terminating NUL, which is not a word byte. */
        int after_ok = !ai_is_word_byte(p[wlen]);
        if (before_ok && after_ok)
            return 1;
    }
    return 0;
}

/*
 * Decide whether `text` should be considered for translation.
 *
 * Returns 0 (skip) when:
 *   - `text` is NULL,
 *   - ai_cfg.enabled or translate_enabled is off,
 *   - the message has no spaces and is shorter than 6 bytes,
 *   - enough common words from a language named in ai_cfg.skip_langs are
 *     found (1 hit if the message has fewer than 4 spaces, otherwise 2).
 * Returns 1 otherwise.
 *
 * Limitations: only the first 1023 bytes are inspected, and case folding is
 * ASCII-only, so an all-caps non-ASCII word such as "FÖR" is not matched.
 * Hits are summed across all listed languages.
 */
static int needs_translation(const char *text)
{
    if (!text) return 0;
    if (!ai_cfg.enabled || !translate_enabled) return 0;

    /* Skip very short messages (nicks, URLs, single words like "ok") */
    int spaces = 0;
    for (const char *p = text; *p; p++) {
        if (*p == ' ') spaces++;
    }
    if (spaces < 1 && strlen(text) < 6) return 0;

    /*
     * Pre-filter: if text contains common words from skip languages, skip.
     * Each word list is NULL-terminated; the array size must leave room for
     * that sentinel when words are added.
     */
    static const struct { const char *lang; const char *words[23]; } lang_words[] = {
        {"swedish", {"jag", "och", "att", "det", "inte", "var",
                     "som", "för", "med", "har", "den", "kan",
                     "ska", "till", "eller", "men", "där",
                     "när", "från", "ett", "en", "ta",
                     NULL}},
        {"english", {"the", "and", "that", "this", "with",
                     "have", "was", "are", "you", "not",
                     "from", "but", "for", "can", NULL}},
        {NULL, {NULL}}
    };

    /* Work on an ASCII-lowercased copy; snprintf truncates and terminates. */
    char lower[1024];
    snprintf(lower, sizeof(lower), "%s", text);
    for (char *p = lower; *p; p++) {
        if (*p >= 'A' && *p <= 'Z') *p += 'a' - 'A';
    }

    /* Short messages carry fewer function words, so one hit is enough. */
    int threshold = (spaces < 4) ? 1 : 2;
    int hits = 0;

    for (int i = 0; lang_words[i].lang; i++) {
        /* Only consult word lists for languages the user asked to skip. */
        if (!strstr(ai_cfg.skip_langs, lang_words[i].lang)) continue;

        for (int j = 0; lang_words[i].words[j]; j++) {
            /* Each distinct word counts at most once. */
            if (ai_contains_word(lower, lang_words[i].words[j]) &&
                ++hits >= threshold)
                return 0;
        }
    }
    return 1;
}