Add client-side language pre-filter to reduce false translations
- Common Swedish/English words are checked before the API call
- Word-boundary matching (boundaries are non-alphabetic characters)
- Lower threshold for short messages (1 hit instead of 2)
- Only languages listed in ai_skip_langs are filtered
This commit is contained in:
@@ -188,11 +188,49 @@ static void ai_config_load(void)
|
||||
static int needs_translation(const char *text)
|
||||
{
|
||||
if (!ai_cfg.enabled || !translate_enabled) return 0;
|
||||
/* Skip very short messages (nicks, URLs, single words like "ok") */
|
||||
/* Skip very short messages */
|
||||
int words = 0;
|
||||
for (const char *p = text; *p; p++)
|
||||
if (*p == ' ') words++;
|
||||
if (words < 1 && strlen(text) < 6) return 0;
|
||||
|
||||
/* Pre-filter: if text contains common words from skip languages, skip */
|
||||
static const struct { const char *lang; const char *words[23]; } lang_words[] = {
|
||||
{"swedish", {"jag", "och", "att", "det", "inte", "var",
|
||||
"som", "för", "med", "har", "den", "kan",
|
||||
"ska", "till", "eller", "men", "där",
|
||||
"när", "från", "ett", "en", "ta",
|
||||
NULL}},
|
||||
{"english", {"the", "and", "that", "this", "with",
|
||||
"have", "was", "are", "you", "not",
|
||||
"from", "but", "for", "can", NULL}},
|
||||
{NULL, {NULL}}
|
||||
};
|
||||
char lower[1024];
|
||||
snprintf(lower, sizeof(lower), "%s", text);
|
||||
for (char *p = lower; *p; p++)
|
||||
if (*p >= 'A' && *p <= 'Z') *p += 32;
|
||||
int hits = 0;
|
||||
for (int i = 0; lang_words[i].lang; i++) {
|
||||
if (!strstr(ai_cfg.skip_langs, lang_words[i].lang)) continue;
|
||||
for (int j = 0; lang_words[i].words[j]; j++) {
|
||||
const char *w = lang_words[i].words[j];
|
||||
size_t wlen = strlen(w);
|
||||
char *p = lower;
|
||||
while ((p = strstr(p, w)) != NULL) {
|
||||
/* Check word boundaries (non-alpha on both sides) */
|
||||
int before_ok = (p == lower) ||
|
||||
!(*(p-1) >= 'a' && *(p-1) <= 'z');
|
||||
int after_ok = !p[wlen] ||
|
||||
!(p[wlen] >= 'a' && p[wlen] <= 'z');
|
||||
if (before_ok && after_ok) { hits++; break; }
|
||||
p++;
|
||||
}
|
||||
}
|
||||
}
|
||||
int threshold = (words < 4) ? 1 : 2;
|
||||
if (hits >= threshold) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user