From 7ea04be9ee25d1a768a67737a1c49a4678bd61f5 Mon Sep 17 00:00:00 2001
From: Anders Holck
Date: Thu, 30 Apr 2026 21:20:05 +0200
Subject: [PATCH] Add client-side language pre-filter to reduce false translations

- Common Swedish/English words checked before API call
- Word boundary matching (a-z and two-byte UTF-8 letters next to a
  match are part of the word, everything else is a boundary)
- Case folding covers ASCII and UTF-8 encoded U+00C0..U+00DE
- Lower threshold for short messages (1 hit vs 2)
- Only filters languages listed in ai_skip_langs
---
Review notes (ignored by git am):
- The line structure of this patch was restored from a whitespace-collapsed
  copy; 4-space indentation is assumed. If the context lines do not match
  main.c, apply with --ignore-whitespace.
- The "+" lines were amended during review, so the post-image blob id on
  the index line is the one from the original submission.

 main.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/main.c b/main.c
index c4ac29f..daa42bb 100644
--- a/main.c
+++ b/main.c
@@ -188,11 +188,72 @@ static void ai_config_load(void)
 static int needs_translation(const char *text)
 {
     if (!ai_cfg.enabled || !translate_enabled) return 0;
-    /* Skip very short messages (nicks, URLs, single words like "ok") */
+    /* Skip very short messages */
     int words = 0;
     for (const char *p = text; *p; p++)
         if (*p == ' ') words++;
     if (words < 1 && strlen(text) < 6) return 0;
+
+    /*
+     * Pre-filter: count common words from the languages named in
+     * ai_cfg.skip_langs.  Enough hits mean the text is already in a
+     * language that is skipped, so the function returns 0.
+     */
+    static const struct { const char *lang; const char *words[23]; } lang_words[] = {
+        {"swedish", {"jag", "och", "att", "det", "inte", "var",
+                     "som", "för", "med", "har", "den", "kan",
+                     "ska", "till", "eller", "men", "där",
+                     "när", "från", "ett", "en", "ta",
+                     NULL}},
+        {"english", {"the", "and", "that", "this", "with",
+                     "have", "was", "are", "you", "not",
+                     "from", "but", "for", "can", NULL}},
+        {NULL, {NULL}}
+    };
+    char lower[1024];
+    snprintf(lower, sizeof(lower), "%s", text);
+    /* Fold ASCII A-Z and UTF-8 encoded U+00C0..U+00DE to lower case */
+    for (char *p = lower; *p; p++) {
+        unsigned char c = (unsigned char)*p;
+        if (c >= 'A' && c <= 'Z') {
+            *p = (char)(c + 32);
+        } else if (c == 0xC3) {
+            unsigned char next = (unsigned char)p[1];
+            /* 0x97 is U+00D7 (multiplication sign): no lower-case form */
+            if (next >= 0x80 && next <= 0x9E && next != 0x97)
+                p[1] = (char)(next + 0x20);
+        }
+    }
+    int hits = 0;
+    for (int i = 0; lang_words[i].lang; i++) {
+        if (!strstr(ai_cfg.skip_langs, lang_words[i].lang)) continue;
+        for (int j = 0; lang_words[i].words[j]; j++) {
+            const char *w = lang_words[i].words[j];
+            size_t wlen = strlen(w);
+            char *p = lower;
+            while ((p = strstr(p, w)) != NULL) {
+                /*
+                 * Word boundaries: a neighbour belongs to a word when it
+                 * is a-z or a two-byte UTF-8 letter (lead byte 0xC3..0xDF),
+                 * so "for" is not counted inside "forêt" nor "en" in "žen".
+                 */
+                const unsigned char *u = (const unsigned char *)p;
+                int before_alpha = 0;
+                if (p > lower)
+                    before_alpha = (u[-1] >= 'a' && u[-1] <= 'z') ||
+                                   (p - lower >= 2 &&
+                                    (u[-1] & 0xC0) == 0x80 &&
+                                    u[-2] >= 0xC3 && u[-2] <= 0xDF);
+                int after_alpha = (u[wlen] >= 'a' && u[wlen] <= 'z') ||
+                                  (u[wlen] >= 0xC3 && u[wlen] <= 0xDF);
+                if (!before_alpha && !after_alpha) { hits++; break; }
+                p++;
+            }
+        }
+    }
+    int threshold = (words < 4) ? 1 : 2;
+    if (hits >= threshold) return 0;
+
     return 1;
 }
 