diff --git a/main.c b/main.c index c4ac29f..daa42bb 100644 --- a/main.c +++ b/main.c @@ -188,11 +188,49 @@ static void ai_config_load(void) static int needs_translation(const char *text) { if (!ai_cfg.enabled || !translate_enabled) return 0; - /* Skip very short messages (nicks, URLs, single words like "ok") */ + /* Skip very short messages */ int words = 0; for (const char *p = text; *p; p++) if (*p == ' ') words++; if (words < 1 && strlen(text) < 6) return 0; + + /* Pre-filter: if text contains common words from skip languages, skip */ + static const struct { const char *lang; const char *words[23]; } lang_words[] = { + {"swedish", {"jag", "och", "att", "det", "inte", "var", + "som", "för", "med", "har", "den", "kan", + "ska", "till", "eller", "men", "där", + "när", "från", "ett", "en", "ta", + NULL}}, + {"english", {"the", "and", "that", "this", "with", + "have", "was", "are", "you", "not", + "from", "but", "for", "can", NULL}}, + {NULL, {NULL}} + }; + char lower[1024]; + snprintf(lower, sizeof(lower), "%s", text); + for (char *p = lower; *p; p++) + if (*p >= 'A' && *p <= 'Z') *p += 32; + int hits = 0; + for (int i = 0; lang_words[i].lang; i++) { + if (!strstr(ai_cfg.skip_langs, lang_words[i].lang)) continue; + for (int j = 0; lang_words[i].words[j]; j++) { + const char *w = lang_words[i].words[j]; + size_t wlen = strlen(w); + char *p = lower; + while ((p = strstr(p, w)) != NULL) { + /* Check word boundaries (non-alpha on both sides) */ + int before_ok = (p == lower) || + !(*(p-1) >= 'a' && *(p-1) <= 'z'); + int after_ok = !p[wlen] || + !(p[wlen] >= 'a' && p[wlen] <= 'z'); + if (before_ok && after_ok) { hits++; break; } + p++; + } + } + } + int threshold = (words < 4) ? 1 : 2; + if (hits >= threshold) return 0; + return 1; }