diff --git a/lib/mu-str.c b/lib/mu-str.c index 22675332..85e040b0 100644 --- a/lib/mu-str.c +++ b/lib/mu-str.c @@ -475,6 +475,16 @@ process_str (const char *str, gboolean xapian_esc, gboolean query_esc) char *norm, *cur; norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL); + if (G_UNLIKELY(!norm)) { /* not valid utf8? */ + char *u8; + u8 = mu_str_utf8ify (str); + norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL); + g_free (u8); + } + + if (!norm) + return NULL; + gstr = g_string_sized_new (strlen (norm)); for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) { diff --git a/lib/tests/test-mu-str.c b/lib/tests/test-mu-str.c index f2fbfd8e..8cc84e4f 100644 --- a/lib/tests/test-mu-str.c +++ b/lib/tests/test-mu-str.c @@ -221,7 +221,9 @@ test_mu_str_process_term (void) { "Masha@Аркона.ru", "masha_аркона_ru" }, { "foo:ελληνικά", "foo_ελληνικα" }, { "日本語!!", "日本語__" }, - { "£", "_" } + { "£", "_" }, + /* invalid utf8 */ + { "Hello\xC3\x2EWorld", "hello__world" } }; for (i = 0; i != G_N_ELEMENTS(words); ++i) {