* handle invalid utf8; fixes #211 (hopefully)
This commit is contained in:
10
lib/mu-str.c
10
lib/mu-str.c
@@ -475,6 +475,16 @@ process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
|
|||||||
char *norm, *cur;
|
char *norm, *cur;
|
||||||
|
|
||||||
norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
|
norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
|
||||||
|
if (G_UNLIKELY(!norm)) { /* not valid utf8? */
|
||||||
|
char *u8;
|
||||||
|
u8 = mu_str_utf8ify (str);
|
||||||
|
norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
|
||||||
|
g_free (u8);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!norm)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
gstr = g_string_sized_new (strlen (norm));
|
gstr = g_string_sized_new (strlen (norm));
|
||||||
|
|
||||||
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {
|
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {
|
||||||
|
|||||||
@@ -221,7 +221,9 @@ test_mu_str_process_term (void)
|
|||||||
{ "Masha@Аркона.ru", "masha_аркона_ru" },
|
{ "Masha@Аркона.ru", "masha_аркона_ru" },
|
||||||
{ "foo:ελληνικά", "foo_ελληνικα" },
|
{ "foo:ελληνικά", "foo_ελληνικα" },
|
||||||
{ "日本語!!", "日本語__" },
|
{ "日本語!!", "日本語__" },
|
||||||
{ "£", "_" }
|
{ "£", "_" },
|
||||||
|
/* invalid utf8 */
|
||||||
|
{ "Hello\xC3\x2EWorld", "hello__world" }
|
||||||
};
|
};
|
||||||
|
|
||||||
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
|
for (i = 0; i != G_N_ELEMENTS(words); ++i) {
|
||||||
|
|||||||
Reference in New Issue
Block a user