diff --git a/lib/mu-query.cc b/lib/mu-query.cc index 2d68be8b..b376628f 100644 --- a/lib/mu-query.cc +++ b/lib/mu-query.cc @@ -294,12 +294,13 @@ mu_query_preprocess (const char *query, GError **err) return NULL; for (cur = parts; cur; cur = g_slist_next(cur)) { + char *data; + data = (gchar*)cur->data; /* remove accents and turn to lower-case */ - cur->data = mu_str_normalize_in_place ((gchar*)cur->data, TRUE); /* escape '@', single '_' and ':' if it's not following a * xapian-pfx with '_' */ - cur->data = mu_str_xapian_escape_in_place - ((gchar*)cur->data, TRUE /*escape spaces too*/); + cur->data = mu_str_xapian_escape (data, TRUE, NULL); + g_free (data); } myquery = mu_str_from_list (parts, ' '); diff --git a/lib/mu-store-priv.hh b/lib/mu-store-priv.hh index 0cb55cbe..cb7968a9 100644 --- a/lib/mu-store-priv.hh +++ b/lib/mu-store-priv.hh @@ -1,6 +1,6 @@ /* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */ /* -** Copyright (C) 2011 +** Copyright (C) 2011-2012 ** ** This program is free software; you can redistribute it and/or modify it ** under the terms of the GNU General Public License as published by the diff --git a/lib/mu-store-write.cc b/lib/mu-store-write.cc index f2dce569..b8019216 100644 --- a/lib/mu-store-write.cc +++ b/lib/mu-store-write.cc @@ -306,7 +306,7 @@ add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) /* for string and string-list */ static void add_terms_values_str (Xapian::Document& doc, char *val, - MuMsgFieldId mfid) + MuMsgFieldId mfid, GStringChunk *strchunk) { /* the value is what we display in search results; the * unchanged original */ @@ -315,7 +315,7 @@ add_terms_values_str (Xapian::Document& doc, char *val, /* now, let's create some search terms... */ if (mu_msg_field_normalize (mfid)) - mu_str_normalize_in_place (val, TRUE); + val = mu_str_normalize_in_place_try (val, TRUE, strchunk); if (mu_msg_field_xapian_index (mfid)) { Xapian::TermGenerator termgen; @@ -323,8 +323,8 @@ add_terms_values_str (Xapian::Document& doc, char *val, termgen.index_text_without_positions (val, 1, prefix(mfid)); } if (mu_msg_field_xapian_escape (mfid)) - mu_str_xapian_escape_in_place (val, - TRUE /*esc_space*/); + val= mu_str_xapian_escape_in_place_try (val, TRUE /*esc_space*/, + strchunk); if (mu_msg_field_xapian_term(mfid)) doc.add_term (prefix(mfid) + std::string(val, 0, _MuStore::MAX_TERM_LENGTH)); @@ -333,31 +333,23 @@ add_terms_values_str (Xapian::Document& doc, char *val, static void add_terms_values_string (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) + MuMsgFieldId mfid, GStringChunk *strchunk) { const char *orig; char *val; - size_t len; if (!(orig = mu_msg_get_field_string (msg, mfid))) return; /* nothing to do */ - /* try stack-allocation, it's much faster*/ - len = strlen (orig); - val = (char*)(G_LIKELY(len < 1024)?g_alloca(len+1):g_malloc(len+1)); - strcpy (val, orig); - - add_terms_values_str (doc, val, mfid); - - if (!(G_LIKELY(len < 1024))) - g_free (val); + val = g_string_chunk_insert (strchunk, orig); + add_terms_values_str (doc, val, mfid, strchunk); } static void add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) + MuMsgFieldId mfid, GStringChunk *strchunk) { const GSList *lst; @@ -373,21 +365,9 @@ add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg, if (lst && mu_msg_field_xapian_term (mfid)) { while (lst) { - size_t len; char *val; - /* try stack-allocation, it's much faster*/ - len = strlen ((char*)lst->data); - if (G_LIKELY(len < 1024)) - val = (char*)g_alloca(len+1); - else - val = (char*)g_malloc(len+1); - strcpy (val, (char*)lst->data); - - add_terms_values_str (doc, val, mfid); - - if (!(G_LIKELY(len < 1024))) - g_free (val); - + val = g_string_chunk_insert (strchunk, (const gchar*)lst->data); + add_terms_values_str (doc, val, mfid, strchunk); lst = g_slist_next ((GSList*)lst); } } @@ -395,10 +375,11 @@ add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg, struct PartData { - PartData (Xapian::Document& doc, MuMsgFieldId mfid): - _doc (doc), _mfid(mfid) {} + PartData (Xapian::Document& doc, MuMsgFieldId mfid, GStringChunk *strchunk): + _doc (doc), _mfid(mfid), _strchunk(strchunk) {} Xapian::Document _doc; MuMsgFieldId _mfid; + GStringChunk *_strchunk; }; static gboolean @@ -434,14 +415,13 @@ index_text_part (MuMsgPart *part, PartData *pdata) termgen.set_document(pdata->_doc); - norm = mu_str_normalize (txt, TRUE); + /* allocated on strchunk, no need to free */ + norm = mu_str_normalize (txt, TRUE, pdata->_strchunk); termgen.index_text_without_positions (norm, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT)); - g_free (norm); g_free (txt); - return TRUE; } @@ -470,13 +450,11 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) /* save the name of anything that has a filename */ if (part->file_name) { - char val[MuStore::MAX_TERM_LENGTH + 1]; - strncpy (val, part->file_name, sizeof(val)); - - /* now, let's create a term... */ - mu_str_normalize_in_place (val, TRUE); - mu_str_xapian_escape_in_place (val, TRUE /*esc space*/); - + char *val; + /* now, let's create a term... allocated on strchunk, + * no need to free*/ + val = mu_str_xapian_escape (part->file_name, TRUE /*esc space*/, + pdata->_strchunk); pdata->_doc.add_term (file + std::string(val, 0, MuStore::MAX_TERM_LENGTH)); } @@ -490,9 +468,9 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) static void add_terms_values_attach (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) + MuMsgFieldId mfid, GStringChunk *strchunk) { - PartData pdata (doc, mfid); + PartData pdata (doc, mfid, strchunk); mu_msg_part_foreach (msg, TRUE, (MuMsgPartForeachFunc)each_part, &pdata); } @@ -500,7 +478,7 @@ add_terms_values_attach (Xapian::Document& doc, MuMsg *msg, static void add_terms_values_body (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) + MuMsgFieldId mfid, GStringChunk *strchunk) { const char *str; char *norm; @@ -518,32 +496,32 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg, Xapian::TermGenerator termgen; termgen.set_document(doc); - norm = mu_str_normalize (str, TRUE); + /* norm is allocated on strchunk, no need for freeing */ + norm = mu_str_normalize (str, TRUE, strchunk); termgen.index_text_without_positions (norm, 1, prefix(mfid)); - - g_free (norm); } struct _MsgDoc { Xapian::Document *_doc; MuMsg *_msg; MuStore *_store; + GStringChunk *_strchunk; }; typedef struct _MsgDoc MsgDoc; static void -add_terms_values_default (MuMsgFieldId mfid, MsgDoc* msgdoc) +add_terms_values_default (MuMsgFieldId mfid, MsgDoc *msgdoc) { if (mu_msg_field_is_numeric (mfid)) add_terms_values_number (*msgdoc->_doc, msgdoc->_msg, mfid); else if (mu_msg_field_is_string (mfid)) add_terms_values_string - (*msgdoc->_doc, msgdoc->_msg, mfid); + (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk); else if (mu_msg_field_is_string_list(mfid)) add_terms_values_string_list - (*msgdoc->_doc, msgdoc->_msg, mfid); + (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk); else g_return_if_reached (); @@ -564,13 +542,14 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc) add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid); break; case MU_MSG_FIELD_ID_BODY_TEXT: - add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid); + add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk); break; /* note: add_terms_values_attach handles _FILE, _MIME and * _ATTACH_TEXT msgfields */ case MU_MSG_FIELD_ID_FILE: - add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid); + add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid, + msgdoc->_strchunk); break; case MU_MSG_FIELD_ID_MIME: case MU_MSG_FIELD_ID_EMBEDDED_TEXT: @@ -622,20 +601,22 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) if (!mu_str_is_empty(contact->name)) { Xapian::TermGenerator termgen; termgen.set_document (*msgdoc->_doc); - char *norm = mu_str_normalize (contact->name, TRUE); + /* note: norm is added to stringchunk, no need for freeing */ + char *norm = mu_str_normalize (contact->name, TRUE, msgdoc->_strchunk); termgen.index_text_without_positions (norm, 1, pfx); - g_free (norm); } /* don't normalize e-mail address, but do lowercase it */ if (!mu_str_is_empty(contact->address)) { char *escaped; + /* note: escaped is added to stringchunk, no need for + * freeing */ escaped = mu_str_xapian_escape (contact->address, - FALSE /*dont esc space*/); + FALSE /*dont esc space*/, + msgdoc->_strchunk); msgdoc->_doc->add_term (std::string (pfx + escaped, 0, MuStore::MAX_TERM_LENGTH)); - g_free (escaped); /* store it also in our contacts cache */ if (msgdoc->_store->contacts()) @@ -645,18 +626,22 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) } } +#define MU_STRING_CHUNK_SIZE 8192 Xapian::Document new_doc_from_message (MuStore *store, MuMsg *msg) { Xapian::Document doc; - MsgDoc docinfo = {&doc, msg, store}; + MsgDoc docinfo = {&doc, msg, store, 0}; + docinfo._strchunk = g_string_chunk_new (MU_STRING_CHUNK_SIZE); mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo); /* also store the contact-info as separate terms */ mu_msg_contact_foreach (msg, (MuMsgContactForeachFunc)each_contact_info, &docinfo); + g_string_chunk_free (docinfo._strchunk); + return doc; } diff --git a/lib/mu-str-normalize.c b/lib/mu-str-normalize.c index 9407465b..f0f8001c 100644 --- a/lib/mu-str-normalize.c +++ b/lib/mu-str-normalize.c @@ -30,34 +30,48 @@ char* -mu_str_normalize (const char *str, gboolean downcase) +mu_str_normalize (const char *str, gboolean downcase, GStringChunk *strchunk) { + char *mystr; + g_return_val_if_fail (str, NULL); - return mu_str_normalize_in_place (g_strdup(str), downcase); + if (strchunk) + mystr = g_string_chunk_insert (strchunk, str); + else + mystr = g_strdup (str); + + return mu_str_normalize_in_place_try (mystr, downcase, strchunk); } /* this implementation should work for _all_ locales. */ static char* -mu_str_normalize_in_place_generic (char *str, gboolean downcase) +mu_str_normalize_in_place_generic (char *str, gboolean downcase, GStringChunk *strchunk) { + + char *norm; + size_t len; + /* FIXME: add accent-folding etc. */ + if (!downcase) + return str; /* nothing to do */ - if (downcase) { + len = strlen (str); + norm = g_utf8_strdown (str, len); - char *norm; - size_t len; - len = strlen (str); - norm = g_utf8_strdown (str, len); - - if (strlen (norm) > len) - g_warning ("normalized text doesn't fit :/"); - - memcpy (str, norm, len); + if (strlen (norm) > len) { + /* this case is rare, but does happen */ + char *copy; + if (!strchunk) + return norm; + copy = g_string_chunk_insert (strchunk, norm); + g_free (norm); + return copy; } + memcpy (str, norm, len); return str; } @@ -78,7 +92,7 @@ mu_str_normalize_in_place_generic (char *str, gboolean downcase) * original 0xc3 0x9f */ char* -mu_str_normalize_in_place (char *str, gboolean downcase) +mu_str_normalize_in_place_try (char *str, gboolean downcase, GStringChunk *strchunk) { const guchar *cur; int i; @@ -386,7 +400,7 @@ mu_str_normalize_in_place (char *str, gboolean downcase) /* our fast-path for latin-utf8 does not work -- bummer! * use something more generic (but a bit slower) */ - return mu_str_normalize_in_place_generic (str, downcase); + return mu_str_normalize_in_place_generic (str, downcase, strchunk); } } diff --git a/lib/mu-str.c b/lib/mu-str.c index 81484c94..aca24217 100644 --- a/lib/mu-str.c +++ b/lib/mu-str.c @@ -430,7 +430,7 @@ check_for_field (const char *str, gboolean *is_field, gboolean *is_range_field) * function expects search terms (not complete queries) * */ char* -mu_str_xapian_escape_in_place (char *term, gboolean esc_space) +mu_str_xapian_escape_in_place_try (char *term, gboolean esc_space, GStringChunk *strchunk) { unsigned char *cur; const char escchar = '_'; @@ -474,15 +474,22 @@ mu_str_xapian_escape_in_place (char *term, gboolean esc_space) } /* downcase try to remove accents etc. */ - return mu_str_normalize_in_place (term, TRUE); + return mu_str_normalize_in_place_try (term, TRUE, strchunk); } char* -mu_str_xapian_escape (const char *query, gboolean esc_space) +mu_str_xapian_escape (const char *query, gboolean esc_space, GStringChunk *strchunk) { + char *mystr; + g_return_val_if_fail (query, NULL); - return mu_str_xapian_escape_in_place (g_strdup(query), esc_space); + if (strchunk) + mystr = g_string_chunk_insert (strchunk, query); + else + mystr = g_strdup (query); + + return mu_str_xapian_escape_in_place_try (mystr, esc_space, strchunk); } diff --git a/lib/mu-str.h b/lib/mu-str.h index 6cae11bd..2d4a0777 100644 --- a/lib/mu-str.h +++ b/lib/mu-str.h @@ -107,14 +107,16 @@ char* mu_str_summarize (const char* str, size_t max_lines) * * @param str a valid utf8 string or NULL * @param downcase if TRUE, convert the string to lowercase + * @param strchunk (optional) if non-NULL, allocate strings on strchunk * - * @return the normalize string, or NULL in case of error or str was NULL + * @return the normalized string, or NULL in case of error or str was + * NULL. Unless strchunk was provided, user must g_free the string when + * no longer needed */ -char* mu_str_normalize (const char *str, gboolean downcase) +char* mu_str_normalize (const char *str, gboolean downcase, + GStringChunk *strchunk) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; - - /** * normalize a string (ie., collapse accented characters etc.), and * optionally, downcase it. this happen by changing the string; if @@ -123,12 +125,14 @@ char* mu_str_normalize (const char *str, gboolean downcase) * * @param str a valid utf8 string or NULL * @param downcase if TRUE, convert the string to lowercase + * @param strchunk (optional) if non-NULL, allocate strings on strchunk * * @return the normalized string, or NULL in case of error or str was - * NULL + * NULL. User only needs to free the returned string if a) return + * value != str and b) strchunk was not provided. */ -char* mu_str_normalize_in_place (char *str, gboolean downcase); - +char* mu_str_normalize_in_place_try (char *str, gboolean downcase, + GStringChunk *strchunk); /** * escape the string for use with xapian matching. in practice, if the @@ -140,10 +144,15 @@ char* mu_str_normalize_in_place (char *str, gboolean downcase); * * @param query a query string * @param esc_space escape space characters as well + * @param strchunk (optional) if non-NULL, allocate strings on strchunk + * + * @return the escaped string or NULL in case of error. User only + * needs to free the returned string if a) return value != query and b) + * strchunk was not provided. * - * @return the escaped string or NULL in case of error */ -char* mu_str_xapian_escape_in_place (char *query, gboolean esc_space); +char* mu_str_xapian_escape_in_place_try (char *query, gboolean esc_space, + GStringChunk *strchunk); /** * escape the string for use with xapian matching. in practice, if the @@ -153,11 +162,14 @@ char* mu_str_xapian_escape_in_place (char *query, gboolean esc_space); * * @param query a query string * @param esc_space escape space characters as well + * @param strchunk (optional) if non-NULL, allocate strings on strchunk * * @return the escaped string (free with g_free) or NULL in case of error + * Unless strchunk was provided, user must g_free the string when + * no longer needed */ -char* mu_str_xapian_escape (const char *query, gboolean esc_space) - G_GNUC_WARN_UNUSED_RESULT; +char* mu_str_xapian_escape (const char *query, gboolean esc_space, + GStringChunk *strchunk) G_GNUC_WARN_UNUSED_RESULT;