* mu-store: strings used for searching are now stored in normalized, lowercase form
This commit is contained in:
@ -17,7 +17,7 @@
|
|||||||
**
|
**
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#if HAVE_CONFIG_H
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif /*HAVE_CONFIG_H*/
|
#endif /*HAVE_CONFIG_H*/
|
||||||
|
|
||||||
@ -211,20 +211,24 @@ add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
|
|||||||
|
|
||||||
if (mu_msg_field_xapian_index (field)) {
|
if (mu_msg_field_xapian_index (field)) {
|
||||||
Xapian::TermGenerator termgen;
|
Xapian::TermGenerator termgen;
|
||||||
|
gchar *norm (mu_msg_str_normalize(str, TRUE));
|
||||||
termgen.set_document (doc);
|
termgen.set_document (doc);
|
||||||
termgen.index_text_without_positions (str, 1, prefix);
|
termgen.index_text_without_positions (norm, 1, prefix);
|
||||||
|
g_free(norm);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mu_msg_field_xapian_term(field)) {
|
if (mu_msg_field_xapian_term(field)) {
|
||||||
/* terms can be up to MU_STORE_MAX_TERM_LENGTH (240)
|
/* terms can be up to MU_STORE_MAX_TERM_LENGTH (240)
|
||||||
* long; this is a Xapian limit */
|
* long; this is a Xapian limit */
|
||||||
doc.add_term (std::string (prefix + value, 0,
|
// doc.add_term (std::string (prefix + value, 0,
|
||||||
MU_STORE_MAX_TERM_LENGTH));
|
// MU_STORE_MAX_TERM_LENGTH));
|
||||||
|
|
||||||
/* add a normalized version as well (accents removed, lowercase) */
|
/* add a normalized version as well (accents removed,
|
||||||
std::string norm(mu_msg_str_normalize(str, TRUE));
|
* lowercase), if it's actually different */
|
||||||
doc.add_term (std::string (prefix + norm, 0,
|
gchar *norm = mu_msg_str_normalize(str, TRUE);
|
||||||
MU_STORE_MAX_TERM_LENGTH));
|
doc.add_term (std::string (prefix + std::string(norm), 0,
|
||||||
|
MU_STORE_MAX_TERM_LENGTH));
|
||||||
|
g_free (norm);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mu_msg_field_xapian_value(field))
|
if (mu_msg_field_xapian_value(field))
|
||||||
@ -237,6 +241,7 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
|
|||||||
const MuMsgField* field)
|
const MuMsgField* field)
|
||||||
{
|
{
|
||||||
const char *str;
|
const char *str;
|
||||||
|
char *norm;
|
||||||
|
|
||||||
if (mu_msg_get_flags(msg) & MU_MSG_FLAG_ENCRYPTED)
|
if (mu_msg_get_flags(msg) & MU_MSG_FLAG_ENCRYPTED)
|
||||||
return; /* don't store encrypted bodies */
|
return; /* don't store encrypted bodies */
|
||||||
@ -247,10 +252,13 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
|
|||||||
|
|
||||||
if (!str)
|
if (!str)
|
||||||
return; /* no body... */
|
return; /* no body... */
|
||||||
|
|
||||||
Xapian::TermGenerator termgen;
|
Xapian::TermGenerator termgen;
|
||||||
termgen.set_document(doc);
|
termgen.set_document(doc);
|
||||||
termgen.index_text(str, 1, mu_msg_field_xapian_prefix(field));
|
|
||||||
|
norm = mu_msg_str_normalize (str, TRUE);
|
||||||
|
termgen.index_text(norm, 1, mu_msg_field_xapian_prefix(field));
|
||||||
|
g_free (norm);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct _MsgDoc {
|
struct _MsgDoc {
|
||||||
@ -316,22 +324,25 @@ each_contact_info (MuMsgContact *contact, MsgDoc *data)
|
|||||||
case MU_MSG_CONTACT_TYPE_CC: pfx = cc_pfx; break;
|
case MU_MSG_CONTACT_TYPE_CC: pfx = cc_pfx; break;
|
||||||
default: return; /* other types (like bcc) are ignored */
|
default: return; /* other types (like bcc) are ignored */
|
||||||
}
|
}
|
||||||
|
|
||||||
// g_print ("[%s %s]\n", pfx.c_str(), contact->address);
|
|
||||||
|
|
||||||
if (contact->name && strlen(contact->name) > 0) {
|
if (contact->name && strlen(contact->name) > 0) {
|
||||||
Xapian::TermGenerator termgen;
|
Xapian::TermGenerator termgen;
|
||||||
termgen.set_document (*data->_doc);
|
termgen.set_document (*data->_doc);
|
||||||
termgen.index_text_without_positions (contact->name, 1, pfx);
|
char *norm = mu_msg_str_normalize (contact->name, TRUE);
|
||||||
|
termgen.index_text_without_positions (norm, 1, pfx);
|
||||||
|
g_free (norm);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (contact->address && strlen (contact->address))
|
/* don't normalize e-mail address, but do lowercase it */
|
||||||
data->_doc->add_term (std::string (pfx + contact->address, 0,
|
if (contact->address && strlen (contact->address)) {
|
||||||
MU_STORE_MAX_TERM_LENGTH));
|
char *lower = g_utf8_strdown (contact->address, -1);
|
||||||
|
data->_doc->add_term (std::string (pfx + lower, 0,
|
||||||
|
MU_STORE_MAX_TERM_LENGTH));
|
||||||
|
g_free (lower);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* get a unique id for this message */
|
/* get a unique id for this message */
|
||||||
static std::string
|
static std::string
|
||||||
get_message_uid (const char* path)
|
get_message_uid (const char* path)
|
||||||
@ -350,8 +361,6 @@ get_message_uid (MuMsg *msg)
|
|||||||
return get_message_uid (mu_msg_get_path(msg));
|
return get_message_uid (mu_msg_get_path(msg));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MuResult
|
MuResult
|
||||||
mu_store_store (MuStore *store, MuMsg *msg)
|
mu_store_store (MuStore *store, MuMsg *msg)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user