* mu4e: use GStringChunk* for string normalization / escaping
- this should fix the rare bug for some non-Latin Unicode blocks, simplify some code, and possibly improve performance a bit
This commit is contained in:
@ -294,12 +294,13 @@ mu_query_preprocess (const char *query, GError **err)
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
for (cur = parts; cur; cur = g_slist_next(cur)) {
|
for (cur = parts; cur; cur = g_slist_next(cur)) {
|
||||||
|
char *data;
|
||||||
|
data = (gchar*)cur->data;
|
||||||
/* remove accents and turn to lower-case */
|
/* remove accents and turn to lower-case */
|
||||||
cur->data = mu_str_normalize_in_place ((gchar*)cur->data, TRUE);
|
|
||||||
/* escape '@', single '_' and ':' if it's not following a
|
/* escape '@', single '_' and ':' if it's not following a
|
||||||
* xapian-pfx with '_' */
|
* xapian-pfx with '_' */
|
||||||
cur->data = mu_str_xapian_escape_in_place
|
cur->data = mu_str_xapian_escape (data, TRUE, NULL);
|
||||||
((gchar*)cur->data, TRUE /*escape spaces too*/);
|
g_free (data);
|
||||||
}
|
}
|
||||||
|
|
||||||
myquery = mu_str_from_list (parts, ' ');
|
myquery = mu_str_from_list (parts, ' ');
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */
|
/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */
|
||||||
/*
|
/*
|
||||||
** Copyright (C) 2011 <djcb@djcbsoftware.nl>
|
** Copyright (C) 2011-2012 <djcb@djcbsoftware.nl>
|
||||||
**
|
**
|
||||||
** This program is free software; you can redistribute it and/or modify it
|
** This program is free software; you can redistribute it and/or modify it
|
||||||
** under the terms of the GNU General Public License as published by the
|
** under the terms of the GNU General Public License as published by the
|
||||||
|
|||||||
@ -306,7 +306,7 @@ add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
|
|||||||
/* for string and string-list */
|
/* for string and string-list */
|
||||||
static void
|
static void
|
||||||
add_terms_values_str (Xapian::Document& doc, char *val,
|
add_terms_values_str (Xapian::Document& doc, char *val,
|
||||||
MuMsgFieldId mfid)
|
MuMsgFieldId mfid, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
/* the value is what we display in search results; the
|
/* the value is what we display in search results; the
|
||||||
* unchanged original */
|
* unchanged original */
|
||||||
@ -315,7 +315,7 @@ add_terms_values_str (Xapian::Document& doc, char *val,
|
|||||||
|
|
||||||
/* now, let's create some search terms... */
|
/* now, let's create some search terms... */
|
||||||
if (mu_msg_field_normalize (mfid))
|
if (mu_msg_field_normalize (mfid))
|
||||||
mu_str_normalize_in_place (val, TRUE);
|
val = mu_str_normalize_in_place_try (val, TRUE, strchunk);
|
||||||
|
|
||||||
if (mu_msg_field_xapian_index (mfid)) {
|
if (mu_msg_field_xapian_index (mfid)) {
|
||||||
Xapian::TermGenerator termgen;
|
Xapian::TermGenerator termgen;
|
||||||
@ -323,8 +323,8 @@ add_terms_values_str (Xapian::Document& doc, char *val,
|
|||||||
termgen.index_text_without_positions (val, 1, prefix(mfid));
|
termgen.index_text_without_positions (val, 1, prefix(mfid));
|
||||||
}
|
}
|
||||||
if (mu_msg_field_xapian_escape (mfid))
|
if (mu_msg_field_xapian_escape (mfid))
|
||||||
mu_str_xapian_escape_in_place (val,
|
val= mu_str_xapian_escape_in_place_try (val, TRUE /*esc_space*/,
|
||||||
TRUE /*esc_space*/);
|
strchunk);
|
||||||
if (mu_msg_field_xapian_term(mfid))
|
if (mu_msg_field_xapian_term(mfid))
|
||||||
doc.add_term (prefix(mfid) +
|
doc.add_term (prefix(mfid) +
|
||||||
std::string(val, 0, _MuStore::MAX_TERM_LENGTH));
|
std::string(val, 0, _MuStore::MAX_TERM_LENGTH));
|
||||||
@ -333,31 +333,23 @@ add_terms_values_str (Xapian::Document& doc, char *val,
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
|
add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
|
||||||
MuMsgFieldId mfid)
|
MuMsgFieldId mfid, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
const char *orig;
|
const char *orig;
|
||||||
char *val;
|
char *val;
|
||||||
size_t len;
|
|
||||||
|
|
||||||
if (!(orig = mu_msg_get_field_string (msg, mfid)))
|
if (!(orig = mu_msg_get_field_string (msg, mfid)))
|
||||||
return; /* nothing to do */
|
return; /* nothing to do */
|
||||||
|
|
||||||
/* try stack-allocation, it's much faster*/
|
val = g_string_chunk_insert (strchunk, orig);
|
||||||
len = strlen (orig);
|
add_terms_values_str (doc, val, mfid, strchunk);
|
||||||
val = (char*)(G_LIKELY(len < 1024)?g_alloca(len+1):g_malloc(len+1));
|
|
||||||
strcpy (val, orig);
|
|
||||||
|
|
||||||
add_terms_values_str (doc, val, mfid);
|
|
||||||
|
|
||||||
if (!(G_LIKELY(len < 1024)))
|
|
||||||
g_free (val);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg,
|
add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg,
|
||||||
MuMsgFieldId mfid)
|
MuMsgFieldId mfid, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
const GSList *lst;
|
const GSList *lst;
|
||||||
|
|
||||||
@ -373,21 +365,9 @@ add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg,
|
|||||||
|
|
||||||
if (lst && mu_msg_field_xapian_term (mfid)) {
|
if (lst && mu_msg_field_xapian_term (mfid)) {
|
||||||
while (lst) {
|
while (lst) {
|
||||||
size_t len;
|
|
||||||
char *val;
|
char *val;
|
||||||
/* try stack-allocation, it's much faster*/
|
val = g_string_chunk_insert (strchunk, (const gchar*)lst->data);
|
||||||
len = strlen ((char*)lst->data);
|
add_terms_values_str (doc, val, mfid, strchunk);
|
||||||
if (G_LIKELY(len < 1024))
|
|
||||||
val = (char*)g_alloca(len+1);
|
|
||||||
else
|
|
||||||
val = (char*)g_malloc(len+1);
|
|
||||||
strcpy (val, (char*)lst->data);
|
|
||||||
|
|
||||||
add_terms_values_str (doc, val, mfid);
|
|
||||||
|
|
||||||
if (!(G_LIKELY(len < 1024)))
|
|
||||||
g_free (val);
|
|
||||||
|
|
||||||
lst = g_slist_next ((GSList*)lst);
|
lst = g_slist_next ((GSList*)lst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -395,10 +375,11 @@ add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg,
|
|||||||
|
|
||||||
|
|
||||||
struct PartData {
|
struct PartData {
|
||||||
PartData (Xapian::Document& doc, MuMsgFieldId mfid):
|
PartData (Xapian::Document& doc, MuMsgFieldId mfid, GStringChunk *strchunk):
|
||||||
_doc (doc), _mfid(mfid) {}
|
_doc (doc), _mfid(mfid), _strchunk(strchunk) {}
|
||||||
Xapian::Document _doc;
|
Xapian::Document _doc;
|
||||||
MuMsgFieldId _mfid;
|
MuMsgFieldId _mfid;
|
||||||
|
GStringChunk *_strchunk;
|
||||||
};
|
};
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
@ -434,14 +415,13 @@ index_text_part (MuMsgPart *part, PartData *pdata)
|
|||||||
|
|
||||||
termgen.set_document(pdata->_doc);
|
termgen.set_document(pdata->_doc);
|
||||||
|
|
||||||
norm = mu_str_normalize (txt, TRUE);
|
/* allocated on strchunk, no need to free */
|
||||||
|
norm = mu_str_normalize (txt, TRUE, pdata->_strchunk);
|
||||||
|
|
||||||
termgen.index_text_without_positions
|
termgen.index_text_without_positions
|
||||||
(norm, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT));
|
(norm, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT));
|
||||||
|
|
||||||
g_free (norm);
|
|
||||||
g_free (txt);
|
g_free (txt);
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -470,13 +450,11 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
|
|||||||
|
|
||||||
/* save the name of anything that has a filename */
|
/* save the name of anything that has a filename */
|
||||||
if (part->file_name) {
|
if (part->file_name) {
|
||||||
char val[MuStore::MAX_TERM_LENGTH + 1];
|
char *val;
|
||||||
strncpy (val, part->file_name, sizeof(val));
|
/* now, let's create a term... allocated on strchunk,
|
||||||
|
* no need to free*/
|
||||||
/* now, let's create a term... */
|
val = mu_str_xapian_escape (part->file_name, TRUE /*esc space*/,
|
||||||
mu_str_normalize_in_place (val, TRUE);
|
pdata->_strchunk);
|
||||||
mu_str_xapian_escape_in_place (val, TRUE /*esc space*/);
|
|
||||||
|
|
||||||
pdata->_doc.add_term
|
pdata->_doc.add_term
|
||||||
(file + std::string(val, 0, MuStore::MAX_TERM_LENGTH));
|
(file + std::string(val, 0, MuStore::MAX_TERM_LENGTH));
|
||||||
}
|
}
|
||||||
@ -490,9 +468,9 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
add_terms_values_attach (Xapian::Document& doc, MuMsg *msg,
|
add_terms_values_attach (Xapian::Document& doc, MuMsg *msg,
|
||||||
MuMsgFieldId mfid)
|
MuMsgFieldId mfid, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
PartData pdata (doc, mfid);
|
PartData pdata (doc, mfid, strchunk);
|
||||||
mu_msg_part_foreach (msg, TRUE,
|
mu_msg_part_foreach (msg, TRUE,
|
||||||
(MuMsgPartForeachFunc)each_part, &pdata);
|
(MuMsgPartForeachFunc)each_part, &pdata);
|
||||||
}
|
}
|
||||||
@ -500,7 +478,7 @@ add_terms_values_attach (Xapian::Document& doc, MuMsg *msg,
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
|
add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
|
||||||
MuMsgFieldId mfid)
|
MuMsgFieldId mfid, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
const char *str;
|
const char *str;
|
||||||
char *norm;
|
char *norm;
|
||||||
@ -518,32 +496,32 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
|
|||||||
Xapian::TermGenerator termgen;
|
Xapian::TermGenerator termgen;
|
||||||
termgen.set_document(doc);
|
termgen.set_document(doc);
|
||||||
|
|
||||||
norm = mu_str_normalize (str, TRUE);
|
/* norm is allocated on strchunk, no need for freeing */
|
||||||
|
norm = mu_str_normalize (str, TRUE, strchunk);
|
||||||
termgen.index_text_without_positions (norm, 1, prefix(mfid));
|
termgen.index_text_without_positions (norm, 1, prefix(mfid));
|
||||||
|
|
||||||
g_free (norm);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct _MsgDoc {
|
struct _MsgDoc {
|
||||||
Xapian::Document *_doc;
|
Xapian::Document *_doc;
|
||||||
MuMsg *_msg;
|
MuMsg *_msg;
|
||||||
MuStore *_store;
|
MuStore *_store;
|
||||||
|
GStringChunk *_strchunk;
|
||||||
};
|
};
|
||||||
typedef struct _MsgDoc MsgDoc;
|
typedef struct _MsgDoc MsgDoc;
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
add_terms_values_default (MuMsgFieldId mfid, MsgDoc* msgdoc)
|
add_terms_values_default (MuMsgFieldId mfid, MsgDoc *msgdoc)
|
||||||
{
|
{
|
||||||
if (mu_msg_field_is_numeric (mfid))
|
if (mu_msg_field_is_numeric (mfid))
|
||||||
add_terms_values_number
|
add_terms_values_number
|
||||||
(*msgdoc->_doc, msgdoc->_msg, mfid);
|
(*msgdoc->_doc, msgdoc->_msg, mfid);
|
||||||
else if (mu_msg_field_is_string (mfid))
|
else if (mu_msg_field_is_string (mfid))
|
||||||
add_terms_values_string
|
add_terms_values_string
|
||||||
(*msgdoc->_doc, msgdoc->_msg, mfid);
|
(*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk);
|
||||||
else if (mu_msg_field_is_string_list(mfid))
|
else if (mu_msg_field_is_string_list(mfid))
|
||||||
add_terms_values_string_list
|
add_terms_values_string_list
|
||||||
(*msgdoc->_doc, msgdoc->_msg, mfid);
|
(*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk);
|
||||||
else
|
else
|
||||||
g_return_if_reached ();
|
g_return_if_reached ();
|
||||||
|
|
||||||
@ -564,13 +542,14 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
|
|||||||
add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid);
|
add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid);
|
||||||
break;
|
break;
|
||||||
case MU_MSG_FIELD_ID_BODY_TEXT:
|
case MU_MSG_FIELD_ID_BODY_TEXT:
|
||||||
add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid);
|
add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid, msgdoc->_strchunk);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* note: add_terms_values_attach handles _FILE, _MIME and
|
/* note: add_terms_values_attach handles _FILE, _MIME and
|
||||||
* _ATTACH_TEXT msgfields */
|
* _ATTACH_TEXT msgfields */
|
||||||
case MU_MSG_FIELD_ID_FILE:
|
case MU_MSG_FIELD_ID_FILE:
|
||||||
add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid);
|
add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid,
|
||||||
|
msgdoc->_strchunk);
|
||||||
break;
|
break;
|
||||||
case MU_MSG_FIELD_ID_MIME:
|
case MU_MSG_FIELD_ID_MIME:
|
||||||
case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
|
case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
|
||||||
@ -622,20 +601,22 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
|
|||||||
if (!mu_str_is_empty(contact->name)) {
|
if (!mu_str_is_empty(contact->name)) {
|
||||||
Xapian::TermGenerator termgen;
|
Xapian::TermGenerator termgen;
|
||||||
termgen.set_document (*msgdoc->_doc);
|
termgen.set_document (*msgdoc->_doc);
|
||||||
char *norm = mu_str_normalize (contact->name, TRUE);
|
/* note: norm is added to stringchunk, no need for freeing */
|
||||||
|
char *norm = mu_str_normalize (contact->name, TRUE, msgdoc->_strchunk);
|
||||||
termgen.index_text_without_positions (norm, 1, pfx);
|
termgen.index_text_without_positions (norm, 1, pfx);
|
||||||
g_free (norm);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* don't normalize e-mail address, but do lowercase it */
|
/* don't normalize e-mail address, but do lowercase it */
|
||||||
if (!mu_str_is_empty(contact->address)) {
|
if (!mu_str_is_empty(contact->address)) {
|
||||||
|
|
||||||
char *escaped;
|
char *escaped;
|
||||||
|
/* note: escaped is added to stringchunk, no need for
|
||||||
|
* freeing */
|
||||||
escaped = mu_str_xapian_escape (contact->address,
|
escaped = mu_str_xapian_escape (contact->address,
|
||||||
FALSE /*dont esc space*/);
|
FALSE /*dont esc space*/,
|
||||||
|
msgdoc->_strchunk);
|
||||||
msgdoc->_doc->add_term
|
msgdoc->_doc->add_term
|
||||||
(std::string (pfx + escaped, 0, MuStore::MAX_TERM_LENGTH));
|
(std::string (pfx + escaped, 0, MuStore::MAX_TERM_LENGTH));
|
||||||
g_free (escaped);
|
|
||||||
|
|
||||||
/* store it also in our contacts cache */
|
/* store it also in our contacts cache */
|
||||||
if (msgdoc->_store->contacts())
|
if (msgdoc->_store->contacts())
|
||||||
@ -645,18 +626,22 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MU_STRING_CHUNK_SIZE 8192
|
||||||
|
|
||||||
Xapian::Document
|
Xapian::Document
|
||||||
new_doc_from_message (MuStore *store, MuMsg *msg)
|
new_doc_from_message (MuStore *store, MuMsg *msg)
|
||||||
{
|
{
|
||||||
Xapian::Document doc;
|
Xapian::Document doc;
|
||||||
MsgDoc docinfo = {&doc, msg, store};
|
MsgDoc docinfo = {&doc, msg, store, 0};
|
||||||
|
docinfo._strchunk = g_string_chunk_new (MU_STRING_CHUNK_SIZE);
|
||||||
|
|
||||||
mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo);
|
mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo);
|
||||||
/* also store the contact-info as separate terms */
|
/* also store the contact-info as separate terms */
|
||||||
mu_msg_contact_foreach (msg, (MuMsgContactForeachFunc)each_contact_info,
|
mu_msg_contact_foreach (msg, (MuMsgContactForeachFunc)each_contact_info,
|
||||||
&docinfo);
|
&docinfo);
|
||||||
|
|
||||||
|
g_string_chunk_free (docinfo._strchunk);
|
||||||
|
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -30,34 +30,48 @@
|
|||||||
|
|
||||||
|
|
||||||
char*
|
char*
|
||||||
mu_str_normalize (const char *str, gboolean downcase)
|
mu_str_normalize (const char *str, gboolean downcase, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
|
char *mystr;
|
||||||
|
|
||||||
g_return_val_if_fail (str, NULL);
|
g_return_val_if_fail (str, NULL);
|
||||||
|
|
||||||
return mu_str_normalize_in_place (g_strdup(str), downcase);
|
if (strchunk)
|
||||||
|
mystr = g_string_chunk_insert (strchunk, str);
|
||||||
|
else
|
||||||
|
mystr = g_strdup (str);
|
||||||
|
|
||||||
|
return mu_str_normalize_in_place_try (mystr, downcase, strchunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* this implementation should work for _all_ locales. */
|
/* this implementation should work for _all_ locales. */
|
||||||
static char*
|
static char*
|
||||||
mu_str_normalize_in_place_generic (char *str, gboolean downcase)
|
mu_str_normalize_in_place_generic (char *str, gboolean downcase, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
/* FIXME: add accent-folding etc. */
|
|
||||||
|
|
||||||
if (downcase) {
|
|
||||||
|
|
||||||
char *norm;
|
char *norm;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
|
||||||
|
/* FIXME: add accent-folding etc. */
|
||||||
|
if (!downcase)
|
||||||
|
return str; /* nothing to do */
|
||||||
|
|
||||||
len = strlen (str);
|
len = strlen (str);
|
||||||
norm = g_utf8_strdown (str, len);
|
norm = g_utf8_strdown (str, len);
|
||||||
|
|
||||||
if (strlen (norm) > len)
|
|
||||||
g_warning ("normalized text doesn't fit :/");
|
|
||||||
|
|
||||||
memcpy (str, norm, len);
|
if (strlen (norm) > len) {
|
||||||
|
/* this case is rare, but does happen */
|
||||||
|
char *copy;
|
||||||
|
if (!strchunk)
|
||||||
|
return norm;
|
||||||
|
copy = g_string_chunk_insert (strchunk, norm);
|
||||||
|
g_free (norm);
|
||||||
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memcpy (str, norm, len);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -78,7 +92,7 @@ mu_str_normalize_in_place_generic (char *str, gboolean downcase)
|
|||||||
* original 0xc3 0x9f
|
* original 0xc3 0x9f
|
||||||
*/
|
*/
|
||||||
char*
|
char*
|
||||||
mu_str_normalize_in_place (char *str, gboolean downcase)
|
mu_str_normalize_in_place_try (char *str, gboolean downcase, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
const guchar *cur;
|
const guchar *cur;
|
||||||
int i;
|
int i;
|
||||||
@ -386,7 +400,7 @@ mu_str_normalize_in_place (char *str, gboolean downcase)
|
|||||||
/* our fast-path for latin-utf8 does not work -- bummer!
|
/* our fast-path for latin-utf8 does not work -- bummer!
|
||||||
* use something more generic (but a bit slower)
|
* use something more generic (but a bit slower)
|
||||||
*/
|
*/
|
||||||
return mu_str_normalize_in_place_generic (str, downcase);
|
return mu_str_normalize_in_place_generic (str, downcase, strchunk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
15
lib/mu-str.c
15
lib/mu-str.c
@ -430,7 +430,7 @@ check_for_field (const char *str, gboolean *is_field, gboolean *is_range_field)
|
|||||||
* function expects search terms (not complete queries)
|
* function expects search terms (not complete queries)
|
||||||
* */
|
* */
|
||||||
char*
|
char*
|
||||||
mu_str_xapian_escape_in_place (char *term, gboolean esc_space)
|
mu_str_xapian_escape_in_place_try (char *term, gboolean esc_space, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
unsigned char *cur;
|
unsigned char *cur;
|
||||||
const char escchar = '_';
|
const char escchar = '_';
|
||||||
@ -474,15 +474,22 @@ mu_str_xapian_escape_in_place (char *term, gboolean esc_space)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* downcase try to remove accents etc. */
|
/* downcase try to remove accents etc. */
|
||||||
return mu_str_normalize_in_place (term, TRUE);
|
return mu_str_normalize_in_place_try (term, TRUE, strchunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
char*
|
char*
|
||||||
mu_str_xapian_escape (const char *query, gboolean esc_space)
|
mu_str_xapian_escape (const char *query, gboolean esc_space, GStringChunk *strchunk)
|
||||||
{
|
{
|
||||||
|
char *mystr;
|
||||||
|
|
||||||
g_return_val_if_fail (query, NULL);
|
g_return_val_if_fail (query, NULL);
|
||||||
|
|
||||||
return mu_str_xapian_escape_in_place (g_strdup(query), esc_space);
|
if (strchunk)
|
||||||
|
mystr = g_string_chunk_insert (strchunk, query);
|
||||||
|
else
|
||||||
|
mystr = g_strdup (query);
|
||||||
|
|
||||||
|
return mu_str_xapian_escape_in_place_try (mystr, esc_space, strchunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
34
lib/mu-str.h
34
lib/mu-str.h
@ -107,14 +107,16 @@ char* mu_str_summarize (const char* str, size_t max_lines)
|
|||||||
*
|
*
|
||||||
* @param str a valid utf8 string or NULL
|
* @param str a valid utf8 string or NULL
|
||||||
* @param downcase if TRUE, convert the string to lowercase
|
* @param downcase if TRUE, convert the string to lowercase
|
||||||
|
* @param strchunk (optional) if non-NULL, allocate strings on strchunk
|
||||||
*
|
*
|
||||||
* @return the normalize string, or NULL in case of error or str was NULL
|
* @return the normalized string, or NULL in case of error or str was
|
||||||
|
* NULL. Unless strchunk was provided, user must g_free the string when
|
||||||
|
* no longer needed
|
||||||
*/
|
*/
|
||||||
char* mu_str_normalize (const char *str, gboolean downcase)
|
char* mu_str_normalize (const char *str, gboolean downcase,
|
||||||
|
GStringChunk *strchunk)
|
||||||
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
|
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* normalize a string (ie., collapse accented characters etc.), and
|
* normalize a string (ie., collapse accented characters etc.), and
|
||||||
* optionally, downcase it. this happens by changing the string; if
|
* optionally, downcase it. this happens by changing the string; if
|
||||||
@ -123,12 +125,14 @@ char* mu_str_normalize (const char *str, gboolean downcase)
|
|||||||
*
|
*
|
||||||
* @param str a valid utf8 string or NULL
|
* @param str a valid utf8 string or NULL
|
||||||
* @param downcase if TRUE, convert the string to lowercase
|
* @param downcase if TRUE, convert the string to lowercase
|
||||||
|
* @param strchunk (optional) if non-NULL, allocate strings on strchunk
|
||||||
*
|
*
|
||||||
* @return the normalized string, or NULL in case of error or str was
|
* @return the normalized string, or NULL in case of error or str was
|
||||||
* NULL
|
* NULL. User only needs to free the returned string if a) return
|
||||||
|
* value != str and b) strchunk was not provided.
|
||||||
*/
|
*/
|
||||||
char* mu_str_normalize_in_place (char *str, gboolean downcase);
|
char* mu_str_normalize_in_place_try (char *str, gboolean downcase,
|
||||||
|
GStringChunk *strchunk);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* escape the string for use with xapian matching. in practice, if the
|
* escape the string for use with xapian matching. in practice, if the
|
||||||
@ -140,10 +144,15 @@ char* mu_str_normalize_in_place (char *str, gboolean downcase);
|
|||||||
*
|
*
|
||||||
* @param query a query string
|
* @param query a query string
|
||||||
* @param esc_space escape space characters as well
|
* @param esc_space escape space characters as well
|
||||||
|
* @param strchunk (optional) if non-NULL, allocate strings on strchunk
|
||||||
|
*
|
||||||
|
* @return the escaped string or NULL in case of error. User only
|
||||||
|
* needs to free the returned string if a) return value != query and b)
|
||||||
|
* strchunk was not provided.
|
||||||
*
|
*
|
||||||
* @return the escaped string or NULL in case of error
|
|
||||||
*/
|
*/
|
||||||
char* mu_str_xapian_escape_in_place (char *query, gboolean esc_space);
|
char* mu_str_xapian_escape_in_place_try (char *query, gboolean esc_space,
|
||||||
|
GStringChunk *strchunk);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* escape the string for use with xapian matching. in practice, if the
|
* escape the string for use with xapian matching. in practice, if the
|
||||||
@ -153,11 +162,14 @@ char* mu_str_xapian_escape_in_place (char *query, gboolean esc_space);
|
|||||||
*
|
*
|
||||||
* @param query a query string
|
* @param query a query string
|
||||||
* @param esc_space escape space characters as well
|
* @param esc_space escape space characters as well
|
||||||
|
* @param strchunk (optional) if non-NULL, allocate strings on strchunk
|
||||||
*
|
*
|
||||||
* @return the escaped string (free with g_free) or NULL in case of error.
|
* @return the escaped string (free with g_free) or NULL in case of error.
|
||||||
|
* Unless strchunk was provided, user must g_free the string when
|
||||||
|
* no longer needed
|
||||||
*/
|
*/
|
||||||
char* mu_str_xapian_escape (const char *query, gboolean esc_space)
|
char* mu_str_xapian_escape (const char *query, gboolean esc_space,
|
||||||
G_GNUC_WARN_UNUSED_RESULT;
|
GStringChunk *strchunk) G_GNUC_WARN_UNUSED_RESULT;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user