* improve support for non-latin languages (cyrillic etc.) (WIP)

- change the various escaping / normalization functions to better deal with
    non-ASCII, non-Latin languages, such as Russian.

    It seems we can now match 'Тесла' or 'Аркона' without problems.

  - added unit test.

  - WIP -- needs more testing.
This commit is contained in:
djcb
2012-04-16 01:10:46 +03:00
parent 557ce2839b
commit 0be852b288
7 changed files with 138 additions and 79 deletions

View File

@ -139,7 +139,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_FILE,
MU_MSG_FIELD_TYPE_STRING,
"file" , 'j', 'J',
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_NORMALIZE |
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_ESCAPE |
FLAG_DONT_CACHE | FLAG_XAPIAN_PREFIX_ONLY
},
@ -164,7 +164,8 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_TYPE_STRING,
"path", 'l', 'L', /* 'l' for location */
FLAG_GMIME | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY
FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY |
FLAG_XAPIAN_ESCAPE
},
{
@ -172,7 +173,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_TYPE_STRING,
"maildir", 'm', 'M',
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_VALUE |
FLAG_NORMALIZE | FLAG_XAPIAN_ESCAPE | FLAG_XAPIAN_PREFIX_ONLY
FLAG_XAPIAN_ESCAPE | FLAG_XAPIAN_PREFIX_ONLY
},
@ -204,7 +205,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_TYPE_STRING,
"subject", 's', 'S',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE |
FLAG_XAPIAN_TERM | FLAG_NORMALIZE | FLAG_XAPIAN_ESCAPE
FLAG_XAPIAN_TERM | FLAG_XAPIAN_ESCAPE
},
{
@ -234,7 +235,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_TYPE_STRING_LIST,
"tag", 'x', 'X',
FLAG_GMIME | FLAG_XAPIAN_TERM | FLAG_XAPIAN_PREFIX_ONLY |
FLAG_NORMALIZE | FLAG_XAPIAN_ESCAPE
FLAG_XAPIAN_ESCAPE
},
{ /* special, internal field, to get a unique key */