message: try to detect body text language

Try to detect the language of the e-mail body and make it searchable.
This commit is contained in:
Dirk-Jan C. Binnema
2023-05-11 23:22:29 +03:00
parent ad64093183
commit 7f2eeb1010
4 changed files with 29 additions and 9 deletions

View File

@ -29,6 +29,7 @@
#include <utils/mu-utils.hh>
#include <utils/mu-error.hh>
#include <utils/mu-option.hh>
#include <utils/mu-lang-detector.hh>
#include <atomic>
#include <mutex>
@ -67,6 +68,8 @@ struct Message::Private {
Option<std::string> body_txt;
Option<std::string> body_html;
Option<std::string> embedded;
Option<std::string> language; /* body ISO language code */
};
@ -531,6 +534,14 @@ process_message(const MimeMessage& mime_msg, const std::string& path,
info.mailing_list = get_mailing_list(mime_msg);
if (info.mailing_list)
info.flags |= Flags::MailingList;
if (info.body_txt) { /* attempt to get the body-language */
if (const auto lang{detect_language(info.body_txt.value())}; lang) {
info.language = lang->code;
g_debug("detected language: %s", lang->code);
} else
g_debug("could not detect language");
}
}
static Mu::Result<std::string>
@ -586,8 +597,6 @@ fake_message_id(const std::string& path)
* based on a field. So we add them here.
*/
static void
doc_add_list_post(Document& doc, const MimeMessage& mime_msg)
{
@ -643,7 +652,7 @@ fill_document(Message::Private& priv)
doc_add_reply_to(doc, mime_msg); /* only in sexp */
field_for_each([&](auto&& field) {
/* insist on expliclity handling each */
/* insist on explicitly handling each */
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
switch(field.id) {
@ -652,7 +661,6 @@ fill_document(Message::Private& priv)
break;
case Field::Id::BodyText:
doc.add(field.id, priv.body_txt);
break;
case Field::Id::Cc:
doc.add(field.id, mime_msg.contacts(Contact::Type::Cc));
@ -676,6 +684,9 @@ fill_document(Message::Private& priv)
case Field::Id::From:
doc.add(field.id, mime_msg.contacts(Contact::Type::From));
break;
case Field::Id::Language:
doc.add(field.id, priv.language);
break;
case Field::Id::Maildir: /* already */
break;
case Field::Id::MailingList: