message: use html-to-text scraper for html parts

We were dumping the HTML parts as-is into the Xapian indexer; however,
it's better to strip the HTML markup first and pass only the text.

We use the new built-in html->text scraper for that.
This commit is contained in:
Dirk-Jan C. Binnema
2023-07-23 14:46:11 +03:00
parent 56b8fad89e
commit b795242d5a
7 changed files with 31 additions and 69 deletions

View File

@ -79,17 +79,6 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
throw std::logic_error("not a search term");
}
/* Hack: index an HTML body as if it were plain text.
 *
 * Runs utf8_flatten(val) through a Xapian::TermGenerator bound to doc,
 * using a wdf increment of 1 and the Xapian term prefix of the BodyText
 * field.
 *
 * doc:   the Xapian document that receives the generated terms.
 * field: not used in this function; the BodyText field is looked up
 *        instead.
 * val:   the HTML text to index.
 *
 * NOTE(review): no HTML markup is stripped here, unless utf8_flatten does
 * so -- confirm.
 */
static void
add_body_html(Xapian::Document& doc, const Field& field, const std::string& val)
{
/* function-local static: the BodyText field is looked up only once */
static Field body_field = field_from_id(Field::Id::BodyText);
Xapian::TermGenerator termgen;
termgen.set_document(doc);
termgen.index_text(utf8_flatten(val), 1, body_field.xapian_term());
}
void
Document::add(Field::Id id, const std::string& val)
{
@ -100,11 +89,9 @@ Document::add(Field::Id id, const std::string& val)
if (field.is_searchable())
add_search_term(xdoc_, field, val);
else if (id == Field::Id::XBodyHtml)
add_body_html(xdoc_, field, val);
if (field.include_in_sexp()) {
if (field.include_in_sexp())
put_prop(field, val);
}
}
void