message: use html-to-text scraper for html parts

We were dumping the HTML parts as-is into the Xapian indexer; however, it's better to strip the HTML markup first and pass only the text. We use the new built-in html->text scraper for that.
2023-07-23 14:46:11 +03:00
parent 56b8fad89e
commit b795242d5a
7 changed files with 31 additions and 69 deletions
--- a/lib/message/mu-document.cc
+++ b/lib/message/mu-document.cc
@@ -79,17 +79,6 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
 		throw std::logic_error("not a search term");
 }

-/* hack... import html text as if it were plain text. */
-static void
-add_body_html(Xapian::Document& doc, const Field& field, const std::string& val)
-{
-	static Field body_field = field_from_id(Field::Id::BodyText);
-
-	Xapian::TermGenerator termgen;
-	termgen.set_document(doc);
-	termgen.index_text(utf8_flatten(val), 1, body_field.xapian_term());
-}
-
 void
 Document::add(Field::Id id, const std::string& val)
 {
@@ -100,11 +89,9 @@ Document::add(Field::Id id, const std::string& val)

 	if (field.is_searchable())
 		add_search_term(xdoc_, field, val);
-	else if (id == Field::Id::XBodyHtml)
-		add_body_html(xdoc_, field, val);
-	if (field.include_in_sexp()) {
+
+	if (field.include_in_sexp())
 		put_prop(field, val);
-	}
 }

 void