message: use html-to-text scraper for html parts
We were dumping the HTML parts as-is into the Xapian indexer; however, it is better to strip the HTML decoration first and pass only the text. We use the new built-in HTML-to-text scraper for that.
This commit is contained in:
@ -79,17 +79,6 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
|
||||
throw std::logic_error("not a search term");
|
||||
}
|
||||
|
||||
/* hack... import html text as if it were plain text. */
|
||||
static void
|
||||
add_body_html(Xapian::Document& doc, const Field& field, const std::string& val)
|
||||
{
|
||||
static Field body_field = field_from_id(Field::Id::BodyText);
|
||||
|
||||
Xapian::TermGenerator termgen;
|
||||
termgen.set_document(doc);
|
||||
termgen.index_text(utf8_flatten(val), 1, body_field.xapian_term());
|
||||
}
|
||||
|
||||
void
|
||||
Document::add(Field::Id id, const std::string& val)
|
||||
{
|
||||
@ -100,11 +89,9 @@ Document::add(Field::Id id, const std::string& val)
|
||||
|
||||
if (field.is_searchable())
|
||||
add_search_term(xdoc_, field, val);
|
||||
else if (id == Field::Id::XBodyHtml)
|
||||
add_body_html(xdoc_, field, val);
|
||||
if (field.include_in_sexp()) {
|
||||
|
||||
if (field.include_in_sexp())
|
||||
put_prop(field, val);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
Reference in New Issue
Block a user