mu: index html text as if it were plain text

This is a bit of a hack to include html text in search results.

Of course, html text is not really plain text, so this is a bit of a
hack until we introduce some html parsing step.
This commit is contained in:
Dirk-Jan C. Binnema
2023-01-31 23:12:05 +02:00
parent ea08378ce6
commit abfa6f277c
3 changed files with 83 additions and 15 deletions

View File

@ -83,6 +83,16 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
throw std::logic_error("not a search term");
}
/*
 * Hack: index html text as if it were plain text.
 *
 * The text in @val is passed through utf8_flatten() and handed to a Xapian
 * term generator attached to @doc; the resulting terms get the prefix of
 * the BodyText field, i.e. the same prefix that is used here for
 * Field::Id::BodyText, rather than one derived from @field.
 *
 * @doc:   document that receives the generated terms
 * @field: field the value belongs to; not consulted by this function
 * @val:   the html text to index
 */
static void
add_body_html(Xapian::Document& doc, const Field& field, const std::string& val)
{
	/* looked up once, on the first call, and reused afterwards */
	static Field plain_body = field_from_id(Field::Id::BodyText);

	const auto prefix    = plain_body.xapian_term();
	const auto flattened = utf8_flatten(val);

	Xapian::TermGenerator indexer;
	indexer.set_document(doc);
	/* second argument: within-document-frequency increment of 1 */
	indexer.index_text(flattened, 1, prefix);
}
void
Document::add(Field::Id id, const std::string& val)
@ -94,7 +104,8 @@ Document::add(Field::Id id, const std::string& val)
if (field.is_searchable())
add_search_term(xdoc_, field, val);
else if (id == Field::Id::XBodyHtml)
add_body_html(xdoc_, field, val);
if (field.include_in_sexp()) {
put_prop(field, val);
}