query: filter out docs without query-matches

This avoid including unwanted messages in threaded results.
Also some cleanups.
This commit is contained in:
Dirk-Jan C. Binnema
2021-02-15 19:11:22 +02:00
parent 950883ad39
commit 579f841bdd
4 changed files with 44 additions and 49 deletions

View File

@ -102,7 +102,7 @@ private:
} MU_XAPIAN_CATCH_BLOCK_RETURN (Nothing); } MU_XAPIAN_CATCH_BLOCK_RETURN (Nothing);
}; };
struct MatchDeciderLeader: public MatchDecider { struct MatchDeciderLeader final: public MatchDecider {
MatchDeciderLeader (QueryFlags qflags, DeciderInfo& info): MatchDeciderLeader (QueryFlags qflags, DeciderInfo& info):
MatchDecider(qflags, info) MatchDecider(qflags, info)
{} {}
@ -155,7 +155,7 @@ Mu::make_leader_decider (QueryFlags qflags, DeciderInfo& info)
return std::make_unique<MatchDeciderLeader>(qflags, info); return std::make_unique<MatchDeciderLeader>(qflags, info);
} }
struct MatchDeciderRelated: public MatchDecider { struct MatchDeciderRelated final: public MatchDecider {
MatchDeciderRelated(QueryFlags qflags, DeciderInfo& info): MatchDeciderRelated(QueryFlags qflags, DeciderInfo& info):
MatchDecider(qflags, info) {} MatchDecider(qflags, info) {}
/** /**
@ -200,8 +200,8 @@ Mu::make_related_decider (QueryFlags qflags, DeciderInfo& info)
return std::make_unique<MatchDeciderRelated>(qflags, info); return std::make_unique<MatchDeciderRelated>(qflags, info);
} }
struct MatchDeciderFinal: public MatchDecider { struct MatchDeciderThread final: public MatchDecider {
MatchDeciderFinal(QueryFlags qflags, DeciderInfo& info): MatchDeciderThread(QueryFlags qflags, DeciderInfo& info):
MatchDecider{qflags, info} {} MatchDecider{qflags, info} {}
/** /**
* operator() * operator()
@ -219,16 +219,13 @@ struct MatchDeciderFinal: public MatchDecider {
// we may have seen this match in the "Leader" query, // we may have seen this match in the "Leader" query,
// or in the second (unbuounded) related query; // or in the second (unbuounded) related query;
const auto it{decider_info_.matches.find(doc.get_docid())}; const auto it{decider_info_.matches.find(doc.get_docid())};
if (it == decider_info_.matches.end()) return it != decider_info_.matches.end() && !it->second.thread_path.empty();
return false;
else
return should_include(it->second);
} }
}; };
std::unique_ptr<Xapian::MatchDecider> std::unique_ptr<Xapian::MatchDecider>
Mu::make_final_decider (QueryFlags qflags, DeciderInfo& info) Mu::make_thread_decider (QueryFlags qflags, DeciderInfo& info)
{ {
return std::make_unique<MatchDeciderFinal>(qflags, info); return std::make_unique<MatchDeciderThread>(qflags, info);
} }

View File

@ -68,16 +68,16 @@ std::unique_ptr<Xapian::MatchDecider> make_related_decider(QueryFlags qflags,
/** /**
* Make a "final" decider, that is, a MatchDecider that removes all but * Make a "thread" decider, that is, a MatchDecider that removes all but the
* the document excepts for the ones included earlier. * document excepts for the ones found during initial/related searches.
* *
* @param qflags query flags * @param qflags query flags
* @param match_info receives information about the matches. * @param match_info receives information about the matches.
* *
* @return a unique_ptr to a match decider. * @return a unique_ptr to a match decider.
*/ */
std::unique_ptr<Xapian::MatchDecider> make_final_decider (QueryFlags qflags, std::unique_ptr<Xapian::MatchDecider> make_thread_decider (QueryFlags qflags,
DeciderInfo& info); DeciderInfo& info);
} // namepace Mu } // namepace Mu

View File

@ -81,7 +81,7 @@ struct Container {
// That the sub-root-levels of thtreas are always sorted // That the sub-root-levels of thtreas are always sorted
// by date, in ascending order. // by date, in ascending order.
std::string thread_date_key; std::string thread_date_key;
Option<QueryMatch&> query_match; Option<QueryMatch&> query_match;
bool is_nuked{}; bool is_nuked{};
@ -103,7 +103,7 @@ using ContainerVec = Container::ContainerVec;
static std::ostream& static std::ostream&
operator<<(std::ostream& os, const Container& container) operator<<(std::ostream& os, const Container& container)
{ {
os << "container: " << std::right << std::setw(10) << &container os << "container: " << std::right << std::setw(10) << &container
<< ": parent: " << std::right << std::setw(10) << container.parent << ": parent: " << std::right << std::setw(10) << container.parent
<< " [" << container.thread_date_key << "]" << " [" << container.thread_date_key << "]"
<< "\n children: "; << "\n children: ";
@ -122,7 +122,6 @@ operator<<(std::ostream& os, const Container& container)
using IdTable = std::unordered_map<std::string, Container>; using IdTable = std::unordered_map<std::string, Container>;
using DupTable = std::multimap<std::string, Container>; using DupTable = std::multimap<std::string, Container>;
//template <typename QueryResultsType> using DupsVec = std::vector<decltype(QueryResultsType::value_type)>;
static void static void
handle_duplicates (IdTable& id_table, DupTable& dup_table) handle_duplicates (IdTable& id_table, DupTable& dup_table)
@ -165,8 +164,8 @@ determine_id_table (QueryResultsType& qres)
auto c_it = id_table.find(msgid); auto c_it = id_table.find(msgid);
auto& container = [&]()->Container& { auto& container = [&]()->Container& {
if (c_it != id_table.end()) { if (c_it != id_table.end()) {
assert(!c_it->second.query_match); if (!c_it->second.query_match) // hmm, dup?
c_it->second.query_match = mi.query_match(); c_it->second.query_match = mi.query_match();
return c_it->second; return c_it->second;
} else { } else {
// Else: // Else:
@ -218,7 +217,7 @@ determine_id_table (QueryResultsType& qres)
parent_ref_container = ref_container; parent_ref_container = ref_container;
} }
// Add the query_match to the chain. // Add the query_match to the chain.
if (parent_ref_container && !container.parent) { if (parent_ref_container && !container.parent) {
if (!parent_ref_container->is_reachable(&container)) if (!parent_ref_container->is_reachable(&container))
@ -276,13 +275,13 @@ static void
prune (Container* child) prune (Container* child)
{ {
Container *container{child->parent}; Container *container{child->parent};
for (auto& grandchild: child->children) { for (auto& grandchild: child->children) {
grandchild->parent = container; grandchild->parent = container;
if (container) if (container)
container->children.emplace_back(grandchild); container->children.emplace_back(grandchild);
} }
child->children.clear(); child->children.clear();
child->is_nuked = true; child->is_nuked = true;
@ -295,7 +294,7 @@ static bool
prune_empty_containers (Container& container) prune_empty_containers (Container& container)
{ {
Containers to_prune; Containers to_prune;
container.for_each_child([&](auto& child){ container.for_each_child([&](auto& child){
if (prune_empty_containers(*child)) if (prune_empty_containers(*child))
to_prune.emplace_back(child); to_prune.emplace_back(child);
@ -303,7 +302,7 @@ prune_empty_containers (Container& container)
for (auto& child: to_prune) for (auto& child: to_prune)
prune (child); prune (child);
// Never nuke these. // Never nuke these.
if (container.query_match) if (container.query_match)
return false; return false;
@ -328,7 +327,7 @@ static void
prune_empty_containers (IdTable& id_table) prune_empty_containers (IdTable& id_table)
{ {
for (auto&& item: id_table) { for (auto&& item: id_table) {
auto& child(item.second); auto& child(item.second);
if (child.parent) if (child.parent)
continue; // not a root child. continue; // not a root child.
@ -384,7 +383,7 @@ update_container (Container& container, bool descending,
first->query_match->flags |= QueryMatch::Flags::First; first->query_match->flags |= QueryMatch::Flags::First;
Container* last = container.children.back(); Container* last = container.children.back();
if (last->query_match) if (last->query_match)
last->query_match->flags |= QueryMatch::Flags::Last; last->query_match->flags |= QueryMatch::Flags::Last;
} }
// calculate the "thread-subject", which is for UI // calculate the "thread-subject", which is for UI
@ -408,7 +407,7 @@ update_container (Container& container, bool descending,
return true; return true;
} }
static void static void
update_containers (Containers& children, bool descending, ThreadPath& tpath, update_containers (Containers& children, bool descending, ThreadPath& tpath,
@ -450,7 +449,7 @@ static void
sort_container (Container& container) sort_container (Container& container)
{ {
// 1. childless container. // 1. childless container.
if (container.children.empty()) { if (container.children.empty()) {
container.thread_date_key = container.query_match->date_key; container.thread_date_key = container.query_match->date_key;
return; return;
@ -493,12 +492,12 @@ sort_siblings (IdTable& id_table, bool descending)
if (c->query_match) // for debugging, remember if (c->query_match) // for debugging, remember
c->query_match->thread_date = c->thread_date_key; c->query_match->thread_date = c->thread_date_key;
} }
// and then sort the root set. // and then sort the root set.
// //
// The difference with the sub-root containers is that at the top-level, // The difference with the sub-root containers is that at the top-level,
// we can sort either in ascending or descending order, while on the // we can sort either in ascending or descending order, while on the
// subroot level it's always in ascending order. // subroot level it's always in ascending order.
// //
// Note that unless we're testing, _xapian_ will handle // Note that unless we're testing, _xapian_ will handle
// the ascending/descending of the top level. // the ascending/descending of the top level.
@ -524,7 +523,7 @@ operator<<(std::ostream& os, const IdTable& id_table)
os << item.first << " => " << item.second << "\n"; os << item.first << " => " << item.second << "\n";
} }
os << "------------------------------------------------\n"; os << "------------------------------------------------\n";
std::set<std::string> ids; std::set<std::string> ids;
for (auto&& item: id_table) { for (auto&& item: id_table) {
if (item.second.query_match) if (item.second.query_match)
@ -551,7 +550,7 @@ calculate_threads_real (Results& qres, bool descending)
if (g_test_verbose()) if (g_test_verbose())
std::cout << "*** id-table(1):\n" << id_table << "\n"; std::cout << "*** id-table(1):\n" << id_table << "\n";
// // Step 2: get the root set // // Step 2: get the root set
// // Step 3: discard id_table // // Step 3: discard id_table
@ -786,11 +785,11 @@ static void
test_prune_empty_with_children() test_prune_empty_with_children()
{ {
// m6 should be nuked // m6 should be nuked
auto results = MockQueryResults { auto results = MockQueryResults {
MockQueryResult{ "m1", "1", {"m7", "m6"} }, MockQueryResult{ "m1", "1", {"m7", "m6"} },
MockQueryResult{ "m2", "2", {"m7", "m6"} }, MockQueryResult{ "m2", "2", {"m7", "m6"} },
}; };
calculate_threads(results, false); calculate_threads(results, false);
assert_thread_paths (results, { assert_thread_paths (results, {
@ -843,9 +842,9 @@ test_thread_info_descending()
assert_thread_paths (results, { assert_thread_paths (results, {
{ "m1", "0:z"}, // 5 { "m1", "0:z"}, // 5
{ "m2", "1:z" }, // 2 { "m2", "1:z" }, // 2
{ "m4", "1:f:z" }, // 2 { "m4", "1:f:z" }, // 2
{ "m3", "1:e:z"}, // 3 { "m3", "1:e:z"}, // 3
}); });
g_assert_true (results[0].query_match().has_flag( g_assert_true (results[0].query_match().has_flag(
@ -856,7 +855,7 @@ test_thread_info_descending()
QueryMatch::Flags::Last)); QueryMatch::Flags::Last));
g_assert_true (results[3].query_match().has_flag( g_assert_true (results[3].query_match().has_flag(
QueryMatch::Flags::First)); QueryMatch::Flags::First));
} }

View File

@ -49,7 +49,7 @@ struct Query::Private {
const StringSet& thread_ids, const StringSet& thread_ids,
MuMsgFieldId sortfieldid, QueryFlags qflags) const; MuMsgFieldId sortfieldid, QueryFlags qflags) const;
Option<QueryResults> run_threaded (QueryResults &qres, Xapian::Enquire& enq, Option<QueryResults> run_threaded (QueryResults&& qres, Xapian::Enquire& enq,
QueryFlags qflags) const; QueryFlags qflags) const;
Option<QueryResults> run_singular (const std::string& expr, MuMsgFieldId sortfieldid, Option<QueryResults> run_singular (const std::string& expr, MuMsgFieldId sortfieldid,
QueryFlags qflags, size_t maxnum) const; QueryFlags qflags, size_t maxnum) const;
@ -118,32 +118,31 @@ Query::Private::make_related_enquire (const Xapian::Query& first_q,
} }
struct ThreadKeyMaker: public Xapian::KeyMaker { struct ThreadKeyMaker: public Xapian::KeyMaker {
ThreadKeyMaker (const QueryMatches& matches, bool descending): ThreadKeyMaker (const QueryMatches& matches): match_info_(matches) {}
match_info_(matches),
not_found_key_{descending ? "#" : "z"}
{}
std::string operator()(const Xapian::Document& doc) const override { std::string operator()(const Xapian::Document& doc) const override {
const auto it{match_info_.find(doc.get_docid())}; const auto it{match_info_.find(doc.get_docid())};
return (it == match_info_.end()) ? not_found_key_ : it->second.thread_path; if (it == match_info_.end())
g_warning ("not found! %u", doc.get_docid());
return (it == match_info_.end()) ? "" : it->second.thread_path;
} }
const QueryMatches& match_info_; const QueryMatches& match_info_;
const std::string not_found_key_;
}; };
Option<QueryResults> Option<QueryResults>
Query::Private::run_threaded (QueryResults &qres, Xapian::Enquire& enq, Query::Private::run_threaded (QueryResults&& qres, Xapian::Enquire& enq,
QueryFlags qflags) const QueryFlags qflags) const
{ {
const auto descending{any_of(qflags & QueryFlags::Descending)}; const auto descending{any_of(qflags & QueryFlags::Descending)};
calculate_threads(qres, descending); calculate_threads(qres, descending);
ThreadKeyMaker key_maker{qres.query_matches(), descending}; ThreadKeyMaker key_maker{qres.query_matches()};
enq.set_sort_by_key(&key_maker, descending); enq.set_sort_by_key(&key_maker, descending);
DeciderInfo minfo; DeciderInfo minfo;
minfo.matches = qres.query_matches(); minfo.matches = qres.query_matches();
auto mset{enq.get_mset(0, qres.size(), {}, make_final_decider(qflags, minfo).get())}; auto mset{enq.get_mset(0, qres.size(), {},
make_thread_decider(qflags, minfo).get())};
mset.fetch(); mset.fetch();
return QueryResults{mset, std::move(qres.query_matches())}; return QueryResults{mset, std::move(qres.query_matches())};
@ -174,7 +173,7 @@ Query::Private::run_singular (const std::string& expr, MuMsgFieldId sortfieldid,
auto qres{QueryResults{mset, std::move(minfo.matches)}}; auto qres{QueryResults{mset, std::move(minfo.matches)}};
return threading ? run_threaded(qres, enq, qflags) : qres; return threading ? run_threaded(std::move(qres), enq, qflags) : qres;
} }
@ -209,7 +208,7 @@ Query::Private::run_related (const std::string& expr, MuMsgFieldId sortfieldid,
{}, make_related_decider(qflags, minfo).get())}; {}, make_related_decider(qflags, minfo).get())};
auto qres{QueryResults{r_mset, std::move(minfo.matches)}}; auto qres{QueryResults{r_mset, std::move(minfo.matches)}};
return threading ? run_threaded(qres, r_enq, qflags) : qres; return threading ? run_threaded(std::move(qres), r_enq, qflags) : qres;
} }
Option<QueryResults> Option<QueryResults>