From 579f841bddf8fa30bf41d7059a569e3910482e7d Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Mon, 15 Feb 2021 19:11:22 +0200 Subject: [PATCH] query: filter out docs without query-matches This avoid including unwanted messages in threaded results. Also some cleanups. --- lib/mu-query-match-deciders.cc | 17 ++++++------- lib/mu-query-match-deciders.hh | 8 +++--- lib/mu-query-threads.cc | 45 +++++++++++++++++----------------- lib/mu-query.cc | 23 +++++++++-------- 4 files changed, 44 insertions(+), 49 deletions(-) diff --git a/lib/mu-query-match-deciders.cc b/lib/mu-query-match-deciders.cc index e2e17bb4..9f5a8aeb 100644 --- a/lib/mu-query-match-deciders.cc +++ b/lib/mu-query-match-deciders.cc @@ -102,7 +102,7 @@ private: } MU_XAPIAN_CATCH_BLOCK_RETURN (Nothing); }; -struct MatchDeciderLeader: public MatchDecider { +struct MatchDeciderLeader final: public MatchDecider { MatchDeciderLeader (QueryFlags qflags, DeciderInfo& info): MatchDecider(qflags, info) {} @@ -155,7 +155,7 @@ Mu::make_leader_decider (QueryFlags qflags, DeciderInfo& info) return std::make_unique(qflags, info); } -struct MatchDeciderRelated: public MatchDecider { +struct MatchDeciderRelated final: public MatchDecider { MatchDeciderRelated(QueryFlags qflags, DeciderInfo& info): MatchDecider(qflags, info) {} /** @@ -200,8 +200,8 @@ Mu::make_related_decider (QueryFlags qflags, DeciderInfo& info) return std::make_unique(qflags, info); } -struct MatchDeciderFinal: public MatchDecider { - MatchDeciderFinal(QueryFlags qflags, DeciderInfo& info): +struct MatchDeciderThread final: public MatchDecider { + MatchDeciderThread(QueryFlags qflags, DeciderInfo& info): MatchDecider{qflags, info} {} /** * operator() @@ -219,16 +219,13 @@ struct MatchDeciderFinal: public MatchDecider { // we may have seen this match in the "Leader" query, // or in the second (unbuounded) related query; const auto it{decider_info_.matches.find(doc.get_docid())}; - if (it == decider_info_.matches.end()) - return false; - else - return should_include(it->second); + return it != decider_info_.matches.end() && !it->second.thread_path.empty(); } }; std::unique_ptr -Mu::make_final_decider (QueryFlags qflags, DeciderInfo& info) +Mu::make_thread_decider (QueryFlags qflags, DeciderInfo& info) { - return std::make_unique(qflags, info); + return std::make_unique(qflags, info); } diff --git a/lib/mu-query-match-deciders.hh b/lib/mu-query-match-deciders.hh index f9f42f24..8a1c8a25 100644 --- a/lib/mu-query-match-deciders.hh +++ b/lib/mu-query-match-deciders.hh @@ -68,16 +68,16 @@ std::unique_ptr make_related_decider(QueryFlags qflags, /** - * Make a "final" decider, that is, a MatchDecider that removes all but - * the document excepts for the ones included earlier. + * Make a "thread" decider, that is, a MatchDecider that removes all but the + * document excepts for the ones found during initial/related searches. * * @param qflags query flags * @param match_info receives information about the matches. * * @return a unique_ptr to a match decider. */ -std::unique_ptr make_final_decider (QueryFlags qflags, - DeciderInfo& info); +std::unique_ptr make_thread_decider (QueryFlags qflags, + DeciderInfo& info); } // namepace Mu diff --git a/lib/mu-query-threads.cc b/lib/mu-query-threads.cc index 00f94595..3c888f16 100644 --- a/lib/mu-query-threads.cc +++ b/lib/mu-query-threads.cc @@ -81,7 +81,7 @@ struct Container { // That the sub-root-levels of thtreas are always sorted // by date, in ascending order. std::string thread_date_key; - + Option query_match; bool is_nuked{}; @@ -103,7 +103,7 @@ using ContainerVec = Container::ContainerVec; static std::ostream& operator<<(std::ostream& os, const Container& container) { - os << "container: " << std::right << std::setw(10) << &container + os << "container: " << std::right << std::setw(10) << &container << ": parent: " << std::right << std::setw(10) << container.parent << " [" << container.thread_date_key << "]" << "\n children: "; @@ -122,7 +122,6 @@ operator<<(std::ostream& os, const Container& container) using IdTable = std::unordered_map; using DupTable = std::multimap; -//template using DupsVec = std::vector; static void handle_duplicates (IdTable& id_table, DupTable& dup_table) @@ -165,8 +164,8 @@ determine_id_table (QueryResultsType& qres) auto c_it = id_table.find(msgid); auto& container = [&]()->Container& { if (c_it != id_table.end()) { - assert(!c_it->second.query_match); - c_it->second.query_match = mi.query_match(); + if (!c_it->second.query_match) // hmm, dup? + c_it->second.query_match = mi.query_match(); return c_it->second; } else { // Else: @@ -218,7 +217,7 @@ determine_id_table (QueryResultsType& qres) parent_ref_container = ref_container; } - + // Add the query_match to the chain. if (parent_ref_container && !container.parent) { if (!parent_ref_container->is_reachable(&container)) @@ -276,13 +275,13 @@ static void prune (Container* child) { Container *container{child->parent}; - + for (auto& grandchild: child->children) { grandchild->parent = container; if (container) container->children.emplace_back(grandchild); } - + child->children.clear(); child->is_nuked = true; @@ -295,7 +294,7 @@ static bool prune_empty_containers (Container& container) { Containers to_prune; - + container.for_each_child([&](auto& child){ if (prune_empty_containers(*child)) to_prune.emplace_back(child); @@ -303,7 +302,7 @@ prune_empty_containers (Container& container) for (auto& child: to_prune) prune (child); - + // Never nuke these. if (container.query_match) return false; @@ -328,7 +327,7 @@ static void prune_empty_containers (IdTable& id_table) { for (auto&& item: id_table) { - + auto& child(item.second); if (child.parent) continue; // not a root child. @@ -384,7 +383,7 @@ update_container (Container& container, bool descending, first->query_match->flags |= QueryMatch::Flags::First; Container* last = container.children.back(); if (last->query_match) - last->query_match->flags |= QueryMatch::Flags::Last; + last->query_match->flags |= QueryMatch::Flags::Last; } // calculate the "thread-subject", which is for UI @@ -408,7 +407,7 @@ update_container (Container& container, bool descending, return true; } - + static void update_containers (Containers& children, bool descending, ThreadPath& tpath, @@ -450,7 +449,7 @@ static void sort_container (Container& container) { // 1. childless container. - + if (container.children.empty()) { container.thread_date_key = container.query_match->date_key; return; @@ -493,12 +492,12 @@ sort_siblings (IdTable& id_table, bool descending) if (c->query_match) // for debugging, remember c->query_match->thread_date = c->thread_date_key; } - + // and then sort the root set. // // The difference with the sub-root containers is that at the top-level, // we can sort either in ascending or descending order, while on the - // subroot level it's always in ascending order. + // subroot level it's always in ascending order. // // Note that unless we're testing, _xapian_ will handle // the ascending/descending of the top level. @@ -524,7 +523,7 @@ operator<<(std::ostream& os, const IdTable& id_table) os << item.first << " => " << item.second << "\n"; } os << "------------------------------------------------\n"; - + std::set ids; for (auto&& item: id_table) { if (item.second.query_match) @@ -551,7 +550,7 @@ calculate_threads_real (Results& qres, bool descending) if (g_test_verbose()) std::cout << "*** id-table(1):\n" << id_table << "\n"; - + // // Step 2: get the root set // // Step 3: discard id_table @@ -786,11 +785,11 @@ static void test_prune_empty_with_children() { // m6 should be nuked - auto results = MockQueryResults { + auto results = MockQueryResults { MockQueryResult{ "m1", "1", {"m7", "m6"} }, MockQueryResult{ "m2", "2", {"m7", "m6"} }, }; - + calculate_threads(results, false); assert_thread_paths (results, { @@ -843,9 +842,9 @@ test_thread_info_descending() assert_thread_paths (results, { { "m1", "0:z"}, // 5 - { "m2", "1:z" }, // 2 + { "m2", "1:z" }, // 2 { "m4", "1:f:z" }, // 2 - { "m3", "1:e:z"}, // 3 + { "m3", "1:e:z"}, // 3 }); g_assert_true (results[0].query_match().has_flag( @@ -856,7 +855,7 @@ test_thread_info_descending() QueryMatch::Flags::Last)); g_assert_true (results[3].query_match().has_flag( QueryMatch::Flags::First)); - + } diff --git a/lib/mu-query.cc b/lib/mu-query.cc index dd91aa71..b98f77ac 100644 --- a/lib/mu-query.cc +++ b/lib/mu-query.cc @@ -49,7 +49,7 @@ struct Query::Private { const StringSet& thread_ids, MuMsgFieldId sortfieldid, QueryFlags qflags) const; - Option run_threaded (QueryResults &qres, Xapian::Enquire& enq, + Option run_threaded (QueryResults&& qres, Xapian::Enquire& enq, QueryFlags qflags) const; Option run_singular (const std::string& expr, MuMsgFieldId sortfieldid, QueryFlags qflags, size_t maxnum) const; @@ -118,32 +118,31 @@ Query::Private::make_related_enquire (const Xapian::Query& first_q, } struct ThreadKeyMaker: public Xapian::KeyMaker { - ThreadKeyMaker (const QueryMatches& matches, bool descending): - match_info_(matches), - not_found_key_{descending ? "#" : "z"} - {} + ThreadKeyMaker (const QueryMatches& matches): match_info_(matches) {} std::string operator()(const Xapian::Document& doc) const override { const auto it{match_info_.find(doc.get_docid())}; - return (it == match_info_.end()) ? not_found_key_ : it->second.thread_path; + if (it == match_info_.end()) + g_warning ("not found! %u", doc.get_docid()); + return (it == match_info_.end()) ? "" : it->second.thread_path; } const QueryMatches& match_info_; - const std::string not_found_key_; }; Option -Query::Private::run_threaded (QueryResults &qres, Xapian::Enquire& enq, +Query::Private::run_threaded (QueryResults&& qres, Xapian::Enquire& enq, QueryFlags qflags) const { const auto descending{any_of(qflags & QueryFlags::Descending)}; calculate_threads(qres, descending); - ThreadKeyMaker key_maker{qres.query_matches(), descending}; + ThreadKeyMaker key_maker{qres.query_matches()}; enq.set_sort_by_key(&key_maker, descending); DeciderInfo minfo; minfo.matches = qres.query_matches(); - auto mset{enq.get_mset(0, qres.size(), {}, make_final_decider(qflags, minfo).get())}; + auto mset{enq.get_mset(0, qres.size(), {}, + make_thread_decider(qflags, minfo).get())}; mset.fetch(); return QueryResults{mset, std::move(qres.query_matches())}; @@ -174,7 +173,7 @@ Query::Private::run_singular (const std::string& expr, MuMsgFieldId sortfieldid, auto qres{QueryResults{mset, std::move(minfo.matches)}}; - return threading ? run_threaded(qres, enq, qflags) : qres; + return threading ? run_threaded(std::move(qres), enq, qflags) : qres; } @@ -209,7 +208,7 @@ Query::Private::run_related (const std::string& expr, MuMsgFieldId sortfieldid, {}, make_related_decider(qflags, minfo).get())}; auto qres{QueryResults{r_mset, std::move(minfo.matches)}}; - return threading ? run_threaded(qres, r_enq, qflags) : qres; + return threading ? run_threaded(std::move(qres), r_enq, qflags) : qres; } Option