From a69cacf919fb37d63c3b4746248304de3f91373f Mon Sep 17 00:00:00 2001
From: "Dirk-Jan C. Binnema"
Date: Mon, 13 Jun 2011 23:04:20 +0300
Subject: [PATCH] * WIP: mu-msg-threader, and hook it up with msg-iter

---
 src/Makefile.am       |   2 +
 src/mu-msg-iter.cc    |  25 ++--
 src/mu-msg-threader.c | 354 ++++++++++++++++++++++++++++++++++++++++++
 src/mu-msg-threader.h |  62 +++++++
 4 files changed, 434 insertions(+), 9 deletions(-)
 create mode 100644 src/mu-msg-threader.c
 create mode 100644 src/mu-msg-threader.h

diff --git a/src/Makefile.am b/src/Makefile.am
index 05d9d990..a788ee87 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -81,6 +81,8 @@ libmu_la_SOURCES= \
 	mu-msg-prio.c \
 	mu-msg-prio.h \
 	mu-msg-priv.h \
+	mu-msg-threader.c \
+	mu-msg-threader.h \
 	mu-msg.c \
 	mu-msg.h \
 	mu-msg.h \
diff --git a/src/mu-msg-iter.cc b/src/mu-msg-iter.cc
index c036eef9..8333dfe9 100644
--- a/src/mu-msg-iter.cc
+++ b/src/mu-msg-iter.cc
@@ -27,6 +27,8 @@
 #include "mu-util.h"
 #include "mu-msg.h"
 #include "mu-msg-iter.h"
+#include "mu-msg-threader.h"
+
 
 /* just a guess... */
 #define MAX_FETCH_SIZE 10000
@@ -36,7 +38,7 @@ static gboolean update_msg (MuMsgIter *iter);
 
 struct _MuMsgIter {
 	_MuMsgIter (const Xapian::Enquire &enq, size_t maxnum):
-		_enq(enq), _msg(0) {
+		_enq(enq), _msg(0), _threader(0) {
 
 		_matches = _enq.get_mset (0, maxnum);
 		_cursor = _matches.begin();
@@ -46,21 +48,27 @@ struct _MuMsgIter {
 		if (_matches.size() <= MAX_FETCH_SIZE)
 			_matches.fetch ();
 
-		if (!_matches.empty())
+		if (!_matches.empty()) {
 			update_msg (this);
+			_threader = mu_msg_threader_new ();
+			mu_msg_threader_calculate (_threader, this);
+		}
 	}
 
 	~_MuMsgIter () {
 		if (_msg)
 			mu_msg_unref (_msg);
+
+		mu_msg_threader_destroy (_threader);
 	}
 
-	const Xapian::Enquire _enq;
-	Xapian::MSet _matches;
-	Xapian::MSet::const_iterator _cursor;
+	const Xapian::Enquire		_enq;
+	Xapian::MSet			_matches;
+	Xapian::MSet::const_iterator	_cursor;
+	Xapian::Document		_doc;
 
-	Xapian::Document _doc;
-	MuMsg *_msg;
+	MuMsg				*_msg;
+	MuMsgThreader			*_threader;
 };
 
 
@@ -82,8 +90,7 @@ mu_msg_iter_destroy (MuMsgIter *iter)
 {
 	try { delete iter; } MU_XAPIAN_CATCH_BLOCK;
 }
-
-
+
 MuMsg*
 mu_msg_iter_get_msg (MuMsgIter *iter, GError **err)
 {
diff --git a/src/mu-msg-threader.c b/src/mu-msg-threader.c
new file mode 100644
index 00000000..57258980
--- /dev/null
+++ b/src/mu-msg-threader.c
@@ -0,0 +1,354 @@
+/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
+
+/*
+** Copyright (C) 2011 Dirk-Jan C. Binnema
+**
+** This program is free software; you can redistribute it and/or modify it
+** under the terms of the GNU General Public License as published by the
+** Free Software Foundation; either version 3, or (at your option) any
+** later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software Foundation,
+** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+**
+*/
+
+#include "mu-msg-threader.h"
+
+/* maps message-ids to containers; the table owns both its keys
+ * (copies of the ids) and the containers */
+struct _MuMsgThreader {
+	GHashTable *_ids;
+};
+
+/* a node in the thread tree */
+struct _Container {
+	MuMsg *_msg; /* reffed; NULL if only seen in References */
+	struct _Container *_parent; /* NULL for a root */
+	GSList *_children; /* Containers; elements not owned */
+};
+typedef struct _Container Container;
+
+static Container *container_new (MuMsg *msg);
+static void container_destroy (Container *c);
+
+
+/* depth-first recursive traversal */
+typedef gboolean (*ContainerTraverseFunc) (Container *c, gpointer user_data);
+static gboolean container_traverse (Container *c, ContainerTraverseFunc func,
+				    gpointer user_data);
+static gboolean container_traverse_list (GSList *containers, ContainerTraverseFunc func,
+					 gpointer user_data);
+static void container_add_child (Container *c, Container *child);
+
+/* create a threader with an empty message-id table */
+MuMsgThreader *
+mu_msg_threader_new (void)
+{
+	MuMsgThreader *self;
+
+	self = g_slice_new (MuMsgThreader);
+	/* keys are g_strdup'ed in find_or_create, so the table must
+	 * free them */
+	self->_ids = g_hash_table_new_full (g_str_hash,
+					    g_str_equal,
+					    (GDestroyNotify)g_free,
+					    (GDestroyNotify)container_destroy);
+	return self;
+}
+
+/* free the threader and all its containers; NULL is ignored */
+void
+mu_msg_threader_destroy (MuMsgThreader *self)
+{
+	if (!self)
+		return;
+
+	g_hash_table_destroy (self->_ids);
+
+	g_slice_free (MuMsgThreader, self);
+}
+
+
+static Container *store_msg_in_container (GHashTable *ids, MuMsg *msg);
+static void handle_references (GHashTable *ids, Container *c);
+static Container* find_or_create (GHashTable *ids, const char *msgid);
+static GSList *find_root_set (GHashTable *ids);
+static void dump_root_set (GSList *root_set);
+
+/* msg threading algorithm, based on JWZ's algorithm,
+ * http://www.jwz.org/doc/threading.html
+ *
+ * links all messages of iter into containers, and leaves iter reset;
+ * returns FALSE only when self or iter is NULL */
+gboolean
+mu_msg_threader_calculate (MuMsgThreader *self, MuMsgIter *iter)
+{
+	GSList *root_set;
+
+	g_return_val_if_fail (self, FALSE);
+	g_return_val_if_fail (iter, FALSE);
+
+	/* 1. for all messages... */
+	for (mu_msg_iter_reset (iter); !mu_msg_iter_is_done (iter);
+	     mu_msg_iter_next (iter)) {
+
+		Container *c;
+
+		/* 1.A */
+		c = store_msg_in_container (self->_ids,
+					    mu_msg_iter_get_msg (iter, NULL));
+		if (!c) {
+			g_printerr ("failed to create container\n");
+			continue;
+		}
+
+		/* 1.B and C */
+		handle_references (self->_ids, c);
+	}
+
+	/* step 2: roots can only be told apart once all messages are
+	 * linked, so do this once, after the loop */
+	root_set = find_root_set (self->_ids);
+	dump_root_set (root_set);
+	g_slist_free (root_set); /* frees the list, not the containers */
+
+	mu_msg_iter_reset (iter); /* go all the way back */
+
+	return TRUE;
+}
+
+
+/* debug helper: print the subject of each root container */
+static void
+dump_root_set (GSList *root_set)
+{
+	GSList *cur;
+
+	for (cur = root_set; cur; cur = g_slist_next (cur)) {
+		MuMsg *msg;
+		msg = ((Container*)cur->data)->_msg;
+		if (!msg)
+			g_printerr ("\n");
+		else {
+			const gchar *subj;
+			subj = mu_msg_get_subject (msg);
+			g_printerr ("%s\n", subj ? subj : "");
+		}
+	}
+}
+
+
+/* find (or create) the container for msg's message-id and attach msg
+ * to it if it is still empty; NULL if msg has no message-id */
+static Container* /* 1A */
+store_msg_in_container (GHashTable *ids, MuMsg *msg)
+{
+	Container *c;
+	const char *msgid;
+
+	g_return_val_if_fail (msg, NULL);
+
+	msgid = mu_msg_get_msgid (msg);
+	g_return_val_if_fail (msgid, NULL);
+
+	c = find_or_create (ids, msgid);
+
+	if (!c->_msg)
+		c->_msg = mu_msg_ref (msg);
+	else
+		g_printerr ("%s: duplicate msgid; ignore\n",
+			    __FUNCTION__); /* FIXME */
+	return c;
+}
+
+
+/* make child a child of parent; existing links are left alone, and
+ * links that would turn child into its own ancestor are refused */
+static void
+set_parent_child (Container *parent, Container *child)
+{
+	Container *anc;
+
+	g_return_if_fail (parent);
+	g_return_if_fail (child);
+
+	if (child->_parent)
+		return; /* already linked */
+
+	/* walking up from parent we must not meet child; this also
+	 * catches parent == child */
+	for (anc = parent; anc; anc = anc->_parent)
+		if (anc == child)
+			return;
+
+	container_add_child (parent, child);
+	child->_parent = parent;
+
+	g_printerr ("%s: %p is a parent of %p\n", __FUNCTION__,
+		    (gpointer)parent, (gpointer)child);
+}
+
+
+/* link the ids in c's References as a parent->child chain, and make
+ * the last of them the parent of c */
+static void /* 1B */
+handle_references (GHashTable *ids, Container *c)
+{
+	const gchar *refsstr;
+	gchar **refs, **cur;
+
+	refsstr = mu_msg_get_references_str (c->_msg);
+	if (!refsstr)
+		return; /* nothing to do */
+
+	refs = g_strsplit (refsstr, ",", -1);
+	if (!refs[0]) { /* "" splits into an empty vector */
+		g_strfreev (refs);
+		return;
+	}
+
+	/* go over our list of refs */
+	for (cur = refs; *cur && *(cur + 1); ++cur) {
+		Container *c1, *c2; /* two consecutive refs in the list;
+				     * we register them as parent, child */
+		c1 = find_or_create (ids, *cur);
+		c2 = find_or_create (ids, *(cur + 1));
+
+		set_parent_child (c1, c2);
+	}
+
+	/* now cur points to the final ref; we register that with
+	 * ourselves...*/
+	set_parent_child (find_or_create (ids, *cur), c);
+
+	g_strfreev (refs);
+}
+
+
+/* find a container for the given msgid; if it does not exist yet,
+ * create a new one, and register it */
+static Container*
+find_or_create (GHashTable *ids, const char *msgid)
+{
+	Container *c;
+
+	g_return_val_if_fail (msgid, NULL);
+
+	c = g_hash_table_lookup (ids, msgid);
+	if (c) { /* we found the container? */
+		g_printerr ("%s: found %s\n", __FUNCTION__, msgid);
+		return c;
+	}
+
+	/* no container yet, create it */
+	c = container_new (NULL);
+
+	/* the key must be our own copy: handle_references passes
+	 * strings that it frees right after the call; the table
+	 * releases the copy with g_free */
+	g_hash_table_insert (ids, g_strdup(msgid), c);
+	g_printerr ("%s: created %s\n", __FUNCTION__, msgid);
+
+	return c;
+}
+
+/* GHFunc: collect containers without a parent in *lst */
+static void
+filter_parentless (const gchar *msgid, Container *c, GSList **lst)
+{
+	if (!c->_parent)
+		*lst = g_slist_prepend (*lst, c);
+}
+
+
+/* return a new list (free with g_slist_free) of all containers that
+ * have no parent */
+static GSList *
+find_root_set (GHashTable *ids)
+{
+	GSList *lst;
+
+	lst = NULL;
+	g_hash_table_foreach (ids, (GHFunc)filter_parentless, &lst);
+
+	return lst;
+}
+
+
+/* allocate a zeroed container; takes its own reference on msg, if any */
+static Container*
+container_new (MuMsg *msg)
+{
+	Container *c;
+
+	c = g_slice_new0 (Container);
+	if (msg)
+		c->_msg = mu_msg_ref (msg);
+
+	return c;
+}
+
+/* release a container: its msg reference, its child list (not the
+ * children; the ids table destroys those) and the struct itself */
+static void
+container_destroy (Container *c)
+{
+	if (!c)
+		return;
+
+	if (c->_msg)
+		mu_msg_unref (c->_msg);
+
+	g_slist_free (c->_children);
+	g_slice_free (Container, c);
+}
+
+
+/* depth-first traverse all children, grand-children ... n-children of
+ * container */
+static gboolean
+container_traverse (Container *c, ContainerTraverseFunc func,
+		    gpointer user_data)
+{
+	/* if func returns FALSE, quit */
+	if (!func (c, user_data))
+		return FALSE;
+
+	if (c->_children)
+		return container_traverse_list (c->_children,
+						func, user_data);
+	return TRUE;
+}
+
+
+/* traverse each container in the list; stop at the first FALSE */
+static gboolean
+container_traverse_list (GSList *containers, ContainerTraverseFunc func,
+			 gpointer user_data)
+{
+	GSList *cur;
+
+	for (cur = containers; cur; cur = g_slist_next (cur))
+		if (!container_traverse ((Container*)cur->data, func, user_data))
+			return FALSE; /* quit */
+	return TRUE;
+}
+
+
+/* prepend child to c's list of children */
+static void
+container_add_child (Container *c, Container *child)
+{
+	g_return_if_fail (c != child);
+	g_return_if_fail (child);
+
+	c->_children = g_slist_prepend (c->_children, child);
+}
+
diff --git a/src/mu-msg-threader.h b/src/mu-msg-threader.h
new file mode 100644
index 00000000..b14953e5
--- /dev/null
+++ b/src/mu-msg-threader.h
@@ -0,0 +1,62 @@
+/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
+
+/*
+** Copyright (C) 2011 Dirk-Jan C. Binnema
+**
+** This program is free software; you can redistribute it and/or modify it
+** under the terms of the GNU General Public License as published by the
+** Free Software Foundation; either version 3, or (at your option) any
+** later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software Foundation,
+** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+**
+*/
+
+
+#ifndef __MU_MSG_THREADER_H__
+#define __MU_MSG_THREADER_H__
+
+#include <glib.h>
+#include <mu-msg-iter.h>
+
+G_BEGIN_DECLS
+
+struct _MuMsgThreader;
+typedef struct _MuMsgThreader MuMsgThreader;
+
+/**
+ * create a new, empty threader
+ *
+ * @return a new MuMsgThreader; free with mu_msg_threader_destroy
+ */
+MuMsgThreader *mu_msg_threader_new (void);
+
+/**
+ * destroy a threader and everything it holds
+ *
+ * @param self a threader, or NULL (in which case nothing happens)
+ */
+void mu_msg_threader_destroy (MuMsgThreader *self);
+
+/**
+ * calculate the threads for the messages in an iterator; the
+ * iterator is reset before and after
+ *
+ * @param self a threader
+ * @param iter a message iterator
+ *
+ * @return TRUE if the calculation was done, FALSE if self or iter
+ * was NULL
+ */
+gboolean mu_msg_threader_calculate (MuMsgThreader *self, MuMsgIter *iter);
+
+G_END_DECLS
+
+#endif /*__MU_MSG_THREADER_H__*/