pidgin/pidgin

Implement and test multi-trie

2014-03-26, Tomasz Wasilczyk
f1310093e434
Parents 4a2cf3314f4b
Children ca4afaddaffa
Implement and test multi-trie
--- a/libpurple/tests/test_trie.c Wed Mar 26 14:31:26 2014 +0100
+++ b/libpurple/tests/test_trie.c Wed Mar 26 19:38:58 2014 +0100
@@ -7,7 +7,7 @@
test_trie_replace_cb(GString *out, const gchar *word, gpointer word_data,
gpointer user_data)
{
- /* the "test" word */
+ /* the "test" word for the test_trie_replace test */
if ((int)word_data == 0x1001)
return FALSE;
@@ -35,10 +35,10 @@
in = "Alice is testing her trie implementation, "
"but she's far away from making test tree overtested";
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)7);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)1);
- assert_string_equal("Alice is [7:1002] her [7:1004] [7:1006]ation,"
- " but she's far away from making test [7:1005] [7:1003]", out);
+ assert_string_equal("Alice is [1:1002] her [1:1004] [1:1006]ation,"
+ " but she's far away from making test [1:1005] [1:1003]", out);
g_object_unref(trie);
g_free(out);
@@ -57,9 +57,9 @@
in = "test";
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)5);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2);
- assert_string_equal("[5:2002]", out);
+ assert_string_equal("[2:2002]", out);
g_object_unref(trie);
g_free(out);
@@ -100,7 +100,7 @@
in = "";
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)4);
assert_string_equal("", out);
@@ -109,6 +109,51 @@
}
END_TEST
+START_TEST(test_trie_multi_replace)
+{
+ PurpleTrie *trie1, *trie2, *trie3;
+ GSList *tries = NULL;
+ const gchar *in;
+ gchar *out;
+
+ trie1 = purple_trie_new();
+ trie2 = purple_trie_new();
+ trie3 = purple_trie_new();
+
+ /* appending is not efficient, but we have only 3 elements */
+ tries = g_slist_append(tries, trie1);
+ tries = g_slist_append(tries, trie2);
+ tries = g_slist_append(tries, trie3);
+
+ purple_trie_add(trie1, "test", (gpointer)0x5011);
+ purple_trie_add(trie1, "trie1", (gpointer)0x5012);
+ purple_trie_add(trie1, "Alice", (gpointer)0x5013);
+
+ purple_trie_add(trie2, "test", (gpointer)0x5021);
+ purple_trie_add(trie2, "trie2", (gpointer)0x5022);
+ purple_trie_add(trie2, "example", (gpointer)0x5023);
+ purple_trie_add(trie2, "Ali", (gpointer)0x5024);
+
+ /* "tester" without last (accepting) letter of "test" */
+ purple_trie_add(trie3, "teser", (gpointer)0x5031);
+ purple_trie_add(trie3, "trie3", (gpointer)0x5032);
+ purple_trie_add(trie3, "tester", (gpointer)0x5033);
+ purple_trie_add(trie3, "example", (gpointer)0x5034);
+ purple_trie_add(trie3, "Al", (gpointer)0x5035);
+
+ in = "test tester trie trie1 trie2 trie3 example Alice";
+
+ out = purple_trie_multi_replace(tries, in,
+ test_trie_replace_cb, (gpointer)5);
+
+ assert_string_equal("[5:5011] [5:5011]er trie [5:5012] [5:5022] "
+ "[5:5032] [5:5023] [5:5035]ice", out);
+
+ g_slist_free_full(tries, g_object_unref);
+ g_free(out);
+}
+END_TEST
+
Suite *
purple_trie_suite(void)
{
@@ -119,6 +164,7 @@
tcase_add_test(tc, test_trie_replace_whole);
tcase_add_test(tc, test_trie_replace_inner);
tcase_add_test(tc, test_trie_replace_empty);
+ tcase_add_test(tc, test_trie_multi_replace);
suite_add_tcase(s, tc);
--- a/libpurple/trie.c Wed Mar 26 14:31:26 2014 +0100
+++ b/libpurple/trie.c Wed Mar 26 19:38:58 2014 +0100
@@ -74,6 +74,17 @@
PurpleTrieRecord *found_word;
};
+typedef struct
+{
+ PurpleTrieState *state;
+
+ PurpleTrieState *root_state;
+ gboolean reset_on_match;
+
+ PurpleTrieReplaceCb replace_cb;
+ gpointer user_data;
+} PurpleTrieMachine;
+
/*******************************************************************************
* Records list
******************************************************************************/
@@ -354,14 +365,63 @@
* Searching
******************************************************************************/
+static void
+purple_trie_replace_advance(PurpleTrieMachine *m, const guchar character)
+{
+ /* change state after processing a character */
+ while (TRUE) {
+ /* Perfect fit - next character is the same, as the child of the
+ * prefix we reached so far. */
+ if (m->state->children && m->state->children[character]) {
+ m->state = m->state->children[character];
+ break;
+ }
+
+ /* We reached root, that's a pity. */
+ if (m->state == m->root_state)
+ break;
+
+ /* Let's try a bit shorter suffix. */
+ m->state = m->state->longest_suffix;
+ }
+}
+
+static gboolean
+purple_trie_replace_do_replacement(PurpleTrieMachine *m, GString *out)
+{
+ gboolean was_replaced = FALSE;
+ gsize str_old_len;
+
+ /* if we reached a "found" state, let's process it */
+ if (!m->state->found_word)
+ return FALSE;
+
+ /* let's get back to the beginning of the word */
+ g_assert(out->len >= m->state->found_word->word_len - 1);
+ str_old_len = out->len;
+ out->len -= m->state->found_word->word_len - 1;
+
+ was_replaced = m->replace_cb(out, m->state->found_word->word,
+ m->state->found_word->data, m->user_data);
+
+ /* output was untouched, revert to the previous position */
+ if (!was_replaced)
+ out->len = str_old_len;
+
+ if (was_replaced || m->reset_on_match)
+ m->state = m->root_state;
+
+ return was_replaced;
+}
+
gchar *
purple_trie_replace(PurpleTrie *trie, const gchar *src,
PurpleTrieReplaceCb replace_cb, gpointer user_data)
{
PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
- PurpleTrieState *state;
- gsize i = 0;
+ PurpleTrieMachine machine;
GString *out;
+ gsize i;
if (src == NULL)
return NULL;
@@ -371,49 +431,20 @@
purple_trie_states_build(trie);
+ machine.state = priv->root_state;
+ machine.root_state = priv->root_state;
+ machine.reset_on_match = priv->reset_on_match;
+ machine.replace_cb = replace_cb;
+ machine.user_data = user_data;
+
out = g_string_new(NULL);
- state = priv->root_state;
+ i = 0;
while (src[i] != '\0') {
guchar character = src[i++];
- gboolean was_replaced = FALSE;
-
- /* change state after processing a character */
- while (TRUE) {
- /* Perfect fit - next character is the same, as the
- * child of the prefix we reached so far. */
- if (state->children && state->children[character]) {
- state = state->children[character];
- break;
- }
-
- /* We reached root, that's a pity. */
- if (state == priv->root_state)
- break;
-
- /* Let's try a bit shorter suffix. */
- state = state->longest_suffix;
- }
+ gboolean was_replaced;
- /* if we reached a "found" state, let's process it */
- if (state->found_word) {
- gsize str_old_len;
-
- /* let's get back to the beginning of the word */
- g_assert(out->len >= state->found_word->word_len - 1);
- str_old_len = out->len;
- out->len -= state->found_word->word_len - 1;
-
- was_replaced = replace_cb(out, state->found_word->word,
- state->found_word->data, user_data);
-
- /* output string was untouched, rollback to the
- * previous position*/
- if (!was_replaced)
- out->len = str_old_len;
-
- if (was_replaced || priv->reset_on_match)
- state = priv->root_state;
- }
+ purple_trie_replace_advance(&machine, character);
+ was_replaced = purple_trie_replace_do_replacement(&machine, out);
/* We skipped a character without finding any records,
* let's just copy it to the output. */
@@ -424,6 +455,78 @@
return g_string_free(out, FALSE);
}
+gchar *
+purple_trie_multi_replace(const GSList *tries, const gchar *src,
+ PurpleTrieReplaceCb replace_cb, gpointer user_data)
+{
+ guint tries_count, m_idx;
+ PurpleTrieMachine *machines;
+ GString *out;
+ gsize i;
+
+ if (src == NULL)
+ return NULL;
+
+ g_return_val_if_fail(replace_cb != NULL, g_strdup(src));
+
+ tries_count = g_slist_length((GSList*)tries);
+ if (tries_count == 0)
+ return g_strdup(src);
+
+ /* Initialize all machines. */
+ machines = g_new(PurpleTrieMachine, tries_count);
+ for (i = 0; i < tries_count; i++, tries = tries->next) {
+ PurpleTrie *trie = tries->data;
+ PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
+
+ if (priv == NULL) {
+ g_warn_if_reached();
+ g_free(machines);
+ return NULL;
+ }
+
+ purple_trie_states_build(trie);
+
+ machines[i].state = priv->root_state;
+ machines[i].root_state = priv->root_state;
+ machines[i].reset_on_match = priv->reset_on_match;
+ machines[i].replace_cb = replace_cb;
+ machines[i].user_data = user_data;
+ }
+
+ out = g_string_new(NULL);
+ i = 0;
+ while (src[i] != '\0') {
+ guchar character = src[i++];
+ gboolean was_replaced = FALSE;
+
+ /* Advance every machine and possibly perform a replacement. */
+ for (m_idx = 0; m_idx < tries_count; m_idx++) {
+ purple_trie_replace_advance(&machines[m_idx], character);
+ if (was_replaced)
+ continue;
+ was_replaced = purple_trie_replace_do_replacement(
+ &machines[m_idx], out);
+ }
+
+ /* We skipped a character without finding any records,
+ * let's just copy it to the output. */
+ if (!was_replaced)
+ g_string_append_c(out, character);
+
+ /* If we replaced a word, reset _all_ machines */
+ if (was_replaced) {
+ for (m_idx = 0; m_idx < tries_count; m_idx++) {
+ machines[m_idx].state =
+ machines[m_idx].root_state;
+ }
+ }
+ }
+
+ g_free(machines);
+ return g_string_free(out, FALSE);
+}
+
/*******************************************************************************
* Records
@@ -483,6 +586,7 @@
* Object stuff
******************************************************************************/
+/* TODO: an option to make it eager or lazy (now, it's eager) */
enum
{
PROP_ZERO,
--- a/libpurple/trie.h Wed Mar 26 14:31:26 2014 +0100
+++ b/libpurple/trie.h Wed Mar 26 19:38:58 2014 +0100
@@ -128,7 +128,7 @@
* @replace_cb: The replacement function.
* @user_data: Custom data to be passed to @replace_cb.
*
- * Looks for all occuriences of words added to @trie, in @src string.
+ * Processes @src string and replaces all occuriences of words added to @trie.
* It's O(strlen(src)), if replace_cb runs in O(strlen(word)).
*
* Returns: resulting string. Must be g_free'd when you are done using it.
@@ -137,6 +137,26 @@
purple_trie_replace(PurpleTrie *trie, const gchar *src,
PurpleTrieReplaceCb replace_cb, gpointer user_data);
+/**
+ * purple_trie_multi_replace:
+ * @tries: The list of tries.
+ * @src: The source string.
+ * @replace_cb: The replacement function.
+ * @user_data: Custom data to be passed to @replace_cb.
+ *
+ * Processes @src and replaces all occuriences of words added to tries in list
+ * @tries. Entries added to tries on the beginning of the list have higher
+ * priority, than ones added further.
+ *
+ * Differend GSList's can be combined to possess common parts, so you can create
+ * a "tree of tries".
+ *
+ * Returns: resulting string. Must be g_free'd when you are done using it.
+ */
+gchar *
+purple_trie_multi_replace(const GSList *tries, const gchar *src,
+ PurpleTrieReplaceCb replace_cb, gpointer user_data);
+
G_END_DECLS
#endif /* PURPLE_MEMORY_POOL_H */