--- a/libpurple/tests/test_trie.c Wed Mar 26 14:31:26 2014 +0100
+++ b/libpurple/tests/test_trie.c Wed Mar 26 19:38:58 2014 +0100
@@ -7,7 +7,7 @@
test_trie_replace_cb(GString *out, const gchar *word, gpointer word_data,
+ /* the "test" word for the test_trie_replace test */ if ((int)word_data == 0x1001)
@@ -35,10 +35,10 @@
in = "Alice is testing her trie implementation, "
"but she's far away from making test tree overtested";
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)7);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)1); - assert_string_equal("Alice is [7:1002] her [7:1004] [7:1006]ation,"
- " but she's far away from making test [7:1005] [7:1003]", out);
+ assert_string_equal("Alice is [1:1002] her [1:1004] [1:1006]ation," + " but she's far away from making test [1:1005] [1:1003]", out); @@ -57,9 +57,9 @@
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)5);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2); - assert_string_equal("[5:2002]", out);
+ assert_string_equal("[2:2002]", out); @@ -100,7 +100,7 @@
- out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)2);
+ out = purple_trie_replace(trie, in, test_trie_replace_cb, (gpointer)4); assert_string_equal("", out);
@@ -109,6 +109,51 @@
+START_TEST(test_trie_multi_replace) + PurpleTrie *trie1, *trie2, *trie3; + trie1 = purple_trie_new(); + trie2 = purple_trie_new(); + trie3 = purple_trie_new(); + /* appending is not efficient, but we have only 3 elements */ + tries = g_slist_append(tries, trie1); + tries = g_slist_append(tries, trie2); + tries = g_slist_append(tries, trie3); + purple_trie_add(trie1, "test", (gpointer)0x5011); + purple_trie_add(trie1, "trie1", (gpointer)0x5012); + purple_trie_add(trie1, "Alice", (gpointer)0x5013); + purple_trie_add(trie2, "test", (gpointer)0x5021); + purple_trie_add(trie2, "trie2", (gpointer)0x5022); + purple_trie_add(trie2, "example", (gpointer)0x5023); + purple_trie_add(trie2, "Ali", (gpointer)0x5024); + /* "tester" without last (accepting) letter of "test" */ + purple_trie_add(trie3, "teser", (gpointer)0x5031); + purple_trie_add(trie3, "trie3", (gpointer)0x5032); + purple_trie_add(trie3, "tester", (gpointer)0x5033); + purple_trie_add(trie3, "example", (gpointer)0x5034); + purple_trie_add(trie3, "Al", (gpointer)0x5035); + in = "test tester trie trie1 trie2 trie3 example Alice"; + out = purple_trie_multi_replace(tries, in, + test_trie_replace_cb, (gpointer)5); + assert_string_equal("[5:5011] [5:5011]er trie [5:5012] [5:5022] " + "[5:5032] [5:5023] [5:5035]ice", out); + g_slist_free_full(tries, g_object_unref); @@ -119,6 +164,7 @@
tcase_add_test(tc, test_trie_replace_whole);
tcase_add_test(tc, test_trie_replace_inner);
tcase_add_test(tc, test_trie_replace_empty);
+ tcase_add_test(tc, test_trie_multi_replace); --- a/libpurple/trie.c Wed Mar 26 14:31:26 2014 +0100
+++ b/libpurple/trie.c Wed Mar 26 19:38:58 2014 +0100
@@ -74,6 +74,17 @@
PurpleTrieRecord *found_word;
+ PurpleTrieState *state; + PurpleTrieState *root_state; + gboolean reset_on_match; + PurpleTrieReplaceCb replace_cb; /*******************************************************************************
******************************************************************************/
@@ -354,14 +365,63 @@
******************************************************************************/
+purple_trie_replace_advance(PurpleTrieMachine *m, const guchar character) + /* change state after processing a character */ + /* Perfect fit - next character is the same, as the child of the + * prefix we reached so far. */ + if (m->state->children && m->state->children[character]) { + m->state = m->state->children[character]; + /* We reached root, that's a pity. */ + if (m->state == m->root_state) + /* Let's try a bit shorter suffix. */ + m->state = m->state->longest_suffix; +purple_trie_replace_do_replacement(PurpleTrieMachine *m, GString *out) + gboolean was_replaced = FALSE; + /* if we reached a "found" state, let's process it */ + if (!m->state->found_word) + /* let's get back to the beginning of the word */ + g_assert(out->len >= m->state->found_word->word_len - 1); + str_old_len = out->len; + out->len -= m->state->found_word->word_len - 1; + was_replaced = m->replace_cb(out, m->state->found_word->word, + m->state->found_word->data, m->user_data); + /* output was untouched, revert to the previous position */ + out->len = str_old_len; + if (was_replaced || m->reset_on_match) + m->state = m->root_state; purple_trie_replace(PurpleTrie *trie, const gchar *src,
PurpleTrieReplaceCb replace_cb, gpointer user_data)
PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie);
- PurpleTrieState *state;
+ PurpleTrieMachine machine; @@ -371,49 +431,20 @@
purple_trie_states_build(trie);
+ machine.state = priv->root_state; + machine.root_state = priv->root_state; + machine.reset_on_match = priv->reset_on_match; + machine.replace_cb = replace_cb; + machine.user_data = user_data; out = g_string_new(NULL);
- state = priv->root_state;
guchar character = src[i++];
- gboolean was_replaced = FALSE;
- /* change state after processing a character */
- /* Perfect fit - next character is the same, as the
- * child of the prefix we reached so far. */
- if (state->children && state->children[character]) {
- state = state->children[character];
- /* We reached root, that's a pity. */
- if (state == priv->root_state)
- /* Let's try a bit shorter suffix. */
- state = state->longest_suffix;
- /* if we reached a "found" state, let's process it */
- if (state->found_word) {
- /* let's get back to the beginning of the word */
- g_assert(out->len >= state->found_word->word_len - 1);
- str_old_len = out->len;
- out->len -= state->found_word->word_len - 1;
- was_replaced = replace_cb(out, state->found_word->word,
- state->found_word->data, user_data);
- /* output string was untouched, rollback to the
- out->len = str_old_len;
- if (was_replaced || priv->reset_on_match)
- state = priv->root_state;
+ purple_trie_replace_advance(&machine, character); + was_replaced = purple_trie_replace_do_replacement(&machine, out); /* We skipped a character without finding any records,
* let's just copy it to the output. */
@@ -424,6 +455,78 @@
return g_string_free(out, FALSE);
+purple_trie_multi_replace(const GSList *tries, const gchar *src, + PurpleTrieReplaceCb replace_cb, gpointer user_data) + guint tries_count, m_idx; + PurpleTrieMachine *machines; + g_return_val_if_fail(replace_cb != NULL, g_strdup(src)); + tries_count = g_slist_length((GSList*)tries); + /* Initialize all machines. */ + machines = g_new(PurpleTrieMachine, tries_count); + for (i = 0; i < tries_count; i++, tries = tries->next) { + PurpleTrie *trie = tries->data; + PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie); + purple_trie_states_build(trie); + machines[i].state = priv->root_state; + machines[i].root_state = priv->root_state; + machines[i].reset_on_match = priv->reset_on_match; + machines[i].replace_cb = replace_cb; + machines[i].user_data = user_data; + out = g_string_new(NULL); + while (src[i] != '\0') { + guchar character = src[i++]; + gboolean was_replaced = FALSE; + /* Advance every machine and possibly perform a replacement. */ + for (m_idx = 0; m_idx < tries_count; m_idx++) { + purple_trie_replace_advance(&machines[m_idx], character); + was_replaced = purple_trie_replace_do_replacement( + &machines[m_idx], out); + /* We skipped a character without finding any records, + * let's just copy it to the output. */ + g_string_append_c(out, character); + /* If we replaced a word, reset _all_ machines */ + for (m_idx = 0; m_idx < tries_count; m_idx++) { + machines[m_idx].state = + machines[m_idx].root_state; + return g_string_free(out, FALSE); /*******************************************************************************
@@ -483,6 +586,7 @@
******************************************************************************/
+/* TODO: an option to make it eager or lazy (now, it's eager) */ --- a/libpurple/trie.h Wed Mar 26 14:31:26 2014 +0100
+++ b/libpurple/trie.h Wed Mar 26 19:38:58 2014 +0100
@@ -128,7 +128,7 @@
* @replace_cb: The replacement function.
* @user_data: Custom data to be passed to @replace_cb.
- * Looks for all occuriences of words added to @trie, in @src string.
+ * Processes @src string and replaces all occuriences of words added to @trie. * It's O(strlen(src)), if replace_cb runs in O(strlen(word)).
* Returns: resulting string. Must be g_free'd when you are done using it.
@@ -137,6 +137,26 @@
purple_trie_replace(PurpleTrie *trie, const gchar *src,
PurpleTrieReplaceCb replace_cb, gpointer user_data);
+ * purple_trie_multi_replace: + * @tries: The list of tries. + * @src: The source string. + * @replace_cb: The replacement function. + * @user_data: Custom data to be passed to @replace_cb. + * Processes @src and replaces all occuriences of words added to tries in list + * @tries. Entries added to tries on the beginning of the list have higher + * priority, than ones added further. + * Differend GSList's can be combined to possess common parts, so you can create + * Returns: resulting string. Must be g_free'd when you are done using it. +purple_trie_multi_replace(const GSList *tries, const gchar *src, + PurpleTrieReplaceCb replace_cb, gpointer user_data); #endif /* PURPLE_MEMORY_POOL_H */