* Purple is the legal property of its developers, whose names are too
* numerous to list here. Please refer to the COPYRIGHT file distributed
* with this source distribution
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
/* A single internal (that don't have any children) consists
* of 256 + 4 pointers. That's 1040 bytes on 32-bit machine or 2080 bytes
* Thus, in 10500-byte pool block we can hold about 5-10 internal states.
* Threshold of 100 states means, we'd need 10-20 "small" blocks before
* switching to ~1-2 large blocks.
#define PURPLE_TRIE_LARGE_THRESHOLD 100
#define PURPLE_TRIE_STATES_SMALL_POOL_BLOCK_SIZE 10880
#define PURPLE_TRIE_STATES_LARGE_POOL_BLOCK_SIZE 102400
typedef struct _PurpleTrieRecord PurpleTrieRecord;
typedef struct _PurpleTrieState PurpleTrieState;
typedef struct _PurpleTrieRecordList PurpleTrieRecordList;
* The trie object instance.
PurpleMemoryPool *records_str_mempool;
PurpleMemoryPool *records_obj_mempool;
PurpleTrieRecordList *records;
gsize records_total_size;
PurpleMemoryPool *states_mempool;
PurpleTrieState *root_state;
struct _PurpleTrieRecordList
PurpleTrieRecordList *next;
PurpleTrieRecordList *prev;
PurpleTrieState **children;
PurpleTrieState *longest_suffix;
PurpleTrieRecord *found_word;
PurpleTrieState *root_state;
PurpleTrieReplaceCb replace_cb;
PurpleTrieFindCb find_cb;
/* TODO: an option to make it eager or lazy (now, it's eager) */
static GParamSpec *properties[PROP_LAST];
G_DEFINE_TYPE_WITH_PRIVATE(PurpleTrie, purple_trie, G_TYPE_OBJECT);
/*******************************************************************************
******************************************************************************/
static PurpleTrieRecordList *
purple_record_list_new(PurpleMemoryPool *mpool, PurpleTrieRecord *rec)
PurpleTrieRecordList *node;
node = purple_memory_pool_alloc0(mpool,
sizeof(PurpleTrieRecordList), sizeof(gpointer));
g_return_val_if_fail(node != NULL, NULL);
static PurpleTrieRecordList *
purple_record_list_prepend(PurpleMemoryPool *mpool,
PurpleTrieRecordList *old_head, PurpleTrieRecord *rec)
PurpleTrieRecordList *new_head;
new_head = purple_record_list_new(mpool, rec);
g_return_val_if_fail(new_head != NULL, NULL);
new_head->next = old_head;
old_head->prev = new_head;
static PurpleTrieRecordList *
purple_record_list_copy(PurpleMemoryPool *mpool,
const PurpleTrieRecordList *head)
PurpleTrieRecordList *new_head = NULL, *new_tail = NULL;
PurpleTrieRecordList *node;
node = purple_record_list_new(mpool, head->rec);
g_return_val_if_fail(node != NULL, NULL); /* there is no leak */
static PurpleTrieRecordList *
purple_record_list_remove(PurpleTrieRecordList *head,
PurpleTrieRecordList *node)
g_return_val_if_fail(head != NULL, NULL);
g_return_val_if_fail(node != NULL, head);
g_return_val_if_fail(head->prev == NULL, NULL);
g_return_val_if_fail(node->prev != NULL, NULL);
node->prev->next = node->next;
node->next->prev = node->prev;
/*******************************************************************************
******************************************************************************/
purple_trie_states_cleanup(PurpleTriePrivate *priv)
if (priv->root_state != NULL) {
purple_memory_pool_cleanup(priv->states_mempool);
/* Allocates a state and binds it to the parent. */
purple_trie_state_new(PurpleTriePrivate *priv, PurpleTrieState *parent,
state = purple_memory_pool_alloc0(priv->states_mempool,
sizeof(PurpleTrieState), sizeof(gpointer));
g_return_val_if_fail(state != NULL, NULL);
if (parent->children == NULL) {
parent->children = purple_memory_pool_alloc0(
/* PurpleTrieState *children[G_MAXUCHAR + 1] */
if (parent->children == NULL) {
purple_memory_pool_free(priv->states_mempool, state);
parent->children[character] = state;
purple_trie_states_build(PurpleTriePrivate *priv)
PurpleMemoryPool *reclist_mpool;
PurpleTrieRecordList *reclist, *it;
if (priv->root_state != NULL)
if (priv->records_total_size < PURPLE_TRIE_LARGE_THRESHOLD) {
purple_memory_pool_set_block_size(priv->states_mempool,
PURPLE_TRIE_STATES_SMALL_POOL_BLOCK_SIZE);
purple_memory_pool_set_block_size(priv->states_mempool,
PURPLE_TRIE_STATES_LARGE_POOL_BLOCK_SIZE);
priv->root_state = root = purple_trie_state_new(priv, NULL, '\0');
g_return_val_if_fail(root != NULL, FALSE);
g_assert(root->longest_suffix == NULL);
/* reclist is a list of words not yet added to the trie. Shorter words
* are removed from the list, when they are fully added to the trie. */
reclist_mpool = purple_memory_pool_new();
reclist = purple_record_list_copy(reclist_mpool, priv->records);
/* extra_data on every element of reclist will be a pointer to a trie
* node -- the prefix of the word with len of cur_len */
for (it = reclist; it != NULL; it = it->next) {
/* Iterate over indexes of words -- every loop iteration checks certain
* index of all remaining words. Loop finishes when there are no words
* longer than cur_len. */
for (cur_len = 0; reclist != NULL; cur_len++) {
for (it = reclist; it; it = it->next) {
PurpleTrieRecord *rec = it->rec;
guchar character = rec->word[cur_len];
PurpleTrieState *prefix = it->extra_data;
PurpleTrieState *lon_suf_parent;
g_assert(character != '\0');
if (prefix->children && prefix->children[character]) {
/* Word's prefix is already in the trie, added
prefix = prefix->children[character];
/* We need to create a new branch of trie. */
prefix = purple_trie_state_new(priv, prefix,
g_object_unref(reclist_mpool);
/* prefix is now of length increased by one character. */
/* The whole word is now added to the trie. */
if (rec->word[cur_len + 1] == '\0') {
if (prefix->found_word == NULL)
prefix->found_word = rec;
purple_debug_warning("trie", "found "
"a collision of \"%s\" words",
/* "it" is not modified here, so it->next is
reclist = purple_record_list_remove(reclist, it);
/* We need to fill the longest_suffix field -- a longest
* complete suffix of the prefix we created. We look for
* that suffix in any path starting in root and ending
* in the (cur_len - 1) level of trie. */
if (prefix->longest_suffix != NULL)
lon_suf_parent = prefix->parent->longest_suffix;
if (lon_suf_parent->children &&
lon_suf_parent->children[character])
prefix->longest_suffix = lon_suf_parent->
lon_suf_parent = lon_suf_parent->longest_suffix;
if (prefix->longest_suffix == NULL)
prefix->longest_suffix = root;
if (prefix->found_word == NULL) {
prefix->longest_suffix->found_word;
g_object_unref(reclist_mpool);
/*******************************************************************************
******************************************************************************/
purple_trie_advance(PurpleTrieMachine *m, const guchar character)
/* change state after processing a character */
/* Perfect fit - next character is the same, as the child of the
* prefix we reached so far. */
if (m->state->children && m->state->children[character]) {
m->state = m->state->children[character];
/* We reached root, that's a pity. */
if (m->state == m->root_state)
/* Let's try a bit shorter suffix. */
m->state = m->state->longest_suffix;
purple_trie_replace_do_replacement(PurpleTrieMachine *m, GString *out)
gboolean was_replaced = FALSE;
/* if we reached a "found" state, let's process it */
if (!m->state->found_word)
/* let's get back to the beginning of the word */
g_assert(out->len >= m->state->found_word->word_len - 1);
out->len -= m->state->found_word->word_len - 1;
was_replaced = m->replace_cb(out, m->state->found_word->word,
m->state->found_word->data, m->user_data);
/* output was untouched, revert to the previous position */
if (was_replaced || m->reset_on_match)
m->state = m->root_state;
purple_trie_find_do_discovery(PurpleTrieMachine *m)
/* if we reached a "found" state, let's process it */
if (!m->state->found_word)
was_accepted = m->find_cb(m->state->found_word->word,
m->state->found_word->data, m->user_data);
if (was_accepted && m->reset_on_match)
m->state = m->root_state;
purple_trie_replace(PurpleTrie *trie, const gchar *src,
PurpleTrieReplaceCb replace_cb, gpointer user_data)
PurpleTriePrivate *priv = NULL;
PurpleTrieMachine machine;
g_return_val_if_fail(replace_cb != NULL, g_strdup(src));
g_return_val_if_fail(PURPLE_IS_TRIE(trie), NULL);
priv = purple_trie_get_instance_private(trie);
purple_trie_states_build(priv);
machine.state = priv->root_state;
machine.root_state = priv->root_state;
machine.reset_on_match = priv->reset_on_match;
machine.replace_cb = replace_cb;
machine.user_data = user_data;
out = g_string_new(NULL);
guchar character = src[i++];
purple_trie_advance(&machine, character);
was_replaced = purple_trie_replace_do_replacement(&machine, out);
/* We skipped a character without finding any records,
* let's just copy it to the output. */
g_string_append_c(out, character);
return g_string_free(out, FALSE);
purple_trie_multi_replace(const GSList *tries, const gchar *src,
PurpleTrieReplaceCb replace_cb, gpointer user_data)
guint tries_count, m_idx;
PurpleTrieMachine *machines;
g_return_val_if_fail(replace_cb != NULL, g_strdup(src));
tries_count = g_slist_length((GSList*)tries);
/* Initialize all machines. */
machines = g_new(PurpleTrieMachine, tries_count);
for (i = 0; i < tries_count; i++, tries = tries->next) {
PurpleTrie *trie = tries->data;
PurpleTriePrivate *priv = NULL;
if (!PURPLE_TRIE(trie)) {
priv = purple_trie_get_instance_private(trie);
purple_trie_states_build(priv);
machines[i].state = priv->root_state;
machines[i].root_state = priv->root_state;
machines[i].reset_on_match = priv->reset_on_match;
machines[i].replace_cb = replace_cb;
machines[i].user_data = user_data;
out = g_string_new(NULL);
guchar character = src[i++];
gboolean was_replaced = FALSE;
/* Advance every machine and possibly perform a replacement. */
for (m_idx = 0; m_idx < tries_count; m_idx++) {
purple_trie_advance(&machines[m_idx], character);
was_replaced = purple_trie_replace_do_replacement(
/* We skipped a character without finding any records,
* let's just copy it to the output. */
g_string_append_c(out, character);
/* If we replaced a word, reset _all_ machines */
for (m_idx = 0; m_idx < tries_count; m_idx++) {
machines[m_idx].root_state;
return g_string_free(out, FALSE);
purple_trie_find(PurpleTrie *trie, const gchar *src,
PurpleTrieFindCb find_cb, gpointer user_data)
PurpleTriePrivate *priv = NULL;
PurpleTrieMachine machine;
g_return_val_if_fail(PURPLE_IS_TRIE(trie), 0);
priv = purple_trie_get_instance_private(trie);
purple_trie_states_build(priv);
machine.state = priv->root_state;
machine.root_state = priv->root_state;
machine.reset_on_match = priv->reset_on_match;
machine.find_cb = find_cb;
machine.user_data = user_data;
guchar character = src[i++];
purple_trie_advance(&machine, character);
was_found = purple_trie_find_do_discovery(&machine);
purple_trie_multi_find(const GSList *tries, const gchar *src,
PurpleTrieFindCb find_cb, gpointer user_data)
guint tries_count, m_idx;
PurpleTrieMachine *machines;
tries_count = g_slist_length((GSList*)tries);
/* Initialize all machines. */
machines = g_new(PurpleTrieMachine, tries_count);
for (i = 0; i < tries_count; i++, tries = tries->next) {
PurpleTrie *trie = tries->data;
PurpleTriePrivate *priv = NULL;
if (!PURPLE_IS_TRIE(trie)) {
priv = purple_trie_get_instance_private(trie);
purple_trie_states_build(priv);
machines[i].state = priv->root_state;
machines[i].root_state = priv->root_state;
machines[i].reset_on_match = priv->reset_on_match;
machines[i].find_cb = find_cb;
machines[i].user_data = user_data;
guchar character = src[i++];
gboolean was_found = FALSE;
/* Advance every machine and possibly perform a replacement. */
for (m_idx = 0; m_idx < tries_count; m_idx++) {
purple_trie_advance(&machines[m_idx], character);
purple_trie_find_do_discovery(&machines[m_idx]);
/* If we replaced a word, reset _all_ machines */
for (m_idx = 0; m_idx < tries_count; m_idx++) {
if (!machines[m_idx].reset_on_match)
machines[m_idx].root_state;
/*******************************************************************************
******************************************************************************/
purple_trie_add(PurpleTrie *trie, const gchar *word, gpointer data)
PurpleTriePrivate *priv = NULL;
g_return_val_if_fail(PURPLE_IS_TRIE(trie), FALSE);
g_return_val_if_fail(word != NULL, FALSE);
g_return_val_if_fail(word[0] != '\0', FALSE);
priv = purple_trie_get_instance_private(trie);
if (g_hash_table_lookup(priv->records_map, word) != NULL) {
purple_debug_warning("trie", "record exists: %s", word);
/* Every change in a trie invalidates longest_suffix map.
* These prefixes could be updated instead of cleaning the whole graph.
purple_trie_states_cleanup(priv);
rec = purple_memory_pool_alloc(priv->records_obj_mempool,
sizeof(PurpleTrieRecord), sizeof(gpointer));
rec->word = purple_memory_pool_strdup(priv->records_str_mempool, word);
rec->word_len = strlen(word);
g_assert(rec->word_len > 0);
priv->records_total_size += rec->word_len;
priv->records = purple_record_list_prepend(priv->records_obj_mempool,
g_hash_table_insert(priv->records_map, rec->word, priv->records);
purple_trie_remove(PurpleTrie *trie, const gchar *word)
PurpleTriePrivate *priv = NULL;
PurpleTrieRecordList *it;
g_return_if_fail(PURPLE_IS_TRIE(trie));
g_return_if_fail(word != NULL);
g_return_if_fail(word[0] != '\0');
priv = purple_trie_get_instance_private(trie);
it = g_hash_table_lookup(priv->records_map, word);
/* see purple_trie_add */
purple_trie_states_cleanup(priv);
priv->records_total_size -= it->rec->word_len;
priv->records = purple_record_list_remove(priv->records, it);
g_hash_table_remove(priv->records_map, it->rec->word);
purple_memory_pool_free(priv->records_str_mempool, it->rec->word);
purple_memory_pool_free(priv->records_obj_mempool, it->rec);
purple_memory_pool_free(priv->records_obj_mempool, it);
purple_trie_get_size(PurpleTrie *trie)
PurpleTriePrivate *priv = NULL;
g_return_val_if_fail(PURPLE_IS_TRIE(trie), 0);
priv = purple_trie_get_instance_private(trie);
return g_hash_table_size(priv->records_map);
/*******************************************************************************
******************************************************************************/
purple_trie_get_reset_on_match(PurpleTrie *trie)
PurpleTriePrivate *priv = NULL;
g_return_val_if_fail(PURPLE_IS_TRIE(trie), FALSE);
priv = purple_trie_get_instance_private(trie);
return priv->reset_on_match;
purple_trie_set_reset_on_match(PurpleTrie *trie, gboolean reset)
PurpleTriePrivate *priv = NULL;
g_return_if_fail(PURPLE_IS_TRIE(trie));
priv = purple_trie_get_instance_private(trie);
priv->reset_on_match = reset;
g_object_notify_by_pspec(G_OBJECT(trie), properties[PROP_RESET_ON_MATCH]);
/*******************************************************************************
******************************************************************************/
return g_object_new(PURPLE_TYPE_TRIE, NULL);
purple_trie_init(PurpleTrie *trie)
PurpleTriePrivate *priv = purple_trie_get_instance_private(trie);
priv->records_obj_mempool = purple_memory_pool_new();
priv->records_str_mempool = purple_memory_pool_new();
priv->states_mempool = purple_memory_pool_new();
purple_memory_pool_set_block_size(priv->states_mempool,
PURPLE_TRIE_STATES_SMALL_POOL_BLOCK_SIZE);
priv->records_map = g_hash_table_new(g_str_hash, g_str_equal);
purple_trie_finalize(GObject *obj)
PurpleTriePrivate *priv =
purple_trie_get_instance_private(PURPLE_TRIE(obj));
g_hash_table_destroy(priv->records_map);
g_object_unref(priv->records_obj_mempool);
g_object_unref(priv->records_str_mempool);
g_object_unref(priv->states_mempool);
G_OBJECT_CLASS(purple_trie_parent_class)->finalize(obj);
purple_trie_get_property(GObject *obj, guint param_id, GValue *value,
PurpleTrie *trie = PURPLE_TRIE(obj);
PurpleTriePrivate *priv = purple_trie_get_instance_private(trie);
case PROP_RESET_ON_MATCH:
g_value_set_boolean(value, priv->reset_on_match);
G_OBJECT_WARN_INVALID_PROPERTY_ID(obj, param_id, pspec);
purple_trie_set_property(GObject *obj, guint param_id,
const GValue *value, GParamSpec *pspec)
PurpleTrie *trie = PURPLE_TRIE(obj);
PurpleTriePrivate *priv = purple_trie_get_instance_private(trie);
case PROP_RESET_ON_MATCH:
priv->reset_on_match = g_value_get_boolean(value);
G_OBJECT_WARN_INVALID_PROPERTY_ID(obj, param_id, pspec);
purple_trie_class_init(PurpleTrieClass *klass)
GObjectClass *obj_class = G_OBJECT_CLASS(klass);
obj_class->finalize = purple_trie_finalize;
obj_class->get_property = purple_trie_get_property;
obj_class->set_property = purple_trie_set_property;
properties[PROP_RESET_ON_MATCH] = g_param_spec_boolean("reset-on-match",
"Reset on match", "Determines, if the search state machine "
"should be reset to the initial state on every match. This "
"ensures, that every match is distinct from each other. "
"Please note, that it's not well-defined for a replace "
"operation, so it's better to leave this value default, unless "
"you perform only find operations.", TRUE,
G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS);
g_object_class_install_properties(obj_class, PROP_LAST, properties);