Pull the purple_markup_* API out of util.[ch] and into purplemarkup.[ch]. No code was changed; it was only moved from one file to the other.
Testing Done:
Compiled and ran the unit tests.
Reviewed at https://reviews.imfreedom.org/r/171/
--- a/doc/reference/libpurple/libpurple-docs.xml Thu Oct 15 20:25:08 2020 -0500
+++ b/doc/reference/libpurple/libpurple-docs.xml Fri Oct 16 02:27:21 2020 -0500
@@ -72,6 +72,7 @@
<xi:include href="xml/purplecredentialprovider.xml" />
<xi:include href="xml/purpleimconversation.xml" />
<xi:include href="xml/purplekeyvaluepair.xml" />
+ <xi:include href="xml/purplemarkup.xml" />
<xi:include href="xml/purpleprotocolfactory.xml" />
<xi:include href="xml/purpleprotocolim.xml" />
<xi:include href="xml/purpleprotocolmedia.xml" />
--- a/libpurple/cmds.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/cmds.c Fri Oct 16 02:27:21 2020 -0500
@@ -19,7 +19,7 @@
+#include "purplemarkup.h"
static PurpleCommandsUiOps *cmds_ui_ops = NULL;
--- a/libpurple/conversation.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/conversation.c Fri Oct 16 02:27:21 2020 -0500
@@ -32,11 +32,11 @@
+#include "purplemarkup.h"
#include "purpleprivate.h"
typedef struct _PurpleConversationPrivate PurpleConversationPrivate;
--- a/libpurple/log.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/log.c Fri Oct 16 02:27:21 2020 -0500
@@ -33,7 +33,7 @@
+#include "purplemarkup.h"
static GSList *loggers = NULL;
--- a/libpurple/meson.build Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/meson.build Fri Oct 16 02:27:21 2020 -0500
@@ -57,6 +57,7 @@
'purplecredentialprovider.c',
'purpleimconversation.c',
'purpleprotocolfactory.c',
@@ -141,6 +142,7 @@
'purpleimconversation.h',
'purpleprotocolfactory.h',
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libpurple/purplemarkup.c Fri Oct 16 02:27:21 2020 -0500
@@ -0,0 +1,1561 @@
+ * Purple - Internet Messenging Library + * Copyright (C) Pidgin Developers <devel@pidgin.im> + * Purple is the legal property of its developers, whose names are too numerous + * to list here. Please refer to the COPYRIGHT file distributed with this + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses/>. +#include "purplemarkup.h" + * This function is stolen from glib's gmarkup.c and modified to not + * replace ' with ' +static void append_escaped_text(GString *str, + const gchar *text, gssize length) + next = g_utf8_next_char (p); + g_string_append (str, "&"); + g_string_append (str, "<"); + g_string_append (str, ">"); + g_string_append (str, """); + c = g_utf8_get_char (p); + if ((0x1 <= c && c <= 0x8) || + (0xb <= c && c <= 0xc) || + (0xe <= c && c <= 0x1f) || + (0x7f <= c && c <= 0x84) || + (0x86 <= c && c <= 0x9f)) + g_string_append_printf (str, "&#x%x;", c); + g_string_append_len (str, p, next - p); +/* This function is stolen from glib's gmarkup.c */ +gchar *purple_markup_escape_text(const gchar *text, gssize length) + g_return_val_if_fail(text != NULL, NULL); + /* prealloc at least as long as original text */ + str = g_string_sized_new(length); + append_escaped_text(str, text, length); + return g_string_free(str, FALSE); +purple_markup_unescape_entity(const char *text, int *length) + if (!text || *text != '&') +#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = 
sizeof(s) - 1))) + else if(IS_ENTITY("<")) + else if(IS_ENTITY(">")) + else if(IS_ENTITY(" ")) + else if(IS_ENTITY("©")) + pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */ + else if(IS_ENTITY(""")) + else if(IS_ENTITY("®")) + pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */ + else if(IS_ENTITY("'")) + else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) { + const char *start = text + 2; + pound = g_ascii_strtoull(start, &end, base); + if (pound == 0 || pound > INT_MAX || *end != ';') { + len = (end - text) + 1; + buflen = g_unichar_to_utf8((gunichar)pound, buf); +purple_markup_get_css_property(const gchar *style, + const gchar *css_str = style; + const gchar *css_value_start; + const gchar *css_value_end; + g_return_val_if_fail(opt != NULL, NULL); + /* find the CSS property */ + /* skip whitespace characters */ + while (*css_str && g_ascii_isspace(*css_str)) + if (!g_ascii_isalpha(*css_str)) + if (g_ascii_strncasecmp(css_str, opt, strlen(opt))) + /* go to next css property positioned after the next ';' */ + while (*css_str && *css_str != '"' && *css_str != ';') + /* find the CSS value position in the string */ + css_str += strlen(opt); + while (*css_str && g_ascii_isspace(*css_str)) + while (*css_str && g_ascii_isspace(*css_str)) + if (*css_str == '\0' || *css_str == '"' || *css_str == ';') + /* mark the CSS value */ + css_value_start = css_str; + while (*css_str && *css_str != '"' && *css_str != ';') + css_value_end = css_str - 1; + /* Removes trailing whitespace */ + while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end)) + tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1); + ret = purple_unescape_html(tmp); +gboolean purple_markup_is_rtl(const char *html) + const gchar *start, *end; + if (purple_markup_find_tag("span", html, &start, &end, &attributes)) + /* tmp is a member of attributes and is free with g_datalist_clear call */ + const char *tmp = g_datalist_get_data(&attributes, "dir"); + 
if (tmp && !g_ascii_strcasecmp(tmp, "RTL")) + tmp = g_datalist_get_data(&attributes, "style"); + char *tmp2 = purple_markup_get_css_property(tmp, "direction"); + if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL")) + g_datalist_clear(&attributes); +purple_markup_find_tag(const char *needle, const char *haystack, + const char **start, const char **end, GData **attributes) + const char *cur = haystack; + gboolean found = FALSE; + gboolean in_tag = FALSE; + gboolean in_attr = FALSE; + const char *in_quotes = NULL; + g_return_val_if_fail( needle != NULL, FALSE); + g_return_val_if_fail( *needle != '\0', FALSE); + g_return_val_if_fail( haystack != NULL, FALSE); + g_return_val_if_fail( start != NULL, FALSE); + g_return_val_if_fail( end != NULL, FALSE); + g_return_val_if_fail(attributes != NULL, FALSE); + needlelen = strlen(needle); + g_datalist_init(&attribs); + while (*cur && !found) { + const char *close = cur; + while (*close && *close != *in_quotes) + /* if we got the close quote, store the value and carry on from * + * after it. if we ran to the end of the string, point to the NULL * + * and we're outta here */ + /* only store a value if we have an attribute name */ + size_t len = close - cur; + char *val = g_strndup(cur, len); + g_datalist_set_data_full(&attribs, name, val, g_free); + const char *close = cur; + while (*close && *close != '>' && *close != '"' && + *close != '\'' && *close != ' ' && *close != '=') + /* if we got the equals, store the name of the attribute. if we got + * the quote, save the attribute and go straight to quote mode. + * otherwise the tag closed or we reached the end of the string, + * so we can get outta here */ + size_t len = close - cur; + /* don't store a blank attribute name */ + name = g_ascii_strdown(cur, len); + /* swallow extra spaces inside tag */ + while (*cur && *cur == ' ') cur++; + /* if we hit a < followed by the name of our tag... 
*/ + if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) { + cur = cur + needlelen + 1; + /* if we're pointing at a space or a >, we found the right tag. if * + * we're not, we've found a longer tag, so we need to skip to the * + * >, but not being distracted by >s inside quotes. */ + if (*cur == ' ' || *cur == '>') { + while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') { + while (*cur && *cur != '"') + } else if (*cur == '\'') { + while (*cur && *cur != '\'') + /* clean up any attribute name from a premature termination */ +struct purple_parse_tag { +/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method + recommended in the GCC docs). It contains 'continue's that should + affect the while-loop in purple_markup_html_to_xhtml and doing the + above would break that. + Also, remember to put braces in constructs that require them for + multiple statements when using this macro. */ +#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ + const char *o = c + strlen("<" x); \ + const char *p = NULL, *q = NULL, *r = NULL; \ + /* o = iterating over full tag \ + * q = start of quoted bit \ + GString *innards = g_string_new(""); \ + if(!q && (*o == '\"' || *o == '\'') ) { \ + if(*o == *q) { /* end of quoted bit */ \ + char *unescaped = g_strndup(q+1, o-q-1); \ + char *escaped = g_markup_escape_text(unescaped, -1); \ + g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ + } else if(*c == '\\') { \ + } else if(*o == '<') { \ + } else if(*o == '>') { \ + innards = g_string_append_c(innards, *o); \ + if(p && !r) { /* got an end of tag and no other < earlier */\ + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \ + tags = g_list_prepend(tags, pt); \ + xhtml = g_string_append(xhtml, "<" y); \ + xhtml = g_string_append(xhtml, innards->str); \ + xhtml = g_string_append_c(xhtml, '>'); \ + } else { /* got end of tag with earlier < *or* didn't get anything */ \ + xhtml = 
g_string_append(xhtml, "<"); \ + plain = g_string_append_c(plain, '<'); \ + g_string_free(innards, TRUE); \ + if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ + (*(c+strlen("<" x)) == '>' || \ + !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ + xhtml = g_string_append(xhtml, "<" y); \ + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \ + tags = g_list_prepend(tags, pt); \ + xhtml = g_string_append_c(xhtml, '>'); \ + xhtml = g_string_append(xhtml, "/>");\ + c = strchr(c, '>') + 1; \ +/* Don't forget to check the note above for ALLOW_TAG_ALT. */ +#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) +purple_markup_html_to_xhtml(const char *html, char **xhtml_out, + GList *tags = NULL, *tag; +#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \ +#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>'))) + g_return_if_fail(xhtml_out != NULL || plain_out != NULL); + xhtml = g_string_new(""); + plain = g_string_new(""); + if(*(c+1) == '/') { /* closing tag */ + struct purple_parse_tag *pt = tag->data; + if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { + c += strlen(pt->src_tag) + 3; + struct purple_parse_tag *pt = tags->data; + if(xhtml && !pt->ignore) + g_string_append_printf(xhtml, "</%s>", pt->dest_tag); + if(plain && purple_strequal(pt->src_tag, "a")) { + /* if this is a link, we have to add the url to the plaintext, too */ + (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 || + g_utf8_collate(url->str + 7, cdata->str) != 0))) + g_string_append_printf(plain, " <%s>", g_strstrip(purple_unescape_html(url->str))); + g_string_free(cdata, TRUE); + tags = g_list_delete_link(tags, tags); + tags = g_list_delete_link(tags, tag); + /* a closing tag we weren't expecting... 
+ * we'll let it slide, if it's really a tag...if it's + * just a </ we'll escape it properly */ + while(*end && g_ascii_isalpha(*end)) + xhtml = g_string_append(xhtml, "<"); + plain = g_string_append_c(plain, '<'); + } else { /* opening tag */ + ALLOW_TAG("blockquote"); + /* we only allow html to start the message */ + ALLOW_TAG_ALT("i", "em"); + ALLOW_TAG_ALT("italic", "em"); + /* we skip <HR> because it's not legal in XHTML-IM. However, + * we still want to send something sensible, so we put a + * linebreak in its place. <BR> also needs special handling + * because putting a </BR> to close it would just be dumb. */ + if((!g_ascii_strncasecmp(c, "<br", 3) + || !g_ascii_strncasecmp(c, "<hr", 3)) + !g_ascii_strncasecmp(c+3, "/>", 2) || + !g_ascii_strncasecmp(c+3, " />", 3))) { + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<br/>"); + if(plain && *c != '\n') + plain = g_string_append_c(plain, '\n'); + if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) { + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); + else if (*(c+2) == 'o') + pt->src_tag = "strong"; + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>"); + if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); + pt->src_tag = *(c+2) == '>' ? "u" : "underline"; + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); + if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); + pt->src_tag = *(c+2) == '>' ? 
"s" : "strike"; + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); + if(!g_ascii_strncasecmp(c, "<sub>", 5)) { + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); + if(!g_ascii_strncasecmp(c, "<sup>", 5)) { + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); + if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) { + GString *src = NULL, *alt = NULL; +#define ESCAPE(from, to) \ + while (VALID_CHAR(from)) { \ + if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \ + to = g_string_append(to, "&"); \ + else if (*from == '\'') \ + to = g_string_append(to, "'"); \ + to = g_string_append_c(to, *from); \ + while (*p && *p != '>') { + if (!g_ascii_strncasecmp(p, "src=", 4)) { + g_string_free(src, TRUE); + src = g_string_new(""); + } else if (!g_ascii_strncasecmp(p, "alt=", 4)) { + g_string_free(alt, TRUE); + alt = g_string_new(""); + if ((c = strchr(p, '>')) != NULL) + /* src and alt are required! */ + g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? 
alt->str : ""); + plain = g_string_append(plain, purple_unescape_html(alt->str)); + xhtml = g_string_append(xhtml, alt->str); + g_string_free(alt, TRUE); + g_string_free(src, TRUE); + if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) { + struct purple_parse_tag *pt; + while (*p && *p != '>') { + if (!g_ascii_strncasecmp(p, "href=", 5)) { + g_string_free(url, TRUE); + url = g_string_new(""); + g_string_free(cdata, TRUE); + cdata = g_string_new(""); + while (VALID_CHAR(q)) { + if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL)) + url = g_string_append(url, "&"); + url = g_string_append(url, """); + url = g_string_append_c(url, *q); + if ((c = strchr(p, '>')) != NULL) + pt = g_new0(struct purple_parse_tag, 1); + tags = g_list_prepend(tags, pt); + g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : ""); +#define ESCAPE(from, to) \ + while (VALID_CHAR(from)) { \ + if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \ + to = g_string_append(to, "&"); \ + else if (*from == '\'') \ + to = g_string_append_c(to, '\"'); \ + to = g_string_append_c(to, *from); \ + if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { + GString *style = g_string_new(""); + struct purple_parse_tag *pt; + while (*p && *p != '>') { + if (!g_ascii_strncasecmp(p, "back=", 5)) { + GString *color = g_string_new(""); + g_string_append_printf(style, "background: %s; ", color->str); + g_string_free(color, TRUE); + } else if (!g_ascii_strncasecmp(p, "color=", 6)) { + GString *color = g_string_new(""); + g_string_append_printf(style, "color: %s; ", color->str); + g_string_free(color, TRUE); + } else if (!g_ascii_strncasecmp(p, "face=", 5)) { + GString *face = g_string_new(""); + g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str)); + g_string_free(face, TRUE); + } else if (!g_ascii_strncasecmp(p, "size=", 5)) { + const char *size = "medium"; + g_string_append_printf(style, 
"font-size: %s; ", size); + if ((c = strchr(p, '>')) != NULL) + pt = g_new0(struct purple_parse_tag, 1); + tags = g_list_prepend(tags, pt); + if(style->len && xhtml) + g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str)); + g_string_free(style, TRUE); + if (!g_ascii_strncasecmp(c, "<body ", 6)) { + gboolean did_something = FALSE; + while (*p && *p != '>') { + if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) { + struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); + GString *color = g_string_new(""); + while (VALID_CHAR(q)) { + color = g_string_append_c(color, *q); + g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str)); + g_string_free(color, TRUE); + if ((c = strchr(p, '>')) != NULL) + tags = g_list_prepend(tags, pt); + if (did_something) continue; + /* this has to come after the special case for bgcolor */ + if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { + char *p = strstr(c + strlen("<!--"), "-->"); + xhtml = g_string_append(xhtml, "<!--"); + xhtml = g_string_append(xhtml, "<"); + plain = g_string_append_c(plain, '<'); + if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) { + g_snprintf(buf, sizeof(buf), "%c", *c); + xhtml = g_string_append_len(xhtml, c, len); + plain = g_string_append(plain, pln); + cdata = g_string_append_len(cdata, c, len); + xhtml = g_string_append_c(xhtml, *c); + plain = g_string_append_c(plain, *c); + cdata = g_string_append_c(cdata, *c); + for (tag = tags; tag ; tag = tag->next) { + struct purple_parse_tag *pt = tag->data; + g_string_append_printf(xhtml, "</%s>", pt->dest_tag); + *xhtml_out = g_string_free(xhtml, FALSE); + *plain_out = g_string_free(plain, FALSE); + g_string_free(url, TRUE); + g_string_free(cdata, TRUE); +/* The following are probably reasonable changes: + * - \n should be converted to a normal space + * - in addition to <br>, <p> and <div> etc. 
should also be converted into \n + * - We want to turn </td>#whitespace<td> sequences into a single tab + * - We want to turn </tr>#whitespace<tr> sequences into a single \n + * - <script>...</script> and <style>...</style> should be completely removed +purple_markup_strip_html(const char *str) + gboolean visible = TRUE; + gboolean closing_td_p = FALSE; + const gchar *cdata_close_tag = NULL, *ent; + for (i = 0, j = 0; str2[i]; i++) + /* Note: Don't even assume any other tag is a tag in CDATA */ + if (g_ascii_strncasecmp(str2 + i, cdata_close_tag, + strlen(cdata_close_tag)) == 0) + i += strlen(cdata_close_tag) - 1; + cdata_close_tag = NULL; + else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p) + else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0) + if(g_ascii_isspace(str2[k])) + /* Scan until we end the tag either implicitly (closed start + * tag) or explicitly, using a sloppy method (i.e., < or > + * inside quoted attributes will screw us up) + while (str2[k] && str2[k] != '<' && str2[k] != '>') + /* If we've got an <a> tag with an href, save the address + if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 && + g_ascii_isspace(str2[i+2])) + int st; /* start of href, inclusive [ */ + int end; /* end of href, exclusive ) */ + /* Find start of href */ + for (st = i + 3; st < k; st++) + if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0) + if (str2[st] == '"' || str2[st] == '\'') + /* find end of address */ + for (end = st; end < k && str2[end] != delim; end++) + /* All the work is done in the loop construct above. */ + /* If there's an address, save it. If there was + * already one saved, kill it. */ + tmp = g_strndup(str2 + st, end - st); + href = purple_unescape_html(tmp); + /* Replace </a> with an ascii representation of the + * address the link was pointing to. */ + else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0) + size_t hrlen = strlen(href); + /* Only insert the href if it's different from the CDATA. 
*/ + if ((hrlen != (gsize)(j - href_st) || + strncmp(str2 + href_st, href, hrlen)) && + (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */ + strncmp(str2 + href_st, href + 7, hrlen - 7))) + memmove(str2 + j, href, hrlen); + /* Check for tags which should be mapped to newline (but ignore some of + * the tags at the beginning of the text) */ + else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0 + || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0 + || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0 + || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0 + || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0)) + || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0 + || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0) + /* Check for tags which begin CDATA and need to be closed */ + else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0) + cdata_close_tag = "</script>"; + else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0) + cdata_close_tag = "</style>"; + /* Update the index and continue checking after the tag */ + i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k; + else if (cdata_close_tag) + else if (!g_ascii_isspace(str2[i])) + if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL) + str2[j++] = g_ascii_isspace(str2[i])? 
' ': str2[i]; +badentity(const char *c) + if (!g_ascii_strncasecmp(c, "<", 4) || + !g_ascii_strncasecmp(c, ">", 4) || + !g_ascii_strncasecmp(c, """, 6)) { +process_link(GString *ret, + const char *start, const char *c, + char *url_buf, *tmpurlbuf; + if (!badchar(*t) && !badentity(t)) + if (*t == ',' && *(t + 1) != ' ') { + if (t > start && *(t - 1) == '.') + if (t > start && *(t - 1) == ')' && inside_paren > 0) + url_buf = g_strndup(c, t - c); + tmpurlbuf = purple_unescape_html(url_buf); + g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>", +purple_markup_linkify(const char *text) + const char *c, *t, *q = NULL; + char *tmpurlbuf, *url_buf; + gboolean inside_html = FALSE; + ret = g_string_new(""); + if(*c == '(' && !inside_html) { + ret = g_string_append_c(ret, *c); + } else if(!q && (*c == '\"' || *c == '\'')) { + if (!g_ascii_strncasecmp(c, "<A", 2)) { + if (!g_ascii_strncasecmp(c, "/A>", 3)) { + ret = g_string_append_c(ret, *c); + } else if (!g_ascii_strncasecmp(c, "http://", 7)) { + c = process_link(ret, text, c, 7, "", inside_paren); + } else if (!g_ascii_strncasecmp(c, "https://", 8)) { + c = process_link(ret, text, c, 8, "", inside_paren); + } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) { + c = process_link(ret, text, c, 6, "", inside_paren); + } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) { + c = process_link(ret, text, c, 7, "", inside_paren); + } else if (!g_ascii_strncasecmp(c, "file://", 7)) { + c = process_link(ret, text, c, 7, "", inside_paren); + } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) { + c = process_link(ret, text, c, 4, "http://", inside_paren); + } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' 
&& (c == text || badchar(c[-1]) || badentity(c-1))) { + c = process_link(ret, text, c, 4, "ftp://", inside_paren); + } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) { + c = process_link(ret, text, c, 5, "", inside_paren); + } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) { + if (badchar(*t) || badentity(t)) { + if (t > text && *(t - 1) == '.') + if ((d = strstr(c + 7, "?")) != NULL && d < t) + url_buf = g_strndup(c + 7, d - c - 7); + url_buf = g_strndup(c + 7, t - c - 7); + if (!purple_email_is_valid(url_buf)) { + url_buf = g_strndup(c, t - c); + tmpurlbuf = purple_unescape_html(url_buf); + g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", + } else if (c != text && (*c == '@')) { + GString *gurl_buf = NULL; + const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0"; + if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1))) + gurl_buf = g_string_new(""); + /* iterate backwards grabbing the local part of an email address */ + g = g_utf8_get_char(t); + if (badchar(*t) || (g >= 127) || (*t == '(') || + ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "<", 4) || + !g_ascii_strncasecmp(t - 3, ">", 4))) || + (t > (text+4) && (!g_ascii_strncasecmp(t - 5, """, 6)))))) { + /* local part will already be part of ret, strip it out */ + ret = g_string_truncate(ret, ret->len - (c - t)); + ret = g_string_append_unichar(ret, g); + g_string_prepend_unichar(gurl_buf, g); + t = g_utf8_find_prev_char(text, t); + ret = g_string_assign(ret, ""); + t = g_utf8_find_next_char(c, NULL); + /* iterate forwards grabbing the domain part of an email address */ + g = g_utf8_get_char(t); + if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) { + url_buf = g_string_free(gurl_buf, FALSE); + /* strip off trailing periods */ + for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--) + tmpurlbuf = purple_unescape_html(url_buf); + if (purple_email_is_valid(tmpurlbuf)) { + g_string_append_printf(ret, 
"<A HREF=\"mailto:%s\">%s</A>", + g_string_append(ret, url_buf); + g_string_append_unichar(gurl_buf, g); + t = g_utf8_find_next_char(t, NULL); + g_string_free(gurl_buf, TRUE); + if(*c == ')' && !inside_html) { + ret = g_string_append_c(ret, *c); + ret = g_string_append_c(ret, *c); + return g_string_free(ret, FALSE); +char *purple_unescape_text(const char *in) + ret = g_string_new(""); + if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) { + g_string_append(ret, ent); + g_string_append_c(ret, *c); + return g_string_free(ret, FALSE); +char *purple_unescape_html(const char *html) + ret = g_string_new(""); + if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) { + g_string_append(ret, ent); + } else if (!strncmp(c, "<br>", 4)) { + g_string_append_c(ret, '\n'); + g_string_append_c(ret, *c); + return g_string_free(ret, FALSE); +purple_markup_slice(const char *str, guint x, guint y) + gboolean appended = FALSE; + g_return_val_if_fail(str != NULL, NULL); + g_return_val_if_fail(x <= y, NULL); + ret = g_string_new(""); + while (*str && (z < y)) { + c = g_utf8_get_char(str); + char *end = strchr(str, '>'); + g_string_free(ret, TRUE); + while ((tag = g_queue_pop_head(q))) + if (!g_ascii_strncasecmp(str, "<img ", 5)) { + z += strlen("[Image]"); + } else if (!g_ascii_strncasecmp(str, "<br", 3)) { + } else if (!g_ascii_strncasecmp(str, "<hr>", 4)) { + z += strlen("\n---\n"); + } else if (!g_ascii_strncasecmp(str, "</", 2)) { + tmp = g_queue_pop_head(q); + /* push it unto the stack */ + tmp = g_strndup(str, end - str + 1); + g_queue_push_head(q, tmp); + g_string_append_len(ret, str, end - str + 1); + char *end = strchr(str, ';'); + g_string_free(ret, TRUE); + while ((tag = g_queue_pop_head(q))) + g_string_append_len(ret, str, end - str + 1); + if (z == x && z > 0 && !appended) { + g_string_append(ret, tag); + g_string_append_unichar(ret, c); + str = g_utf8_next_char(str); + while ((tag = g_queue_pop_head(q))) { + name = purple_markup_get_tag_name(tag); + 
g_string_append_printf(ret, "</%s>", name); + return g_string_free(ret, FALSE); +purple_markup_get_tag_name(const char *tag) + g_return_val_if_fail(tag != NULL, NULL); + g_return_val_if_fail(*tag == '<', NULL); + for (i = 1; tag[i]; i++) + if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/') + return g_strndup(tag+1, i-1); --- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libpurple/purplemarkup.h Fri Oct 16 02:27:21 2020 -0500
@@ -0,0 +1,232 @@
+ * Purple - Internet Messenging Library + * Copyright (C) Pidgin Developers <devel@pidgin.im> + * Purple is the legal property of its developers, whose names are too numerous + * to list here. Please refer to the COPYRIGHT file distributed with this + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses/>. +#if !defined(PURPLE_GLOBAL_HEADER_INSIDE) && !defined(PURPLE_COMPILATION) +# error "only <pidgin.h> may be included directly" + * @section_id: libpurple-purplemarkup + * @title: Text Markup API + * purple_markup_escape_text: + * @text: The text to escape + * @length: The length of the text, or -1 if #NULL terminated + * Escapes special characters in a plain-text string so they display + * correctly as HTML. For example, & is replaced with &amp; and < is + * replaced with &lt; + * This is exactly the same as g_markup_escape_text(), except that it + * does not change ' to &apos; because &apos; is not a valid HTML 4 entity, + * and is displayed literally in IE7. +gchar *purple_markup_escape_text(const gchar *text, gssize length); + * purple_markup_find_tag: + * @needle: The name of the tag + * @haystack: The null-delimited string to search in + * @start: A pointer to the start of the tag if found + * @end: A pointer to the end of the tag if found + * @attributes: The attributes, if the tag was found. This should + * be freed with g_datalist_clear(). 
+ * Finds an HTML tag matching the given name. + * This locates an HTML tag's start and end, and stores its attributes + * in a GData hash table. The names of the attributes are lower-cased + * in the hash table, and the name of the tag is case insensitive. + * Returns: TRUE if the tag was found +gboolean purple_markup_find_tag(const char *needle, const char *haystack, + const char **start, const char **end, + * purple_markup_html_to_xhtml: + * @html: The HTML markup. + * @dest_xhtml: The destination XHTML output. + * @dest_plain: The destination plain-text output. + * Converts HTML markup to XHTML. +void purple_markup_html_to_xhtml(const char *html, char **dest_xhtml, + * purple_markup_strip_html: + * @str: The string to strip HTML from. + * Strips HTML tags from a string. + * Returns: The new string without HTML. You must g_free this string + * when finished with it. +char *purple_markup_strip_html(const char *str); + * purple_markup_linkify: + * @str: The string to linkify. + * Adds the necessary HTML code to turn URIs into HTML links in a string. + * Returns: The new string with all URIs surrounded in standard + * HTML <a href="whatever"></a> tags. You must g_free() + * this string when finished with it. +char *purple_markup_linkify(const char *str); + * purple_unescape_text: + * @text: The string in which to unescape any HTML entities + * Unescapes HTML entities to their literal characters in the text. + * For example "&amp;" is replaced by '&' and so on. Also converts + * numerical entities (e.g. "&\#38;" is also '&'). + * This function currently supports the following named entities: + * "&amp;", "&lt;", "&gt;", "&copy;", "&quot;", + * "&reg;", "&apos;" + * purple_unescape_html() is similar, but also converts "<br>" into "\n". + * See purple_unescape_html() + * Returns: The text with HTML entities literalized. You must g_free + * this string when finished with it. 
+char *purple_unescape_text(const char *text); + * purple_unescape_html: + * @html: The string in which to unescape any HTML entities + * Unescapes HTML entities to their literal characters and converts + * "<br>" to "\n". See purple_unescape_text() for more details. + * See purple_unescape_text() + * Returns: The text with HTML entities literalized. You must g_free + * this string when finished with it. +char *purple_unescape_html(const char *html); + * @str: The input NUL terminated, HTML, UTF-8 (or ASCII) string. + * @x: The character offset into an unformatted version of str to + * @y: The character offset (into an unformatted vesion of str) of + * one past the last character to include in the slice. + * Returns a newly allocated substring of the HTML UTF-8 string "str". + * The markup is preserved such that the substring will have the same + * formatting as original string, even though some tags may have been + * opened before "x", or may close after "y". All open tags are closed + * at the end of the returned string, in the proper order. + * Note that x and y are in character offsets, not byte offsets, and + * are offsets into an unformatted version of str. Because of this, + * this function may be sensitive to changes in GtkIMHtml and may break + * when used with other UI's. libpurple users are encouraged to report and + * work out any problems encountered. + * Returns: The HTML slice of string, with all formatting retained. +char *purple_markup_slice(const char *str, guint x, guint y); + * purple_markup_get_tag_name: + * @tag: The string starting a HTML tag. + * Returns a newly allocated string containing the name of the tag + * located at "tag". Tag is expected to point to a '<', and contain + * a '>' sometime after that. If there is no '>' and the string is + * not NUL terminated, this function can be expected to segfault. + * Returns: A string containing the name of the tag. 
+char *purple_markup_get_tag_name(const char *tag); + * purple_markup_unescape_entity: + * @text: A string containing an HTML entity. + * @length: If not %NULL, the string length of the entity is stored in this location. + * Returns a constant string of the character representation of the HTML + * entity pointed to by @text. For example, purple_markup_unescape_entity("&amp;") + * will return "&". The @text variable is expected to point to an '&', + * the first character of the entity. If given an unrecognized entity, the function + * Note that this function, unlike purple_unescape_html(), does not search + * the string for the entity, does not replace the entity, and does not + * return a newly allocated string. + * Returns: A constant string containing the character representation of the given entity. +const char * purple_markup_unescape_entity(const char *text, int *length); + * purple_markup_get_css_property: + * @style: A string containing the inline CSS text. + * @opt: The requested CSS property. + * Returns a newly allocated string containing the value of the CSS property specified + * in opt. The @style argument is expected to point to a HTML inline CSS. + * The function will seek for the CSS property and return its value. + * For example, purple_markup_get_css_property("direction:rtl;color:#dc4d1b;", + * "color") would return "#dc4d1b". + * On error or if the requested property was not found, the function returns + * Returns: The value of the requested CSS property. +char * purple_markup_get_css_property(const gchar *style, const gchar *opt); + * purple_markup_is_rtl: + * @html: The HTML text. + * Check if the given HTML contains RTL text. + * Returns: TRUE if the text contains RTL text, FALSE otherwise. +gboolean purple_markup_is_rtl(const char *html); +#endif /* PURPLE_MARKUP_H */ --- a/libpurple/request.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/request.c Fri Oct 16 02:27:21 2020 -0500
@@ -24,6 +24,7 @@
+#include "purplemarkup.h" #include "purplekeyvaluepair.h"
--- a/libpurple/savedstatuses.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/savedstatuses.c Fri Oct 16 02:27:21 2020 -0500
@@ -26,10 +26,10 @@
+#include "purplemarkup.h" #include "savedstatuses.h"
--- a/libpurple/tests/meson.build Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/tests/meson.build Fri Oct 16 02:27:21 2020 -0500
@@ -5,6 +5,7 @@
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libpurple/tests/test_markup.c Fri Oct 16 02:27:21 2020 -0500
@@ -0,0 +1,234 @@
+ * Purple - Internet Messenging Library + * Copyright (C) Pidgin Developers <devel@pidgin.im> + * Purple is the legal property of its developers, whose names are too numerous + * to list here. Please refer to the COPYRIGHT file distributed with this + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses/>. +test_util_markup_html_to_xhtml(void) { + MarkupTestData data[] = { + "<A href='URL'>ABOUT</a>", + "<a href=\"URL\">ABOUT</a>", + "<a href='URL'>URL</a>", + "<a href=\"URL\">URL</a>", + "<a href='mailto:mail'>mail</a>", + "<a href=\"mailto:mail\">mail</a>", + "<A href='\"U'R&L'>ABOUT</a>", + "<a href=\""U'R&L\">ABOUT</a>", + "<img src='SRC' alt='ALT'/>", + "<img src='SRC' alt='ALT' />", + "<img src=\"'S'R&C\" alt=\"'A'L&T\"/>", + "<img src=''S'R&C' alt=''A'L&T' />", + "<h1>A<h2>B</h2>C</h1>", + "<h1>A<h2>B</h2>C</h1>", + "<h1><h2><h3><h4></h4></h3></h2></h1>", + "<div attr='"&<>'/>", + "<div attr=\"'\"/>", + "<span style='font-weight: bold;'>x</span>", + "<span style='font-weight: bold;'>x</span>", + "<span style='font-weight: bold;'>x</span>", + "<span style='text-decoration: underline;'>x</span>", + "<underline>x</underline>", + "<span style='text-decoration: underline;'>x</span>", + "<span style='text-decoration: line-through;'>x</span>", + "<span style='text-decoration: line-through;'>x</span>", + "<span style='vertical-align:sub;'>x</span>", + "<span 
style='vertical-align:super;'>x</span>", + "<font face=\"'Times>New & Roman'\">x</font>", + "<span style='font-family: \"Times>New & Roman\";'>x</span>", + "<font back=\"'color>blue&red'\">x</font>", + "<span style='background: \"color>blue&red\";'>x</span>", + "<font color=\"'color>blue&red'\">x</font>", + "<span style='color: \"color>blue&red\";'>x</span>", + "<font size=1>x</font>", + "<span style='font-size: xx-small;'>x</span>", + "<font size=432>x</font>", + "<span style='font-size: medium;'>x</span>", + for(i = 0; data[i].markup; i++) { + gchar *xhtml = NULL, *plaintext = NULL; + purple_markup_html_to_xhtml(data[i].markup, &xhtml, &plaintext); + g_assert_cmpstr(data[i].xhtml, ==, xhtml); + g_assert_cmpstr(data[i].plaintext, ==, plaintext); +/****************************************************************************** + *****************************************************************************/ +main(gint argc, gchar **argv) { + g_test_init(&argc, &argv, NULL); + g_test_add_func("/util/markup/html to xhtml", + test_util_markup_html_to_xhtml); \ No newline at end of file
--- a/libpurple/tests/test_util.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/tests/test_util.c Fri Oct 16 02:27:21 2020 -0500
@@ -196,205 +196,6 @@
/******************************************************************************
- *****************************************************************************/
-test_util_markup_html_to_xhtml(void) {
- MarkupTestData data[] = {
- "<A href='URL'>ABOUT</a>",
- "<a href=\"URL\">ABOUT</a>",
- "<a href='URL'>URL</a>",
- "<a href=\"URL\">URL</a>",
- "<a href='mailto:mail'>mail</a>",
- "<a href=\"mailto:mail\">mail</a>",
- "<A href='\"U'R&L'>ABOUT</a>",
- "<a href=\""U'R&L\">ABOUT</a>",
- "<img src='SRC' alt='ALT'/>",
- "<img src='SRC' alt='ALT' />",
- "<img src=\"'S'R&C\" alt=\"'A'L&T\"/>",
- "<img src=''S'R&C' alt=''A'L&T' />",
- "<h1>A<h2>B</h2>C</h1>",
- "<h1>A<h2>B</h2>C</h1>",
- "<h1><h2><h3><h4></h4></h3></h2></h1>",
- "<div attr='"&<>'/>",
- "<div attr=\"'\"/>",
- "<span style='font-weight: bold;'>x</span>",
- "<span style='font-weight: bold;'>x</span>",
- "<span style='font-weight: bold;'>x</span>",
- "<span style='text-decoration: underline;'>x</span>",
- "<underline>x</underline>",
- "<span style='text-decoration: underline;'>x</span>",
- "<span style='text-decoration: line-through;'>x</span>",
- "<span style='text-decoration: line-through;'>x</span>",
- "<span style='vertical-align:sub;'>x</span>",
- "<span style='vertical-align:super;'>x</span>",
- "<font face=\"'Times>New & Roman'\">x</font>",
- "<span style='font-family: \"Times>New & Roman\";'>x</span>",
- "<font back=\"'color>blue&red'\">x</font>",
- "<span style='background: \"color>blue&red\";'>x</span>",
- "<font color=\"'color>blue&red'\">x</font>",
- "<span style='color: \"color>blue&red\";'>x</span>",
- "<font size=1>x</font>",
- "<span style='font-size: xx-small;'>x</span>",
- "<font size=432>x</font>",
- "<span style='font-size: medium;'>x</span>",
- for(i = 0; data[i].markup; i++) {
- gchar *xhtml = NULL, *plaintext = NULL;
- purple_markup_html_to_xhtml(data[i].markup, &xhtml, &plaintext);
- g_assert_cmpstr(data[i].xhtml, ==, xhtml);
- g_assert_cmpstr(data[i].plaintext, ==, plaintext);
-/******************************************************************************
*****************************************************************************/
@@ -515,9 +316,6 @@
g_test_add_func("/util/str to date time",
test_util_str_to_date_time);
- g_test_add_func("/util/markup/html to xhtml",
- test_util_markup_html_to_xhtml);
g_test_add_func("/util/utf8/strip unprintables",
test_util_utf8_strip_unprintables);
--- a/libpurple/util.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/util.c Fri Oct 16 02:27:21 2020 -0500
@@ -449,1546 +449,6 @@
/**************************************************************************
- **************************************************************************/
- * This function is stolen from glib's gmarkup.c and modified to not
- * replace ' with '
-static void append_escaped_text(GString *str,
- const gchar *text, gssize length)
- next = g_utf8_next_char (p);
- g_string_append (str, "&");
- g_string_append (str, "<");
- g_string_append (str, ">");
- g_string_append (str, """);
- c = g_utf8_get_char (p);
- if ((0x1 <= c && c <= 0x8) ||
- (0xb <= c && c <= 0xc) ||
- (0xe <= c && c <= 0x1f) ||
- (0x7f <= c && c <= 0x84) ||
- (0x86 <= c && c <= 0x9f))
- g_string_append_printf (str, "&#x%x;", c);
- g_string_append_len (str, p, next - p);
-/* This function is stolen from glib's gmarkup.c */
-gchar *purple_markup_escape_text(const gchar *text, gssize length)
- g_return_val_if_fail(text != NULL, NULL);
- /* prealloc at least as long as original text */
- str = g_string_sized_new(length);
- append_escaped_text(str, text, length);
- return g_string_free(str, FALSE);
-purple_markup_unescape_entity(const char *text, int *length)
- if (!text || *text != '&')
-#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
- else if(IS_ENTITY("<"))
- else if(IS_ENTITY(">"))
- else if(IS_ENTITY(" "))
- else if(IS_ENTITY("©"))
- pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */
- else if(IS_ENTITY("""))
- else if(IS_ENTITY("®"))
- pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */
- else if(IS_ENTITY("'"))
- else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
- const char *start = text + 2;
- pound = g_ascii_strtoull(start, &end, base);
- if (pound == 0 || pound > INT_MAX || *end != ';') {
- len = (end - text) + 1;
- buflen = g_unichar_to_utf8((gunichar)pound, buf);
-purple_markup_get_css_property(const gchar *style,
- const gchar *css_str = style;
- const gchar *css_value_start;
- const gchar *css_value_end;
- g_return_val_if_fail(opt != NULL, NULL);
- /* find the CSS property */
- /* skip whitespace characters */
- while (*css_str && g_ascii_isspace(*css_str))
- if (!g_ascii_isalpha(*css_str))
- if (g_ascii_strncasecmp(css_str, opt, strlen(opt)))
- /* go to next css property positioned after the next ';' */
- while (*css_str && *css_str != '"' && *css_str != ';')
- /* find the CSS value position in the string */
- css_str += strlen(opt);
- while (*css_str && g_ascii_isspace(*css_str))
- while (*css_str && g_ascii_isspace(*css_str))
- if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
- /* mark the CSS value */
- css_value_start = css_str;
- while (*css_str && *css_str != '"' && *css_str != ';')
- css_value_end = css_str - 1;
- /* Removes trailing whitespace */
- while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
- tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
- ret = purple_unescape_html(tmp);
-gboolean purple_markup_is_rtl(const char *html)
- const gchar *start, *end;
- if (purple_markup_find_tag("span", html, &start, &end, &attributes))
- /* tmp is a member of attributes and is free with g_datalist_clear call */
- const char *tmp = g_datalist_get_data(&attributes, "dir");
- if (tmp && !g_ascii_strcasecmp(tmp, "RTL"))
- tmp = g_datalist_get_data(&attributes, "style");
- char *tmp2 = purple_markup_get_css_property(tmp, "direction");
- if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL"))
- g_datalist_clear(&attributes);
-purple_markup_find_tag(const char *needle, const char *haystack,
- const char **start, const char **end, GData **attributes)
- const char *cur = haystack;
- gboolean found = FALSE;
- gboolean in_tag = FALSE;
- gboolean in_attr = FALSE;
- const char *in_quotes = NULL;
- g_return_val_if_fail( needle != NULL, FALSE);
- g_return_val_if_fail( *needle != '\0', FALSE);
- g_return_val_if_fail( haystack != NULL, FALSE);
- g_return_val_if_fail( start != NULL, FALSE);
- g_return_val_if_fail( end != NULL, FALSE);
- g_return_val_if_fail(attributes != NULL, FALSE);
- needlelen = strlen(needle);
- g_datalist_init(&attribs);
- while (*cur && !found) {
- const char *close = cur;
- while (*close && *close != *in_quotes)
- /* if we got the close quote, store the value and carry on from *
- * after it. if we ran to the end of the string, point to the NULL *
- * and we're outta here */
- /* only store a value if we have an attribute name */
- size_t len = close - cur;
- char *val = g_strndup(cur, len);
- g_datalist_set_data_full(&attribs, name, val, g_free);
- const char *close = cur;
- while (*close && *close != '>' && *close != '"' &&
- *close != '\'' && *close != ' ' && *close != '=')
- /* if we got the equals, store the name of the attribute. if we got
- * the quote, save the attribute and go straight to quote mode.
- * otherwise the tag closed or we reached the end of the string,
- * so we can get outta here */
- size_t len = close - cur;
- /* don't store a blank attribute name */
- name = g_ascii_strdown(cur, len);
- /* swallow extra spaces inside tag */
- while (*cur && *cur == ' ') cur++;
- /* if we hit a < followed by the name of our tag... */
- if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
- cur = cur + needlelen + 1;
- /* if we're pointing at a space or a >, we found the right tag. if *
- * we're not, we've found a longer tag, so we need to skip to the *
- * >, but not being distracted by >s inside quotes. */
- if (*cur == ' ' || *cur == '>') {
- while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') {
- while (*cur && *cur != '"')
- } else if (*cur == '\'') {
- while (*cur && *cur != '\'')
- /* clean up any attribute name from a premature termination */
-struct purple_parse_tag {
-/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method
- recommended in the GCC docs). It contains 'continue's that should
- affect the while-loop in purple_markup_html_to_xhtml and doing the
- above would break that.
- Also, remember to put braces in constructs that require them for
- multiple statements when using this macro. */
-#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
- const char *o = c + strlen("<" x); \
- const char *p = NULL, *q = NULL, *r = NULL; \
- /* o = iterating over full tag \
- * q = start of quoted bit \
- GString *innards = g_string_new(""); \
- if(!q && (*o == '\"' || *o == '\'') ) { \
- if(*o == *q) { /* end of quoted bit */ \
- char *unescaped = g_strndup(q+1, o-q-1); \
- char *escaped = g_markup_escape_text(unescaped, -1); \
- g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
- } else if(*c == '\\') { \
- } else if(*o == '<') { \
- } else if(*o == '>') { \
- innards = g_string_append_c(innards, *o); \
- if(p && !r) { /* got an end of tag and no other < earlier */\
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
- tags = g_list_prepend(tags, pt); \
- xhtml = g_string_append(xhtml, "<" y); \
- xhtml = g_string_append(xhtml, innards->str); \
- xhtml = g_string_append_c(xhtml, '>'); \
- } else { /* got end of tag with earlier < *or* didn't get anything */ \
- xhtml = g_string_append(xhtml, "<"); \
- plain = g_string_append_c(plain, '<'); \
- g_string_free(innards, TRUE); \
- if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
- (*(c+strlen("<" x)) == '>' || \
- !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
- xhtml = g_string_append(xhtml, "<" y); \
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
- tags = g_list_prepend(tags, pt); \
- xhtml = g_string_append_c(xhtml, '>'); \
- xhtml = g_string_append(xhtml, "/>");\
- c = strchr(c, '>') + 1; \
-/* Don't forget to check the note above for ALLOW_TAG_ALT. */
-#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
-purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
- GList *tags = NULL, *tag;
-#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
-#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
- g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
- xhtml = g_string_new("");
- plain = g_string_new("");
- if(*(c+1) == '/') { /* closing tag */
- struct purple_parse_tag *pt = tag->data;
- if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
- c += strlen(pt->src_tag) + 3;
- struct purple_parse_tag *pt = tags->data;
- if(xhtml && !pt->ignore)
- g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
- if(plain && purple_strequal(pt->src_tag, "a")) {
- /* if this is a link, we have to add the url to the plaintext, too */
- (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
- g_utf8_collate(url->str + 7, cdata->str) != 0)))
- g_string_append_printf(plain, " <%s>", g_strstrip(purple_unescape_html(url->str)));
- g_string_free(cdata, TRUE);
- tags = g_list_delete_link(tags, tags);
- tags = g_list_delete_link(tags, tag);
- /* a closing tag we weren't expecting...
- * we'll let it slide, if it's really a tag...if it's
- * just a </ we'll escape it properly */
- while(*end && g_ascii_isalpha(*end))
- xhtml = g_string_append(xhtml, "<");
- plain = g_string_append_c(plain, '<');
- } else { /* opening tag */
- ALLOW_TAG("blockquote");
- /* we only allow html to start the message */
- ALLOW_TAG_ALT("i", "em");
- ALLOW_TAG_ALT("italic", "em");
- /* we skip <HR> because it's not legal in XHTML-IM. However,
- * we still want to send something sensible, so we put a
- * linebreak in its place. <BR> also needs special handling
- * because putting a </BR> to close it would just be dumb. */
- if((!g_ascii_strncasecmp(c, "<br", 3)
- || !g_ascii_strncasecmp(c, "<hr", 3))
- !g_ascii_strncasecmp(c+3, "/>", 2) ||
- !g_ascii_strncasecmp(c+3, " />", 3))) {
- c = strchr(c, '>') + 1;
- xhtml = g_string_append(xhtml, "<br/>");
- if(plain && *c != '\n')
- plain = g_string_append_c(plain, '\n');
- if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- else if (*(c+2) == 'o')
- pt->src_tag = "strong";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
- if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = *(c+2) == '>' ? "u" : "underline";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
- if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = *(c+2) == '>' ? "s" : "strike";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
- if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
- if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
- if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
- GString *src = NULL, *alt = NULL;
-#define ESCAPE(from, to) \
- while (VALID_CHAR(from)) { \
- if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
- to = g_string_append(to, "&"); \
- else if (*from == '\'') \
- to = g_string_append(to, "'"); \
- to = g_string_append_c(to, *from); \
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "src=", 4)) {
- g_string_free(src, TRUE);
- src = g_string_new("");
- } else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
- g_string_free(alt, TRUE);
- alt = g_string_new("");
- if ((c = strchr(p, '>')) != NULL)
- /* src and alt are required! */
- g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
- plain = g_string_append(plain, purple_unescape_html(alt->str));
- xhtml = g_string_append(xhtml, alt->str);
- g_string_free(alt, TRUE);
- g_string_free(src, TRUE);
- if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
- struct purple_parse_tag *pt;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "href=", 5)) {
- g_string_free(url, TRUE);
- url = g_string_new("");
- g_string_free(cdata, TRUE);
- cdata = g_string_new("");
- while (VALID_CHAR(q)) {
- if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
- url = g_string_append(url, "&");
- url = g_string_append(url, """);
- url = g_string_append_c(url, *q);
- if ((c = strchr(p, '>')) != NULL)
- pt = g_new0(struct purple_parse_tag, 1);
- tags = g_list_prepend(tags, pt);
- g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
-#define ESCAPE(from, to) \
- while (VALID_CHAR(from)) { \
- if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
- to = g_string_append(to, "&"); \
- else if (*from == '\'') \
- to = g_string_append_c(to, '\"'); \
- to = g_string_append_c(to, *from); \
- if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
- GString *style = g_string_new("");
- struct purple_parse_tag *pt;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "back=", 5)) {
- GString *color = g_string_new("");
- g_string_append_printf(style, "background: %s; ", color->str);
- g_string_free(color, TRUE);
- } else if (!g_ascii_strncasecmp(p, "color=", 6)) {
- GString *color = g_string_new("");
- g_string_append_printf(style, "color: %s; ", color->str);
- g_string_free(color, TRUE);
- } else if (!g_ascii_strncasecmp(p, "face=", 5)) {
- GString *face = g_string_new("");
- g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
- g_string_free(face, TRUE);
- } else if (!g_ascii_strncasecmp(p, "size=", 5)) {
- const char *size = "medium";
- g_string_append_printf(style, "font-size: %s; ", size);
- if ((c = strchr(p, '>')) != NULL)
- pt = g_new0(struct purple_parse_tag, 1);
- tags = g_list_prepend(tags, pt);
- if(style->len && xhtml)
- g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
- g_string_free(style, TRUE);
- if (!g_ascii_strncasecmp(c, "<body ", 6)) {
- gboolean did_something = FALSE;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- GString *color = g_string_new("");
- while (VALID_CHAR(q)) {
- color = g_string_append_c(color, *q);
- g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
- g_string_free(color, TRUE);
- if ((c = strchr(p, '>')) != NULL)
- tags = g_list_prepend(tags, pt);
- if (did_something) continue;
- /* this has to come after the special case for bgcolor */
- if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
- char *p = strstr(c + strlen("<!--"), "-->");
- xhtml = g_string_append(xhtml, "<!--");
- xhtml = g_string_append(xhtml, "<");
- plain = g_string_append_c(plain, '<');
- if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
- g_snprintf(buf, sizeof(buf), "%c", *c);
- xhtml = g_string_append_len(xhtml, c, len);
- plain = g_string_append(plain, pln);
- cdata = g_string_append_len(cdata, c, len);
- xhtml = g_string_append_c(xhtml, *c);
- plain = g_string_append_c(plain, *c);
- cdata = g_string_append_c(cdata, *c);
- for (tag = tags; tag ; tag = tag->next) {
- struct purple_parse_tag *pt = tag->data;
- g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
- *xhtml_out = g_string_free(xhtml, FALSE);
- *plain_out = g_string_free(plain, FALSE);
- g_string_free(url, TRUE);
- g_string_free(cdata, TRUE);
-/* The following are probably reasonable changes:
- * - \n should be converted to a normal space
- * - in addition to <br>, <p> and <div> etc. should also be converted into \n
- * - We want to turn </td>#whitespace<td> sequences into a single tab
- * - We want to turn </tr>#whitespace<tr> sequences into a single \n
- * - <script>...</script> and <style>...</style> should be completely removed
-purple_markup_strip_html(const char *str)
- gboolean visible = TRUE;
- gboolean closing_td_p = FALSE;
- const gchar *cdata_close_tag = NULL, *ent;
- for (i = 0, j = 0; str2[i]; i++)
- /* Note: Don't even assume any other tag is a tag in CDATA */
- if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
- strlen(cdata_close_tag)) == 0)
- i += strlen(cdata_close_tag) - 1;
- cdata_close_tag = NULL;
- else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
- else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
- if(g_ascii_isspace(str2[k]))
- /* Scan until we end the tag either implicitly (closed start
- * tag) or explicitly, using a sloppy method (i.e., < or >
- * inside quoted attributes will screw us up)
- while (str2[k] && str2[k] != '<' && str2[k] != '>')
- /* If we've got an <a> tag with an href, save the address
- if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
- g_ascii_isspace(str2[i+2]))
- int st; /* start of href, inclusive [ */
- int end; /* end of href, exclusive ) */
- /* Find start of href */
- for (st = i + 3; st < k; st++)
- if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
- if (str2[st] == '"' || str2[st] == '\'')
- /* find end of address */
- for (end = st; end < k && str2[end] != delim; end++)
- /* All the work is done in the loop construct above. */
- /* If there's an address, save it. If there was
- * already one saved, kill it. */
- tmp = g_strndup(str2 + st, end - st);
- href = purple_unescape_html(tmp);
- /* Replace </a> with an ascii representation of the
- * address the link was pointing to. */
- else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
- size_t hrlen = strlen(href);
- /* Only insert the href if it's different from the CDATA. */
- if ((hrlen != (gsize)(j - href_st) ||
- strncmp(str2 + href_st, href, hrlen)) &&
- (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
- strncmp(str2 + href_st, href + 7, hrlen - 7)))
- memmove(str2 + j, href, hrlen);
- /* Check for tags which should be mapped to newline (but ignore some of
- * the tags at the beginning of the text) */
- else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
- || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
- /* Check for tags which begin CDATA and need to be closed */
- else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
- cdata_close_tag = "</script>";
- else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
- cdata_close_tag = "</style>";
- /* Update the index and continue checking after the tag */
- i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
- else if (cdata_close_tag)
- else if (!g_ascii_isspace(str2[i]))
- if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
- str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
-badentity(const char *c)
- if (!g_ascii_strncasecmp(c, "<", 4) ||
- !g_ascii_strncasecmp(c, ">", 4) ||
- !g_ascii_strncasecmp(c, """, 6)) {
-process_link(GString *ret,
- const char *start, const char *c,
- char *url_buf, *tmpurlbuf;
- if (!badchar(*t) && !badentity(t))
- if (*t == ',' && *(t + 1) != ' ') {
- if (t > start && *(t - 1) == '.')
- if (t > start && *(t - 1) == ')' && inside_paren > 0)
- url_buf = g_strndup(c, t - c);
- tmpurlbuf = purple_unescape_html(url_buf);
- g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
-purple_markup_linkify(const char *text)
- const char *c, *t, *q = NULL;
- char *tmpurlbuf, *url_buf;
- gboolean inside_html = FALSE;
- ret = g_string_new("");
- if(*c == '(' && !inside_html) {
- ret = g_string_append_c(ret, *c);
- } else if(!q && (*c == '\"' || *c == '\'')) {
- if (!g_ascii_strncasecmp(c, "<A", 2)) {
- if (!g_ascii_strncasecmp(c, "/A>", 3)) {
- ret = g_string_append_c(ret, *c);
- } else if (!g_ascii_strncasecmp(c, "http://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "https://", 8)) {
- c = process_link(ret, text, c, 8, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
- c = process_link(ret, text, c, 6, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "file://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 4, "http://", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 4, "ftp://", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 5, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
- if (badchar(*t) || badentity(t)) {
- if (t > text && *(t - 1) == '.')
- if ((d = strstr(c + 7, "?")) != NULL && d < t)
- url_buf = g_strndup(c + 7, d - c - 7);
- url_buf = g_strndup(c + 7, t - c - 7);
- if (!purple_email_is_valid(url_buf)) {
- url_buf = g_strndup(c, t - c);
- tmpurlbuf = purple_unescape_html(url_buf);
- g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
- } else if (c != text && (*c == '@')) {
- GString *gurl_buf = NULL;
- const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
- if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
- gurl_buf = g_string_new("");
- /* iterate backwards grabbing the local part of an email address */
- g = g_utf8_get_char(t);
- if (badchar(*t) || (g >= 127) || (*t == '(') ||
- ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "<", 4) ||
- !g_ascii_strncasecmp(t - 3, ">", 4))) ||
- (t > (text+4) && (!g_ascii_strncasecmp(t - 5, """, 6)))))) {
- /* local part will already be part of ret, strip it out */
- ret = g_string_truncate(ret, ret->len - (c - t));
- ret = g_string_append_unichar(ret, g);
- g_string_prepend_unichar(gurl_buf, g);
- t = g_utf8_find_prev_char(text, t);
- ret = g_string_assign(ret, "");
- t = g_utf8_find_next_char(c, NULL);
- /* iterate forwards grabbing the domain part of an email address */
- g = g_utf8_get_char(t);
- if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
- url_buf = g_string_free(gurl_buf, FALSE);
- /* strip off trailing periods */
- for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
- tmpurlbuf = purple_unescape_html(url_buf);
- if (purple_email_is_valid(tmpurlbuf)) {
- g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
- g_string_append(ret, url_buf);
- g_string_append_unichar(gurl_buf, g);
- t = g_utf8_find_next_char(t, NULL);
- g_string_free(gurl_buf, TRUE);
- if(*c == ')' && !inside_html) {
- ret = g_string_append_c(ret, *c);
- ret = g_string_append_c(ret, *c);
- return g_string_free(ret, FALSE);
-char *purple_unescape_text(const char *in)
- ret = g_string_new("");
- if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
- g_string_append(ret, ent);
- g_string_append_c(ret, *c);
- return g_string_free(ret, FALSE);
-char *purple_unescape_html(const char *html)
- ret = g_string_new("");
- if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
- g_string_append(ret, ent);
- } else if (!strncmp(c, "<br>", 4)) {
- g_string_append_c(ret, '\n');
- g_string_append_c(ret, *c);
- return g_string_free(ret, FALSE);
-purple_markup_slice(const char *str, guint x, guint y)
- gboolean appended = FALSE;
- g_return_val_if_fail(str != NULL, NULL);
- g_return_val_if_fail(x <= y, NULL);
- ret = g_string_new("");
- while (*str && (z < y)) {
- c = g_utf8_get_char(str);
- char *end = strchr(str, '>');
- g_string_free(ret, TRUE);
- while ((tag = g_queue_pop_head(q)))
- if (!g_ascii_strncasecmp(str, "<img ", 5)) {
- z += strlen("[Image]");
- } else if (!g_ascii_strncasecmp(str, "<br", 3)) {
- } else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
- z += strlen("\n---\n");
- } else if (!g_ascii_strncasecmp(str, "</", 2)) {
- tmp = g_queue_pop_head(q);
- /* push it onto the stack */
- tmp = g_strndup(str, end - str + 1);
- g_queue_push_head(q, tmp);
- g_string_append_len(ret, str, end - str + 1);
- char *end = strchr(str, ';');
- g_string_free(ret, TRUE);
- while ((tag = g_queue_pop_head(q)))
- g_string_append_len(ret, str, end - str + 1);
- if (z == x && z > 0 && !appended) {
- g_string_append(ret, tag);
- g_string_append_unichar(ret, c);
- str = g_utf8_next_char(str);
- while ((tag = g_queue_pop_head(q))) {
- name = purple_markup_get_tag_name(tag);
- g_string_append_printf(ret, "</%s>", name);
- return g_string_free(ret, FALSE);
-purple_markup_get_tag_name(const char *tag)
- g_return_val_if_fail(tag != NULL, NULL);
- g_return_val_if_fail(*tag == '<', NULL);
- for (i = 1; tag[i]; i++)
- if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/')
- return g_strndup(tag+1, i-1);
-/**************************************************************************
* Path/Filename Functions
**************************************************************************/
--- a/libpurple/util.h Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/util.h Fri Oct 16 02:27:21 2020 -0500
@@ -196,201 +196,6 @@
gint purple_time_parse_month(const char *month_abbr);
/**************************************************************************/
-/**************************************************************************/
- * purple_markup_escape_text:
- * @text: The text to escape
- * @length: The length of the text, or -1 if NUL-terminated
- * Escapes special characters in a plain-text string so they display
- * correctly as HTML. For example, & is replaced with &amp; and < is
- * replaced with &lt;
- * This is exactly the same as g_markup_escape_text(), except that it
- * does not change ' to &apos; because &apos; is not a valid HTML 4 entity,
- * and is displayed literally in IE7.
-gchar *purple_markup_escape_text(const gchar *text, gssize length);
- * purple_markup_find_tag:
- * @needle: The name of the tag
- * @haystack: The NUL-terminated string to search in
- * @start: A pointer to the start of the tag if found
- * @end: A pointer to the end of the tag if found
- * @attributes: The attributes, if the tag was found. This should
- * be freed with g_datalist_clear().
- * Finds an HTML tag matching the given name.
- * This locates an HTML tag's start and end, and stores its attributes
- * in a GData hash table. The names of the attributes are lower-cased
- * in the hash table, and the name of the tag is case insensitive.
- * Returns: TRUE if the tag was found
-gboolean purple_markup_find_tag(const char *needle, const char *haystack,
- const char **start, const char **end,
- * purple_markup_html_to_xhtml:
- * @html: The HTML markup.
- * @dest_xhtml: The destination XHTML output.
- * @dest_plain: The destination plain-text output.
- * Converts HTML markup to XHTML.
-void purple_markup_html_to_xhtml(const char *html, char **dest_xhtml,
- * purple_markup_strip_html:
- * @str: The string to strip HTML from.
- * Strips HTML tags from a string.
- * Returns: The new string without HTML. You must g_free this string
- * when finished with it.
-char *purple_markup_strip_html(const char *str);
- * purple_markup_linkify:
- * @str: The string to linkify.
- * Adds the necessary HTML code to turn URIs into HTML links in a string.
- * Returns: The new string with all URIs surrounded in standard
- * HTML <a href="whatever"></a> tags. You must g_free()
- * this string when finished with it.
-char *purple_markup_linkify(const char *str);
- * purple_unescape_text:
- * @text: The string in which to unescape any HTML entities
- * Unescapes HTML entities to their literal characters in the text.
- * For example "&amp;" is replaced by '&' and so on. Also converts
- * numerical entities (e.g. "&\#38;" is also '&').
- * This function currently supports the following named entities:
- * "&amp;", "&lt;", "&gt;", "&copy;", "&quot;",
- * "&reg;", "&apos;"
- * purple_unescape_html() is similar, but also converts "<br>" into "\n".
- * See purple_unescape_html()
- * Returns: The text with HTML entities literalized. You must g_free
- * this string when finished with it.
-char *purple_unescape_text(const char *text);
- * purple_unescape_html:
- * @html: The string in which to unescape any HTML entities
- * Unescapes HTML entities to their literal characters and converts
- * "<br>" to "\n". See purple_unescape_text() for more details.
- * See purple_unescape_text()
- * Returns: The text with HTML entities literalized. You must g_free
- * this string when finished with it.
-char *purple_unescape_html(const char *html);
- * @str: The input NUL terminated, HTML, UTF-8 (or ASCII) string.
- * @x: The character offset into an unformatted version of str to
- * begin the slice at.
- * @y: The character offset (into an unformatted version of str) of
- * one past the last character to include in the slice.
- * Returns a newly allocated substring of the HTML UTF-8 string "str".
- * The markup is preserved such that the substring will have the same
- * formatting as original string, even though some tags may have been
- * opened before "x", or may close after "y". All open tags are closed
- * at the end of the returned string, in the proper order.
- * Note that x and y are in character offsets, not byte offsets, and
- * are offsets into an unformatted version of str. Because of this,
- * this function may be sensitive to changes in GtkIMHtml and may break
- * when used with other UIs. libpurple users are encouraged to report and
- * work out any problems encountered.
- * Returns: The HTML slice of string, with all formatting retained.
-char *purple_markup_slice(const char *str, guint x, guint y);
- * purple_markup_get_tag_name:
- * @tag: The string starting a HTML tag.
- * Returns a newly allocated string containing the name of the tag
- * located at "tag". Tag is expected to point to a '<', and contain
- * a '>' sometime after that. If there is no '>' and the string is
- * not NUL terminated, this function can be expected to segfault.
- * Returns: A string containing the name of the tag.
-char *purple_markup_get_tag_name(const char *tag);
- * purple_markup_unescape_entity:
- * @text: A string containing an HTML entity.
- * @length: If not %NULL, the string length of the entity is stored in this location.
- * Returns a constant string of the character representation of the HTML
- * entity pointed to by @text. For example, purple_markup_unescape_entity("&amp;")
- * will return "&". The @text variable is expected to point to an '&',
- * the first character of the entity. If given an unrecognized entity, the function
- * returns %NULL.
- * Note that this function, unlike purple_unescape_html(), does not search
- * the string for the entity, does not replace the entity, and does not
- * return a newly allocated string.
- * Returns: A constant string containing the character representation of the given entity.
-const char * purple_markup_unescape_entity(const char *text, int *length);
- * purple_markup_get_css_property:
- * @style: A string containing the inline CSS text.
- * @opt: The requested CSS property.
- * Returns a newly allocated string containing the value of the CSS property specified
- * in opt. The @style argument is expected to point to a HTML inline CSS.
- * The function will seek for the CSS property and return its value.
- * For example, purple_markup_get_css_property("direction:rtl;color:#dc4d1b;",
- * "color") would return "#dc4d1b".
- * On error or if the requested property was not found, the function returns
- * %NULL.
- * Returns: The value of the requested CSS property.
-char * purple_markup_get_css_property(const gchar *style, const gchar *opt);
- * purple_markup_is_rtl:
- * @html: The HTML text.
- * Check if the given HTML contains RTL text.
- * Returns: TRUE if the text contains RTL text, FALSE otherwise.
-gboolean purple_markup_is_rtl(const char *html);
-/**************************************************************************/
/* Path/Filename Functions */
/**************************************************************************/
--- a/libpurple/xmlnode.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/xmlnode.c Fri Oct 16 02:27:21 2020 -0500
@@ -33,6 +33,7 @@
+#include "purplemarkup.h"
--- a/po/POTFILES.in Thu Oct 15 20:25:08 2020 -0500
+++ b/po/POTFILES.in Fri Oct 16 02:27:21 2020 -0500
@@ -272,6 +272,7 @@
libpurple/purplechatuser.c
libpurple/purplecredentialprovider.c
libpurple/purpleimconversation.c
+libpurple/purplemarkup.c
libpurple/purpleprotocolim.c
libpurple/purpleprotocolmedia.c
libpurple/purpleprotocolprivacy.c