pidgin/pidgin

Parents 9ce322b6f2d1
Children 43f9f757667d
Pull the purple_markup_* API out of util.[ch] and into purplemarkup.[ch]. No code was changed; it was just moved from one file to the other.

Testing Done:
Compile and unit tests.

Reviewed at https://reviews.imfreedom.org/r/171/
--- a/doc/reference/libpurple/libpurple-docs.xml Thu Oct 15 20:25:08 2020 -0500
+++ b/doc/reference/libpurple/libpurple-docs.xml Fri Oct 16 02:27:21 2020 -0500
@@ -72,6 +72,7 @@
<xi:include href="xml/purplecredentialprovider.xml" />
<xi:include href="xml/purpleimconversation.xml" />
<xi:include href="xml/purplekeyvaluepair.xml" />
+ <xi:include href="xml/purplemarkup.xml" />
<xi:include href="xml/purpleprotocolfactory.xml" />
<xi:include href="xml/purpleprotocolim.xml" />
<xi:include href="xml/purpleprotocolmedia.xml" />
--- a/libpurple/cmds.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/cmds.c Fri Oct 16 02:27:21 2020 -0500
@@ -19,7 +19,7 @@
#include "internal.h"
#include "account.h"
-#include "util.h"
+#include "purplemarkup.h"
#include "cmds.h"
static PurpleCommandsUiOps *cmds_ui_ops = NULL;
--- a/libpurple/conversation.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/conversation.c Fri Oct 16 02:27:21 2020 -0500
@@ -32,11 +32,11 @@
#include "notify.h"
#include "prefs.h"
#include "protocol.h"
+#include "purplemarkup.h"
#include "purpleprivate.h"
#include "request.h"
#include "signals.h"
#include "smiley-list.h"
-#include "util.h"
typedef struct _PurpleConversationPrivate PurpleConversationPrivate;
--- a/libpurple/log.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/log.c Fri Oct 16 02:27:21 2020 -0500
@@ -33,7 +33,7 @@
#include "image-store.h"
#include "log.h"
#include "prefs.h"
-#include "util.h"
+#include "purplemarkup.h"
#include "time.h"
static GSList *loggers = NULL;
--- a/libpurple/meson.build Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/meson.build Fri Oct 16 02:27:21 2020 -0500
@@ -57,6 +57,7 @@
'purplecredentialprovider.c',
'purpleimconversation.c',
'purplekeyvaluepair.c',
+ 'purplemarkup.c',
'purpleprotocolfactory.c',
'purpleprotocolim.c',
'purpleprotocolmedia.c',
@@ -141,6 +142,7 @@
'purpleimconversation.h',
'purpleattachment.h',
'purplekeyvaluepair.h',
+ 'purplemarkup.h',
'purpleprotocolfactory.h',
'purpleprotocolim.h',
'purpleprotocolmedia.h',
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libpurple/purplemarkup.c Fri Oct 16 02:27:21 2020 -0500
@@ -0,0 +1,1561 @@
+/*
+ * Purple - Internet Messaging Library
+ * Copyright (C) Pidgin Developers <devel@pidgin.im>
+ *
+ * Purple is the legal property of its developers, whose names are too numerous
+ * to list here. Please refer to the COPYRIGHT file distributed with this
+ * source distribution.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "purplemarkup.h"
+
+#include "util.h"
+
+/*
+ * Appends the first @length bytes of @text to @str, replacing the markup
+ * metacharacters '&', '<', '>' and '"' with their named entities and
+ * writing the control-range code points tested below as hexadecimal
+ * character references.
+ *
+ * This function is stolen from glib's gmarkup.c and modified to not
+ * replace ' with &apos;
+ *
+ * The scan never reads a character that extends beyond text + length: if
+ * @length ends in the middle of a multibyte UTF-8 sequence, the leftover
+ * bytes are copied verbatim and the scan stops.
+ */
+static void append_escaped_text(GString *str,
+		const gchar *text, gssize length)
+{
+	const gchar *p;
+	const gchar *end;
+	gunichar c;
+
+	p = text;
+	end = text + length;
+
+	/* '<' rather than '!=' so that a step over 'end' can never be missed */
+	while (p < end)
+	{
+		const gchar *next;
+		next = g_utf8_next_char (p);
+
+		if (next > end)
+		{
+			/* truncated multibyte sequence at the end of the range */
+			g_string_append_len (str, p, end - p);
+			break;
+		}
+
+		switch (*p)
+		{
+			case '&':
+				g_string_append (str, "&amp;");
+				break;
+
+			case '<':
+				g_string_append (str, "&lt;");
+				break;
+
+			case '>':
+				g_string_append (str, "&gt;");
+				break;
+
+			case '"':
+				g_string_append (str, "&quot;");
+				break;
+
+			default:
+				c = g_utf8_get_char (p);
+				if ((0x1 <= c && c <= 0x8) ||
+						(0xb <= c && c <= 0xc) ||
+						(0xe <= c && c <= 0x1f) ||
+						(0x7f <= c && c <= 0x84) ||
+						(0x86 <= c && c <= 0x9f))
+					g_string_append_printf (str, "&#x%x;", c);
+				else
+					g_string_append_len (str, p, next - p);
+				break;
+		}
+
+		p = next;
+	}
+}
+
+/*
+ * Returns a newly allocated copy of @text with markup metacharacters
+ * escaped by append_escaped_text().  A negative @length means "use
+ * strlen(text)".  Returns NULL (after a g_return_val_if_fail warning) when
+ * @text is NULL.
+ *
+ * This function is stolen from glib's gmarkup.c
+ */
+gchar *purple_markup_escape_text(const gchar *text, gssize length)
+{
+	GString *escaped;
+	gssize nbytes = length;
+
+	g_return_val_if_fail(text != NULL, NULL);
+
+	if (nbytes < 0)
+		nbytes = strlen(text);
+
+	/* the result is at least as long as the input, so reserve that much */
+	escaped = g_string_sized_new(nbytes);
+	append_escaped_text(escaped, text, nbytes);
+
+	/* hand the character data to the caller, drop only the GString shell */
+	return g_string_free(escaped, FALSE);
+}
+
+/*
+ * Decodes the HTML entity that starts at @text.
+ *
+ * @text must point at the '&' of the entity.  Recognised are the named
+ * entities listed in the table below (matched case-insensitively) and
+ * numeric references of the form "&#NNN;" or "&#xHHH;".
+ *
+ * On success the decoded UTF-8 text is returned and, when @length is not
+ * NULL, *@length receives the number of bytes the entity occupies in @text.
+ * NULL is returned when @text is NULL, does not start with '&', or is not a
+ * recognised entity; *@length is left untouched in that case.
+ *
+ * The result for a numeric reference lives in a static buffer: it is
+ * overwritten by the next numeric decode and must not be freed.
+ */
+const char *
+purple_markup_unescape_entity(const char *text, int *length)
+{
+	/* Kept in the order the entities were originally tested in. */
+	static const struct {
+		const char *entity;
+		const char *plain;
+	} named[] = {
+		{ "&amp;",  "&" },
+		{ "&lt;",   "<" },
+		{ "&gt;",   ">" },
+		{ "&nbsp;", " " },
+		{ "&copy;", "\302\251" }, /* or use g_unichar_to_utf8(0xa9); */
+		{ "&quot;", "\"" },
+		{ "&reg;",  "\302\256" }, /* or use g_unichar_to_utf8(0xae); */
+		{ "&apos;", "\'" },
+	};
+	const char *pln = NULL;
+	int len = 0;
+	gsize i;
+
+	if (!text || *text != '&')
+		return NULL;
+
+	for (i = 0; i < G_N_ELEMENTS(named); i++) {
+		size_t entlen = strlen(named[i].entity);
+
+		if (!g_ascii_strncasecmp(text, named[i].entity, entlen)) {
+			pln = named[i].plain;
+			len = (int)entlen;
+			break;
+		}
+	}
+
+	if (pln == NULL) {
+		static char buf[7];
+		const char *start;
+		char *end;
+		guint64 pound;
+		int base = 10;
+		int buflen;
+
+		if (text[1] != '#' ||
+				!(g_ascii_isxdigit(text[2]) || text[2] == 'x'))
+			return NULL;
+
+		start = text + 2;
+		if (*start == 'x') {
+			base = 16;
+			start++;
+		}
+
+		/* zero, out-of-range values and a missing ';' are all rejected */
+		pound = g_ascii_strtoull(start, &end, base);
+		if (pound == 0 || pound > INT_MAX || *end != ';') {
+			return NULL;
+		}
+
+		/* entity length includes the terminating ';' */
+		len = (end - text) + 1;
+
+		buflen = g_unichar_to_utf8((gunichar)pound, buf);
+		buf[buflen] = '\0';
+		pln = buf;
+	}
+
+	if (length)
+		*length = len;
+	return pln;
+}
+
+/*
+ * Extracts the value of the CSS property @opt from the inline style string
+ * @style (the contents of a style="..." attribute).
+ *
+ * Property names are compared case-insensitively.  A declaration whose name
+ * merely begins with @opt (for example "font-size-adjust" when looking for
+ * "font-size") is skipped and the search continues with the next
+ * declaration.
+ *
+ * Returns a newly allocated string holding the value with surrounding
+ * whitespace removed and passed through purple_unescape_html(), or NULL
+ * when @style is NULL, the property is absent, its value is empty, or the
+ * style string is malformed before the property is reached.  The caller
+ * frees the result with g_free().
+ */
+char *
+purple_markup_get_css_property(const gchar *style,
+		const gchar *opt)
+{
+	const gchar *css_str = style;
+	const gchar *css_value_start;
+	const gchar *css_value_end;
+	gsize optlen;
+	gchar *tmp;
+	gchar *ret;
+
+	g_return_val_if_fail(opt != NULL, NULL);
+
+	if (!css_str)
+		return NULL;
+
+	optlen = strlen(opt);
+
+	/* find the CSS property */
+	while (1)
+	{
+		/* skip whitespace characters */
+		while (*css_str && g_ascii_isspace(*css_str))
+			css_str++;
+		if (!g_ascii_isalpha(*css_str))
+			return NULL;
+
+		if (!g_ascii_strncasecmp(css_str, opt, optlen))
+		{
+			/* Only a name followed by ':' (optionally after whitespace)
+			 * is a real match; anything else is a longer property name. */
+			const gchar *after = css_str + optlen;
+
+			while (*after && g_ascii_isspace(*after))
+				after++;
+			if (*after == ':')
+			{
+				css_str = after + 1;
+				break;
+			}
+		}
+
+		/* go to next css property positioned after the next ';' */
+		while (*css_str && *css_str != '"' && *css_str != ';')
+			css_str++;
+		if (*css_str != ';')
+			return NULL;
+		css_str++;
+	}
+
+	/* find the CSS value position in the string */
+	while (*css_str && g_ascii_isspace(*css_str))
+		css_str++;
+	if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
+		return NULL;
+
+	/* mark the CSS value */
+	css_value_start = css_str;
+	while (*css_str && *css_str != '"' && *css_str != ';')
+		css_str++;
+	css_value_end = css_str - 1;
+
+	/* Removes trailing whitespace */
+	while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
+		css_value_end--;
+
+	tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
+	ret = purple_unescape_html(tmp);
+	g_free(tmp);
+
+	return ret;
+}
+
+/*
+ * Reports whether the first <span> tag found in @html requests
+ * right-to-left text, either through dir="rtl" or through a "direction"
+ * property with the value "rtl" inside its style attribute (both compared
+ * case-insensitively).  Returns FALSE when no <span> tag is found.
+ */
+gboolean purple_markup_is_rtl(const char *html)
+{
+	GData *attrs;
+	const gchar *tag_start, *tag_end;
+	const char *value;
+	gboolean rtl;
+
+	if (!purple_markup_find_tag("span", html, &tag_start, &tag_end, &attrs))
+		return FALSE;
+
+	/* values fetched from the datalist are owned by it; they are released
+	 * by the g_datalist_clear() call below */
+	value = g_datalist_get_data(&attrs, "dir");
+	rtl = (value != NULL && g_ascii_strcasecmp(value, "RTL") == 0);
+
+	if (!rtl) {
+		value = g_datalist_get_data(&attrs, "style");
+		if (value != NULL) {
+			char *direction = purple_markup_get_css_property(value, "direction");
+
+			if (direction != NULL && g_ascii_strcasecmp(direction, "RTL") == 0)
+				rtl = TRUE;
+			g_free(direction);
+		}
+	}
+
+	g_datalist_clear(&attrs);
+	return rtl;
+}
+
+/*
+ * Searches @haystack for the first complete <@needle ...> tag.
+ *
+ * The tag name is matched case-insensitively and must be followed by a
+ * space or '>'.  Tags whose name merely begins with @needle are skipped up
+ * to their closing '>', ignoring any '>' that appears inside a quoted
+ * attribute value.
+ *
+ * On success TRUE is returned, *@start points at the '<', *@end at the
+ * closing '>' and *@attributes receives a datalist mapping lower-cased
+ * attribute names to newly allocated values; the caller releases it with
+ * g_datalist_clear().  On failure FALSE is returned, all three outputs are
+ * set to NULL and nothing needs to be freed.
+ *
+ * All pointer arguments are required; a NULL argument or an empty @needle
+ * makes the function return FALSE through g_return_val_if_fail().
+ */
+gboolean
+purple_markup_find_tag(const char *needle, const char *haystack,
+		const char **start, const char **end, GData **attributes)
+{
+	GData *attribs;
+	const char *cur = haystack;
+	char *name = NULL;
+	gboolean found = FALSE;
+	gboolean in_tag = FALSE;
+	gboolean in_attr = FALSE;
+	const char *in_quotes = NULL;
+	size_t needlelen;
+
+	g_return_val_if_fail(    needle != NULL, FALSE);
+	g_return_val_if_fail(   *needle != '\0', FALSE);
+	g_return_val_if_fail(  haystack != NULL, FALSE);
+	g_return_val_if_fail(     start != NULL, FALSE);
+	g_return_val_if_fail(       end != NULL, FALSE);
+	g_return_val_if_fail(attributes != NULL, FALSE);
+
+	needlelen = strlen(needle);
+	g_datalist_init(&attribs);
+
+	while (*cur && !found) {
+		if (in_tag) {
+			if (in_quotes) {
+				const char *close = cur;
+
+				while (*close && *close != *in_quotes)
+					close++;
+
+				/* if we got the close quote, store the value and carry on
+				 * from after it. if we ran to the end of the string, point
+				 * to the NUL and we're outta here */
+				if (*close) {
+					/* only store a value if we have an attribute name */
+					if (name) {
+						size_t len = close - cur;
+						char *val = g_strndup(cur, len);
+
+						g_datalist_set_data_full(&attribs, name, val, g_free);
+						g_free(name);
+						name = NULL;
+					}
+
+					in_quotes = NULL;
+					cur = close + 1;
+				} else {
+					cur = close;
+				}
+			} else if (in_attr) {
+				const char *close = cur;
+
+				while (*close && *close != '>' && *close != '"' &&
+						*close != '\'' && *close != ' ' && *close != '=')
+					close++;
+
+				/* if we got the equals, store the name of the attribute. if
+				 * we got the quote, save the attribute and go straight to
+				 * quote mode. otherwise the tag closed or we reached the end
+				 * of the string, so we can get outta here */
+				switch (*close) {
+				case '"':
+				case '\'':
+					in_quotes = close;
+					/* fall through */
+				case '=':
+					{
+						size_t len = close - cur;
+
+						/* don't store a blank attribute name */
+						if (len) {
+							g_free(name);
+							name = g_ascii_strdown(cur, len);
+						}
+
+						in_attr = FALSE;
+						cur = close + 1;
+					}
+					break;
+				case ' ':
+				case '>':
+					in_attr = FALSE;
+					/* fall through */
+				default:
+					cur = close;
+					break;
+				}
+			} else {
+				switch (*cur) {
+				case ' ':
+					/* swallow extra spaces inside tag */
+					while (*cur && *cur == ' ') cur++;
+					in_attr = TRUE;
+					break;
+				case '>':
+					found = TRUE;
+					*end = cur;
+					break;
+				case '"':
+				case '\'':
+					in_quotes = cur;
+					/* fall through */
+				default:
+					cur++;
+					break;
+				}
+			}
+		} else {
+			/* if we hit a < followed by the name of our tag... */
+			if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
+				*start = cur;
+				cur = cur + needlelen + 1;
+
+				/* if we're pointing at a space or a >, we found the right
+				 * tag. if we're not, we've found a longer tag, so we need to
+				 * skip to the >, but not being distracted by >s inside
+				 * quotes. */
+				if (*cur == ' ' || *cur == '>') {
+					in_tag = TRUE;
+				} else {
+					while (*cur && *cur != '>') {
+						if (*cur == '"' || *cur == '\'') {
+							const char quote = *cur++;
+
+							/* jump over the whole quoted value... */
+							while (*cur && *cur != quote)
+								cur++;
+							/* ...and over its closing quote, if any */
+							if (*cur)
+								cur++;
+						} else {
+							cur++;
+						}
+					}
+				}
+			} else {
+				cur++;
+			}
+		}
+	}
+
+	/* clean up any attribute name from a premature termination */
+	g_free(name);
+
+	if (found) {
+		*attributes = attribs;
+	} else {
+		/* attributes may already have been collected from a tag that was
+		 * never closed; release them instead of leaking them */
+		g_datalist_clear(&attribs);
+
+		*start = NULL;
+		*end = NULL;
+		*attributes = NULL;
+	}
+
+	return found;
+}
+
+/*
+ * One entry in the list of currently open tags that
+ * purple_markup_html_to_xhtml() keeps while it walks the input.
+ */
+struct purple_parse_tag {
+ char *src_tag; /* tag name as it is matched against closing tags in the input */
+ char *dest_tag; /* tag name written in the closing tag of the XHTML output */
+ gboolean ignore; /* TRUE: no closing tag is written to the XHTML output */
+};
+
+/* ALLOW_TAG_ALT(x, y): accept the input tag named x and emit it as tag y.
+ *
+ * The macro is written for use inside the main loop of
+ * purple_markup_html_to_xhtml() and relies on that function's locals
+ * 'c' (current input position), 'xhtml', 'plain' and 'tags'.  x and y
+ * must be string literals because they are pasted next to "<".
+ *
+ * Two input forms are handled:
+ *  - "<x " followed by attributes: the attribute text is copied, with the
+ *    contents of quoted values re-escaped through g_markup_escape_text().
+ *    If the tag is not closed by '>' or another '<' shows up first, only
+ *    the leading '<' is emitted as text.
+ *  - "<x>" or "<x/>" without attributes.
+ * Unless the tag is self-closing, a purple_parse_tag entry is pushed onto
+ * 'tags' so that the matching closing tag can be written later.
+ *
+ * NOTE: Do not put `do {} while(0)` around this macro (as this is the method
+ recommended in the GCC docs). It contains 'continue's that should
+ affect the while-loop in purple_markup_html_to_xhtml and doing the
+ above would break that.
+ Also, remember to put braces in constructs that require them for
+ multiple statements when using this macro.
+
+ NOTE(review): the backslash test inside the quoted-value branch looks at
+ *c, which is the '<' that started the tag, so it appears it can never
+ be true; possibly *o was intended -- confirm before changing. */
+#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
+ const char *o = c + strlen("<" x); \
+ const char *p = NULL, *q = NULL, *r = NULL; \
+ /* o = iterating over full tag \
+ * p = > (end of tag) \
+ * q = start of quoted bit \
+ * r = < inside tag \
+ */ \
+ GString *innards = g_string_new(""); \
+ while(o && *o) { \
+ if(!q && (*o == '\"' || *o == '\'') ) { \
+ q = o; \
+ } else if(q) { \
+ if(*o == *q) { /* end of quoted bit */ \
+ char *unescaped = g_strndup(q+1, o-q-1); \
+ char *escaped = g_markup_escape_text(unescaped, -1); \
+ g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
+ g_free(unescaped); \
+ g_free(escaped); \
+ q = NULL; \
+ } else if(*c == '\\') { \
+ o++; \
+ } \
+ } else if(*o == '<') { \
+ r = o; \
+ } else if(*o == '>') { \
+ p = o; \
+ break; \
+ } else { \
+ innards = g_string_append_c(innards, *o); \
+ } \
+ o++; \
+ } \
+ if(p && !r) { /* got an end of tag and no other < earlier */\
+ if(*(p-1) != '/') { \
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
+ pt->src_tag = x; \
+ pt->dest_tag = y; \
+ tags = g_list_prepend(tags, pt); \
+ } \
+ if(xhtml) { \
+ xhtml = g_string_append(xhtml, "<" y); \
+ xhtml = g_string_append(xhtml, innards->str); \
+ xhtml = g_string_append_c(xhtml, '>'); \
+ } \
+ c = p + 1; \
+ } else { /* got end of tag with earlier < *or* didn't get anything */ \
+ if(xhtml) \
+ xhtml = g_string_append(xhtml, "&lt;"); \
+ if(plain) \
+ plain = g_string_append_c(plain, '<'); \
+ c++; \
+ } \
+ g_string_free(innards, TRUE); \
+ continue; \
+ } \
+ if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
+ (*(c+strlen("<" x)) == '>' || \
+ !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
+ if(xhtml) \
+ xhtml = g_string_append(xhtml, "<" y); \
+ c += strlen("<" x); \
+ if(*c != '/') { \
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
+ pt->src_tag = x; \
+ pt->dest_tag = y; \
+ tags = g_list_prepend(tags, pt); \
+ if(xhtml) \
+ xhtml = g_string_append_c(xhtml, '>'); \
+ } else { \
+ if(xhtml) \
+ xhtml = g_string_append(xhtml, "/>");\
+ } \
+ c = strchr(c, '>') + 1; \
+ continue; \
+ }
+/* Don't forget to check the note above for ALLOW_TAG_ALT. */
+/* ALLOW_TAG(x): accept tag x and emit it under the same name. */
+#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
+/*
+ * Converts the loosely formed HTML in @html into XHTML and/or plain text.
+ *
+ * At least one of @xhtml_out and @plain_out must be non-NULL; each one that
+ * is given receives a newly allocated string which the caller frees with
+ * g_free().
+ *
+ * Tags on the allow list are copied (some under a different name), the
+ * presentational tags <b>, <u>, <s>, <sub>, <sup>, <font> and
+ * <body bgcolor=...> become <span style='...'>, <br> and <hr> become <br/>,
+ * and any other '<' is written as "&lt;".  Tags still open at the end of
+ * the input are closed in the XHTML output.  In the plain text output tags
+ * are dropped, entities are decoded, <br>/<hr> become a newline, an image
+ * is replaced by its alt text, and the target of a link is appended as
+ * " <url>" when it differs from the link text.
+ */
+void
+purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
+ char **plain_out)
+{
+ GString *xhtml = NULL;
+ GString *plain = NULL;
+ GString *url = NULL;
+ GString *cdata = NULL;
+ GList *tags = NULL, *tag;
+ const char *c = html;
+ char quote = '\0';
+
+#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
+ quote = *(ptr++); \
+ else \
+ quote = '\0';
+
+#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
+
+ g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
+
+ if(xhtml_out)
+ xhtml = g_string_new("");
+ if(plain_out)
+ plain = g_string_new("");
+
+ while(c && *c) {
+ if(*c == '<') {
+ if(*(c+1) == '/') { /* closing tag */
+ tag = tags;
+ while(tag) {
+ struct purple_parse_tag *pt = tag->data;
+ if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
+ c += strlen(pt->src_tag) + 3;
+ break;
+ }
+ tag = tag->next;
+ }
+ if(tag) {
+ /* close every tag opened after the matching one, then the match itself */
+ while(tags) {
+ struct purple_parse_tag *pt = tags->data;
+ if(xhtml && !pt->ignore)
+ g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
+ if(plain && purple_strequal(pt->src_tag, "a")) {
+ /* if this is a link, we have to add the url to the plaintext, too */
+ if (cdata && url &&
+ (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
+ g_utf8_collate(url->str + 7, cdata->str) != 0))) {
+ /* purple_unescape_html() allocates; keep the pointer so it can be freed */
+ char *unescaped_url = purple_unescape_html(url->str);
+ g_string_append_printf(plain, " <%s>", g_strstrip(unescaped_url));
+ g_free(unescaped_url);
+ }
+ if (cdata) {
+ g_string_free(cdata, TRUE);
+ cdata = NULL;
+ }
+
+ }
+ if(tags == tag)
+ break;
+ tags = g_list_delete_link(tags, tags);
+ g_free(pt);
+ }
+ g_free(tag->data);
+ tags = g_list_delete_link(tags, tag);
+ } else {
+ /* a closing tag we weren't expecting...
+ * we'll let it slide, if it's really a tag...if it's
+ * just a </ we'll escape it properly */
+ const char *end = c+2;
+ while(*end && g_ascii_isalpha(*end))
+ end++;
+ if(*end == '>') {
+ c = end+1;
+ } else {
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "&lt;");
+ if(plain)
+ plain = g_string_append_c(plain, '<');
+ c++;
+ }
+ }
+ } else { /* opening tag */
+ ALLOW_TAG("blockquote");
+ ALLOW_TAG("cite");
+ ALLOW_TAG("div");
+ ALLOW_TAG("em");
+ ALLOW_TAG("h1");
+ ALLOW_TAG("h2");
+ ALLOW_TAG("h3");
+ ALLOW_TAG("h4");
+ ALLOW_TAG("h5");
+ ALLOW_TAG("h6");
+ /* we only allow html to start the message */
+ if(c == html) {
+ ALLOW_TAG("html");
+ }
+ ALLOW_TAG_ALT("i", "em");
+ ALLOW_TAG_ALT("italic", "em");
+ ALLOW_TAG("li");
+ ALLOW_TAG("ol");
+ ALLOW_TAG("p");
+ ALLOW_TAG("pre");
+ ALLOW_TAG("q");
+ ALLOW_TAG("span");
+ ALLOW_TAG("ul");
+
+
+ /* we skip <HR> because it's not legal in XHTML-IM. However,
+ * we still want to send something sensible, so we put a
+ * linebreak in its place. <BR> also needs special handling
+ * because putting a </BR> to close it would just be dumb. */
+ if((!g_ascii_strncasecmp(c, "<br", 3)
+ || !g_ascii_strncasecmp(c, "<hr", 3))
+ && (*(c+3) == '>' ||
+ !g_ascii_strncasecmp(c+3, "/>", 2) ||
+ !g_ascii_strncasecmp(c+3, " />", 3))) {
+ c = strchr(c, '>') + 1;
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "<br/>");
+ if(plain && *c != '\n')
+ plain = g_string_append_c(plain, '\n');
+ continue;
+ }
+ if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+ if (*(c+2) == '>')
+ pt->src_tag = "b";
+ else if (*(c+2) == 'o')
+ pt->src_tag = "bold";
+ else
+ pt->src_tag = "strong";
+ pt->dest_tag = "span";
+ tags = g_list_prepend(tags, pt);
+ c = strchr(c, '>') + 1;
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
+ continue;
+ }
+ if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+ pt->src_tag = *(c+2) == '>' ? "u" : "underline";
+ pt->dest_tag = "span";
+ tags = g_list_prepend(tags, pt);
+ c = strchr(c, '>') + 1;
+ if (xhtml)
+ xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
+ continue;
+ }
+ if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+ pt->src_tag = *(c+2) == '>' ? "s" : "strike";
+ pt->dest_tag = "span";
+ tags = g_list_prepend(tags, pt);
+ c = strchr(c, '>') + 1;
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
+ continue;
+ }
+ if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+ pt->src_tag = "sub";
+ pt->dest_tag = "span";
+ tags = g_list_prepend(tags, pt);
+ c = strchr(c, '>') + 1;
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
+ continue;
+ }
+ if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+ pt->src_tag = "sup";
+ pt->dest_tag = "span";
+ tags = g_list_prepend(tags, pt);
+ c = strchr(c, '>') + 1;
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
+ continue;
+ }
+ if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
+ const char *p = c + 4;
+ GString *src = NULL, *alt = NULL;
+#define ESCAPE(from, to) \
+ CHECK_QUOTE(from); \
+ while (VALID_CHAR(from)) { \
+ int len; \
+ if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
+ to = g_string_append(to, "&amp;"); \
+ else if (*from == '\'') \
+ to = g_string_append(to, "&apos;"); \
+ else \
+ to = g_string_append_c(to, *from); \
+ from++; \
+ }
+
+ while (*p && *p != '>') {
+ if (!g_ascii_strncasecmp(p, "src=", 4)) {
+ const char *q = p + 4;
+ if (src)
+ g_string_free(src, TRUE);
+ src = g_string_new("");
+ ESCAPE(q, src);
+ p = q;
+ } else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
+ const char *q = p + 4;
+ if (alt)
+ g_string_free(alt, TRUE);
+ alt = g_string_new("");
+ ESCAPE(q, alt);
+ p = q;
+ } else {
+ p++;
+ }
+ }
+#undef ESCAPE
+ if ((c = strchr(p, '>')) != NULL)
+ c++;
+ else
+ c = p;
+ /* src and alt are required! */
+ if(src && xhtml)
+ g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
+ if(alt) {
+ if(plain) {
+ /* purple_unescape_html() allocates; free the copy once appended */
+ char *unescaped_alt = purple_unescape_html(alt->str);
+ plain = g_string_append(plain, unescaped_alt);
+ g_free(unescaped_alt);
+ }
+ if(!src && xhtml)
+ xhtml = g_string_append(xhtml, alt->str);
+ g_string_free(alt, TRUE);
+ }
+ /* an <img> without src= leaves src NULL, which g_string_free() rejects */
+ if(src)
+ g_string_free(src, TRUE);
+ continue;
+ }
+ if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
+ const char *p = c + 2;
+ struct purple_parse_tag *pt;
+ while (*p && *p != '>') {
+ if (!g_ascii_strncasecmp(p, "href=", 5)) {
+ const char *q = p + 5;
+ if (url)
+ g_string_free(url, TRUE);
+ url = g_string_new("");
+ if (cdata)
+ g_string_free(cdata, TRUE);
+ cdata = g_string_new("");
+ CHECK_QUOTE(q);
+ while (VALID_CHAR(q)) {
+ int len;
+ if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
+ url = g_string_append(url, "&amp;");
+ else if (*q == '"')
+ url = g_string_append(url, "&quot;");
+ else
+ url = g_string_append_c(url, *q);
+ q++;
+ }
+ p = q;
+ } else {
+ p++;
+ }
+ }
+ if ((c = strchr(p, '>')) != NULL)
+ c++;
+ else
+ c = p;
+ pt = g_new0(struct purple_parse_tag, 1);
+ pt->src_tag = "a";
+ pt->dest_tag = "a";
+ tags = g_list_prepend(tags, pt);
+ if(xhtml)
+ g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
+ continue;
+ }
+#define ESCAPE(from, to) \
+ CHECK_QUOTE(from); \
+ while (VALID_CHAR(from)) { \
+ int len; \
+ if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
+ to = g_string_append(to, "&amp;"); \
+ else if (*from == '\'') \
+ to = g_string_append_c(to, '\"'); \
+ else \
+ to = g_string_append_c(to, *from); \
+ from++; \
+ }
+ if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
+ const char *p = c + 5;
+ GString *style = g_string_new("");
+ struct purple_parse_tag *pt;
+ while (*p && *p != '>') {
+ if (!g_ascii_strncasecmp(p, "back=", 5)) {
+ const char *q = p + 5;
+ GString *color = g_string_new("");
+ ESCAPE(q, color);
+ g_string_append_printf(style, "background: %s; ", color->str);
+ g_string_free(color, TRUE);
+ p = q;
+ } else if (!g_ascii_strncasecmp(p, "color=", 6)) {
+ const char *q = p + 6;
+ GString *color = g_string_new("");
+ ESCAPE(q, color);
+ g_string_append_printf(style, "color: %s; ", color->str);
+ g_string_free(color, TRUE);
+ p = q;
+ } else if (!g_ascii_strncasecmp(p, "face=", 5)) {
+ const char *q = p + 5;
+ GString *face = g_string_new("");
+ ESCAPE(q, face);
+ g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
+ g_string_free(face, TRUE);
+ p = q;
+ } else if (!g_ascii_strncasecmp(p, "size=", 5)) {
+ const char *q = p + 5;
+ int sz;
+ const char *size = "medium";
+ CHECK_QUOTE(q);
+ sz = atoi(q);
+ switch (sz)
+ {
+ case 1:
+ size = "xx-small";
+ break;
+ case 2:
+ size = "small";
+ break;
+ case 3:
+ size = "medium";
+ break;
+ case 4:
+ size = "large";
+ break;
+ case 5:
+ size = "x-large";
+ break;
+ case 6:
+ case 7:
+ size = "xx-large";
+ break;
+ default:
+ break;
+ }
+ g_string_append_printf(style, "font-size: %s; ", size);
+ p = q;
+ } else {
+ p++;
+ }
+ }
+ if ((c = strchr(p, '>')) != NULL)
+ c++;
+ else
+ c = p;
+ pt = g_new0(struct purple_parse_tag, 1);
+ pt->src_tag = "font";
+ pt->dest_tag = "span";
+ tags = g_list_prepend(tags, pt);
+ if(style->len && xhtml)
+ g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
+ else
+ pt->ignore = TRUE; /* no <span> was opened, so none may be closed */
+ g_string_free(style, TRUE);
+ continue;
+ }
+#undef ESCAPE
+ if (!g_ascii_strncasecmp(c, "<body ", 6)) {
+ const char *p = c + 6;
+ gboolean did_something = FALSE;
+ while (*p && *p != '>') {
+ if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
+ const char *q = p + 8;
+ struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+ GString *color = g_string_new("");
+ CHECK_QUOTE(q);
+ while (VALID_CHAR(q)) {
+ color = g_string_append_c(color, *q);
+ q++;
+ }
+ if (xhtml)
+ g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
+ g_string_free(color, TRUE);
+ if ((c = strchr(p, '>')) != NULL)
+ c++;
+ else
+ c = p;
+ pt->src_tag = "body";
+ pt->dest_tag = "span";
+ tags = g_list_prepend(tags, pt);
+ did_something = TRUE;
+ break;
+ }
+ p++;
+ }
+ if (did_something) continue;
+ }
+ /* this has to come after the special case for bgcolor */
+ ALLOW_TAG("body");
+ if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
+ char *p = strstr(c + strlen("<!--"), "-->");
+ if(p) {
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "<!--");
+ c += strlen("<!--");
+ continue;
+ }
+ }
+
+ if(xhtml)
+ xhtml = g_string_append(xhtml, "&lt;");
+ if(plain)
+ plain = g_string_append_c(plain, '<');
+ c++;
+ }
+ } else if(*c == '&') {
+ char buf[7];
+ const char *pln;
+ int len;
+
+ if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
+ len = 1;
+ g_snprintf(buf, sizeof(buf), "%c", *c);
+ pln = buf;
+ }
+ if(xhtml)
+ xhtml = g_string_append_len(xhtml, c, len);
+ if(plain)
+ plain = g_string_append(plain, pln);
+ if(cdata)
+ cdata = g_string_append_len(cdata, c, len);
+ c += len;
+ } else {
+ if(xhtml)
+ xhtml = g_string_append_c(xhtml, *c);
+ if(plain)
+ plain = g_string_append_c(plain, *c);
+ if(cdata)
+ cdata = g_string_append_c(cdata, *c);
+ c++;
+ }
+ }
+ /* close whatever is still open, innermost tag first */
+ if(xhtml) {
+ for (tag = tags; tag ; tag = tag->next) {
+ struct purple_parse_tag *pt = tag->data;
+ if(!pt->ignore)
+ g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
+ }
+ }
+ /* free the list together with the purple_parse_tag entries it still owns */
+ g_list_free_full(tags, g_free);
+ if(xhtml_out)
+ *xhtml_out = g_string_free(xhtml, FALSE);
+ if(plain_out)
+ *plain_out = g_string_free(plain, FALSE);
+ if(url)
+ g_string_free(url, TRUE);
+ if (cdata)
+ g_string_free(cdata, TRUE);
+#undef CHECK_QUOTE
+#undef VALID_CHAR
+}
+
+/* The following are probably reasonable changes:
+ * - \n should be converted to a normal space
+ * - in addition to <br>, <p> and <div> etc. should also be converted into \n
+ * - We want to turn </td>#whitespace<td> sequences into a single tab
+ * - We want to turn </tr>#whitespace<tr> sequences into a single \n
+ * - <script>...</script> and <style>...</style> should be completely removed
+ */
+
+/*
+ * Returns a newly allocated copy of @str with the HTML tags removed and the
+ * entities known to purple_markup_unescape_entity() decoded, or NULL when
+ * @str is NULL.
+ *
+ * The copy is rewritten in place: 'i' is the read position and 'j' the
+ * write position in the same buffer.  Besides dropping tags the function
+ *  - writes a newline for <br>, </table> and (when not at the very start of
+ *    the output) <p>, <tr>, <hr>, <li> and <div>,
+ *  - writes a tab for a <td that follows a </td>,
+ *  - drops everything between <script or <style and the matching close tag,
+ *  - appends " (href)" at </a> when the saved href differs from the link
+ *    text,
+ *  - writes other whitespace characters as a single space character each.
+ */
+char *
+purple_markup_strip_html(const char *str)
+{
+ int i, j, k, entlen;
+ gboolean visible = TRUE;
+ gboolean closing_td_p = FALSE;
+ gchar *str2;
+ const gchar *cdata_close_tag = NULL, *ent;
+ gchar *href = NULL;
+ int href_st = 0;
+
+ if(!str)
+ return NULL;
+
+ str2 = g_strdup(str);
+
+ for (i = 0, j = 0; str2[i]; i++)
+ {
+ if (str2[i] == '<')
+ {
+ if (cdata_close_tag)
+ {
+ /* Note: Don't even assume any other tag is a tag in CDATA */
+ if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
+ strlen(cdata_close_tag)) == 0)
+ {
+ i += strlen(cdata_close_tag) - 1;
+ cdata_close_tag = NULL;
+ }
+ continue;
+ }
+ else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
+ {
+ str2[j++] = '\t';
+ visible = TRUE;
+ }
+ else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
+ {
+ /* hide what follows until the next tag decides otherwise */
+ closing_td_p = TRUE;
+ visible = FALSE;
+ }
+ else
+ {
+ closing_td_p = FALSE;
+ visible = TRUE;
+ }
+
+ k = i + 1;
+
+ /* a '<' followed by whitespace is not treated as a tag */
+ if(g_ascii_isspace(str2[k]))
+ visible = TRUE;
+ else if (str2[k])
+ {
+ /* Scan until we end the tag either implicitly (closed start
+ * tag) or explicitly, using a sloppy method (i.e., < or >
+ * inside quoted attributes will screw us up)
+ */
+ while (str2[k] && str2[k] != '<' && str2[k] != '>')
+ {
+ k++;
+ }
+
+ /* If we've got an <a> tag with an href, save the address
+ * to print later. */
+ if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
+ g_ascii_isspace(str2[i+2]))
+ {
+ int st; /* start of href, inclusive [ */
+ int end; /* end of href, exclusive ) */
+ char delim = ' ';
+ /* Find start of href */
+ for (st = i + 3; st < k; st++)
+ {
+ if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
+ {
+ st += 5;
+ if (str2[st] == '"' || str2[st] == '\'')
+ {
+ delim = str2[st];
+ st++;
+ }
+ break;
+ }
+ }
+ /* find end of address */
+ for (end = st; end < k && str2[end] != delim; end++)
+ {
+ /* All the work is done in the loop construct above. */
+ }
+
+ /* If there's an address, save it. If there was
+ * already one saved, kill it. */
+ if (st < k)
+ {
+ char *tmp;
+ g_free(href);
+ tmp = g_strndup(str2 + st, end - st);
+ href = purple_unescape_html(tmp);
+ g_free(tmp);
+ /* remember where the link text starts in the output */
+ href_st = j;
+ }
+ }
+
+ /* Replace </a> with an ascii representation of the
+ * address the link was pointing to. */
+ else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
+ {
+ size_t hrlen = strlen(href);
+
+ /* Only insert the href if it's different from the CDATA. */
+ /* NOTE(review): href is released only when it is inserted; when it
+ * equals the link text it stays set, so a later stray </a> could
+ * still print it -- confirm whether that is intended. */
+ if ((hrlen != (gsize)(j - href_st) ||
+ strncmp(str2 + href_st, href, hrlen)) &&
+ (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
+ strncmp(str2 + href_st, href + 7, hrlen - 7)))
+ {
+ str2[j++] = ' ';
+ str2[j++] = '(';
+ memmove(str2 + j, href, hrlen);
+ j += hrlen;
+ str2[j++] = ')';
+ g_free(href);
+ href = NULL;
+ }
+ }
+
+ /* Check for tags which should be mapped to newline (but ignore some of
+ * the tags at the beginning of the text) */
+ else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
+ || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
+ || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
+ || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
+ || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
+ || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
+ || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
+ {
+ str2[j++] = '\n';
+ }
+ /* Check for tags which begin CDATA and need to be closed */
+ else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
+ {
+ cdata_close_tag = "</script>";
+ }
+ else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
+ {
+ cdata_close_tag = "</style>";
+ }
+ /* Update the index and continue checking after the tag */
+ /* (when the scan stopped at '<' or the end, step back one so the
+ * loop's i++ lands exactly on that character) */
+ i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
+ continue;
+ }
+ }
+ else if (cdata_close_tag)
+ {
+ /* inside <script>/<style>: drop everything */
+ continue;
+ }
+ else if (!g_ascii_isspace(str2[i]))
+ {
+ visible = TRUE;
+ }
+
+ /* decode a recognised entity and skip past its source text */
+ if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
+ {
+ while (*ent)
+ str2[j++] = *ent++;
+ i += entlen - 1;
+ continue;
+ }
+
+ if (visible)
+ str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
+ }
+
+ g_free(href);
+
+ str2[j] = '\0';
+
+ return str2;
+}
+
+/*
+ * Returns TRUE when @c is one of the characters that end a URL while
+ * linkifying: NUL, space, comma, newline, carriage return, '<', '>' or '"'.
+ */
+static gboolean
+badchar(char c)
+{
+	if (c == '\0' || c == ' ' || c == ',' ||
+			c == '\n' || c == '\r' ||
+			c == '<' || c == '>' || c == '"')
+		return TRUE;
+
+	return FALSE;
+}
+
+/*
+ * Returns TRUE when the text at @c starts with one of the entities
+ * "&lt;", "&gt;" or "&quot;" (case-insensitive), i.e. the escaped forms of
+ * characters that badchar() rejects.
+ */
+static gboolean
+badentity(const char *c)
+{
+	if (g_ascii_strncasecmp(c, "&lt;", 4) == 0)
+		return TRUE;
+	if (g_ascii_strncasecmp(c, "&gt;", 4) == 0)
+		return TRUE;
+	if (g_ascii_strncasecmp(c, "&quot;", 6) == 0)
+		return TRUE;
+
+	return FALSE;
+}
+
+/*
+ * Turns the URL that starts at @c into an <A HREF="..."> element appended
+ * to @ret.
+ *
+ * @start is the beginning of the whole text (a lower bound when trimming
+ * backwards), @matchlen the length of the scheme or prefix that was
+ * recognised at @c, @urlprefix a string put in front of the link target and
+ * @inside_paren is non-zero when an opening parenthesis was seen earlier.
+ *
+ * Returns the position just after the linkified text, or @c unchanged when
+ * nothing follows the recognised prefix (nothing is appended in that case).
+ */
+static const char *
+process_link(GString *ret,
+		const char *start, const char *c,
+		int matchlen,
+		const char *urlprefix,
+		int inside_paren)
+{
+	const char *stop = c;
+	char *shown, *target;
+
+	for (;;) {
+		/* run forward to the next character that cannot be part of a URL */
+		while (!badchar(*stop) && !badentity(stop))
+			stop++;
+
+		/* only the prefix itself was found: this is not a link */
+		if (stop - c == matchlen)
+			return c;
+
+		/* a comma not followed by a space still belongs to the URL */
+		if (*stop == ',' && *(stop + 1) != ' ') {
+			stop++;
+			continue;
+		}
+
+		break;
+	}
+
+	/* a trailing full stop, or the ')' closing an earlier '(', is not part
+	 * of the link */
+	if (stop > start && *(stop - 1) == '.')
+		stop--;
+	if (stop > start && *(stop - 1) == ')' && inside_paren > 0)
+		stop--;
+
+	shown = g_strndup(c, stop - c);
+	target = purple_unescape_html(shown);
+	g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
+			urlprefix,
+			target, shown);
+	g_free(target);
+	g_free(shown);
+
+	return stop;
+}
+
+char *
+purple_markup_linkify(const char *text)
+{
+ const char *c, *t, *q = NULL;
+ char *tmpurlbuf, *url_buf;
+ gunichar g;
+ gboolean inside_html = FALSE;
+ int inside_paren = 0;
+ GString *ret;
+
+ if (text == NULL)
+ return NULL;
+
+ ret = g_string_new("");
+
+ c = text;
+ while (*c) {
+
+ if(*c == '(' && !inside_html) {
+ inside_paren++;
+ ret = g_string_append_c(ret, *c);
+ c++;
+ }
+
+ if(inside_html) {
+ if(*c == '>') {
+ inside_html = FALSE;
+ } else if(!q && (*c == '\"' || *c == '\'')) {
+ q = c;
+ } else if(q) {
+ if(*c == *q)
+ q = NULL;
+ }
+ } else if(*c == '<') {
+ inside_html = TRUE;
+ if (!g_ascii_strncasecmp(c, "<A", 2)) {
+ while (1) {
+ if (!g_ascii_strncasecmp(c, "/A>", 3)) {
+ inside_html = FALSE;
+ break;
+ }
+ ret = g_string_append_c(ret, *c);
+ c++;
+ if (!(*c))
+ break;
+ }
+ }
+ } else if (!g_ascii_strncasecmp(c, "http://", 7)) {
+ c = process_link(ret, text, c, 7, "", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "https://", 8)) {
+ c = process_link(ret, text, c, 8, "", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
+ c = process_link(ret, text, c, 6, "", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
+ c = process_link(ret, text, c, 7, "", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "file://", 7)) {
+ c = process_link(ret, text, c, 7, "", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
+ c = process_link(ret, text, c, 4, "http://", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
+ c = process_link(ret, text, c, 4, "ftp://", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
+ c = process_link(ret, text, c, 5, "", inside_paren);
+ } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
+ t = c;
+ while (1) {
+ if (badchar(*t) || badentity(t)) {
+ char *d;
+ if (t - c == 7) {
+ break;
+ }
+ if (t > text && *(t - 1) == '.')
+ t--;
+ if ((d = strstr(c + 7, "?")) != NULL && d < t)
+ url_buf = g_strndup(c + 7, d - c - 7);
+ else
+ url_buf = g_strndup(c + 7, t - c - 7);
+ if (!purple_email_is_valid(url_buf)) {
+ g_free(url_buf);
+ break;
+ }
+ g_free(url_buf);
+ url_buf = g_strndup(c, t - c);
+ tmpurlbuf = purple_unescape_html(url_buf);
+ g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
+ tmpurlbuf, url_buf);
+ g_free(url_buf);
+ g_free(tmpurlbuf);
+ c = t;
+ break;
+ }
+ t++;
+ }
+ } else if (c != text && (*c == '@')) {
+ int flag;
+ GString *gurl_buf = NULL;
+ const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
+
+ if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
+ flag = 0;
+ else {
+ flag = 1;
+ gurl_buf = g_string_new("");
+ }
+
+ t = c;
+ while (flag) {
+ /* iterate backwards grabbing the local part of an email address */
+ g = g_utf8_get_char(t);
+ if (badchar(*t) || (g >= 127) || (*t == '(') ||
+ ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
+ !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
+ (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
+ /* local part will already be part of ret, strip it out */
+ ret = g_string_truncate(ret, ret->len - (c - t));
+ ret = g_string_append_unichar(ret, g);
+ break;
+ } else {
+ g_string_prepend_unichar(gurl_buf, g);
+ t = g_utf8_find_prev_char(text, t);
+ if (t < text) {
+ ret = g_string_assign(ret, "");
+ break;
+ }
+ }
+ }
+
+ t = g_utf8_find_next_char(c, NULL);
+
+ while (flag) {
+ /* iterate forwards grabbing the domain part of an email address */
+ g = g_utf8_get_char(t);
+ if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
+ char *d;
+
+ url_buf = g_string_free(gurl_buf, FALSE);
+ gurl_buf = NULL;
+
+ /* strip off trailing periods */
+ if (*url_buf) {
+ for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
+ *d = '\0';
+ }
+
+ tmpurlbuf = purple_unescape_html(url_buf);
+ if (purple_email_is_valid(tmpurlbuf)) {
+ g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
+ tmpurlbuf, url_buf);
+ } else {
+ g_string_append(ret, url_buf);
+ }
+ g_free(url_buf);
+ g_free(tmpurlbuf);
+ c = t;
+
+ break;
+ } else {
+ g_string_append_unichar(gurl_buf, g);
+ t = g_utf8_find_next_char(t, NULL);
+ }
+ }
+
+ if (gurl_buf) {
+ g_string_free(gurl_buf, TRUE);
+ }
+ }
+
+ if(*c == ')' && !inside_html) {
+ inside_paren--;
+ ret = g_string_append_c(ret, *c);
+ c++;
+ }
+
+ if (*c == 0)
+ break;
+
+ ret = g_string_append_c(ret, *c);
+ c++;
+
+ }
+ return g_string_free(ret, FALSE);
+}
+
+char *purple_unescape_text(const char *in)
+{
+ GString *ret;
+ const char *c = in;
+
+ if (in == NULL)
+ return NULL;
+
+ ret = g_string_new("");
+ while (*c) {
+ int len;
+ const char *ent;
+
+ if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
+ g_string_append(ret, ent);
+ c += len;
+ } else {
+ g_string_append_c(ret, *c);
+ c++;
+ }
+ }
+
+ return g_string_free(ret, FALSE);
+}
+
+char *purple_unescape_html(const char *html)
+{
+ GString *ret;
+ const char *c = html;
+
+ if (html == NULL)
+ return NULL;
+
+ ret = g_string_new("");
+ while (*c) {
+ int len;
+ const char *ent;
+
+ if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
+ g_string_append(ret, ent);
+ c += len;
+ } else if (!strncmp(c, "<br>", 4)) {
+ g_string_append_c(ret, '\n');
+ c += 4;
+ } else {
+ g_string_append_c(ret, *c);
+ c++;
+ }
+ }
+
+ return g_string_free(ret, FALSE);
+}
+
+char *
+purple_markup_slice(const char *str, guint x, guint y)
+{
+ GString *ret;
+ GQueue *q;
+ guint z = 0;
+ gboolean appended = FALSE;
+ gunichar c;
+ char *tag;
+
+ g_return_val_if_fail(str != NULL, NULL);
+ g_return_val_if_fail(x <= y, NULL);
+
+ if (x == y)
+ return g_strdup("");
+
+ ret = g_string_new("");
+ q = g_queue_new();
+
+ while (*str && (z < y)) {
+ c = g_utf8_get_char(str);
+
+ if (c == '<') {
+ char *end = strchr(str, '>');
+
+ if (!end) {
+ g_string_free(ret, TRUE);
+ while ((tag = g_queue_pop_head(q)))
+ g_free(tag);
+ g_queue_free(q);
+ return NULL;
+ }
+
+ if (!g_ascii_strncasecmp(str, "<img ", 5)) {
+ z += strlen("[Image]");
+ } else if (!g_ascii_strncasecmp(str, "<br", 3)) {
+ z += 1;
+ } else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
+ z += strlen("\n---\n");
+ } else if (!g_ascii_strncasecmp(str, "</", 2)) {
+ /* pop stack */
+ char *tmp;
+
+ tmp = g_queue_pop_head(q);
+ g_free(tmp);
+ /* z += 0; */
+ } else {
+ /* push it onto the stack */
+ char *tmp;
+
+ tmp = g_strndup(str, end - str + 1);
+ g_queue_push_head(q, tmp);
+ /* z += 0; */
+ }
+
+ if (z >= x) {
+ g_string_append_len(ret, str, end - str + 1);
+ }
+
+ str = end;
+ } else if (c == '&') {
+ char *end = strchr(str, ';');
+ if (!end) {
+ g_string_free(ret, TRUE);
+ while ((tag = g_queue_pop_head(q)))
+ g_free(tag);
+ g_queue_free(q);
+
+ return NULL;
+ }
+
+ if (z >= x)
+ g_string_append_len(ret, str, end - str + 1);
+
+ z++;
+ str = end;
+ } else {
+ if (z == x && z > 0 && !appended) {
+ GList *l = q->tail;
+
+ while (l) {
+ tag = l->data;
+ g_string_append(ret, tag);
+ l = l->prev;
+ }
+ appended = TRUE;
+ }
+
+ if (z >= x)
+ g_string_append_unichar(ret, c);
+ z++;
+ }
+
+ str = g_utf8_next_char(str);
+ }
+
+ while ((tag = g_queue_pop_head(q))) {
+ char *name;
+
+ name = purple_markup_get_tag_name(tag);
+ g_string_append_printf(ret, "</%s>", name);
+ g_free(name);
+ g_free(tag);
+ }
+
+ g_queue_free(q);
+ return g_string_free(ret, FALSE);
+}
+
+char *
+purple_markup_get_tag_name(const char *tag)
+{
+ int i;
+ g_return_val_if_fail(tag != NULL, NULL);
+ g_return_val_if_fail(*tag == '<', NULL);
+
+ for (i = 1; tag[i]; i++)
+ if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/')
+ break;
+
+ return g_strndup(tag+1, i-1);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libpurple/purplemarkup.h Fri Oct 16 02:27:21 2020 -0500
@@ -0,0 +1,232 @@
+/*
+ * Purple - Internet Messaging Library
+ * Copyright (C) Pidgin Developers <devel@pidgin.im>
+ *
+ * Purple is the legal property of its developers, whose names are too numerous
+ * to list here. Please refer to the COPYRIGHT file distributed with this
+ * source distribution.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#if !defined(PURPLE_GLOBAL_HEADER_INSIDE) && !defined(PURPLE_COMPILATION)
+# error "only <pidgin.h> may be included directly"
+#endif
+
+#ifndef PURPLE_MARKUP_H
+#define PURPLE_MARKUP_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+/**
+ * SECTION:purplemarkup
+ * @section_id: libpurple-purplemarkup
+ * @title: Text Markup API
+ */
+
+/**
+ * purple_markup_escape_text:
+ * @text: The text to escape
+ * @length: The length of @text in bytes, or -1 if @text is NUL-terminated
+ *
+ * Escapes special characters in a plain-text string so they display
+ * correctly as HTML. For example, &amp; is replaced with &amp;amp; and &lt; is
+ * replaced with &amp;lt;
+ *
+ * This is exactly the same as g_markup_escape_text(), except that it
+ * does not change ' to &amp;apos; because &amp;apos; is not a valid HTML 4 entity,
+ * and is displayed literally in IE7.
+ */
+gchar *purple_markup_escape_text(const gchar *text, gssize length);
+
+/**
+ * purple_markup_find_tag:
+ * @needle: The name of the tag
+ * @haystack: The NUL-terminated string to search in
+ * @start: A pointer to the start of the tag if found
+ * @end: A pointer to the end of the tag if found
+ * @attributes: The attributes, if the tag was found. This should
+ * be freed with g_datalist_clear().
+ *
+ * Finds an HTML tag matching the given name.
+ *
+ * This locates an HTML tag's start and end, and stores its attributes
+ * in a #GData keyed data list. The names of the attributes are lower-cased
+ * in the data list, and the name of the tag is case insensitive.
+ *
+ * Returns: TRUE if the tag was found
+ */
+gboolean purple_markup_find_tag(const char *needle, const char *haystack,
+ const char **start, const char **end,
+ GData **attributes);
+
+/**
+ * purple_markup_html_to_xhtml:
+ * @html: The HTML markup.
+ * @dest_xhtml: The destination XHTML output.
+ * @dest_plain: The destination plain-text output.
+ *
+ * Converts HTML markup to XHTML.
+ */
+void purple_markup_html_to_xhtml(const char *html, char **dest_xhtml,
+ char **dest_plain);
+
+/**
+ * purple_markup_strip_html:
+ * @str: The string to strip HTML from.
+ *
+ * Strips HTML tags from a string.
+ *
+ * Returns: The new string without HTML. You must g_free this string
+ * when finished with it.
+ */
+char *purple_markup_strip_html(const char *str);
+
+/**
+ * purple_markup_linkify:
+ * @str: The string to linkify.
+ *
+ * Adds the necessary HTML code to turn URIs into HTML links in a string.
+ *
+ * Returns: The new string with all URIs surrounded in standard
+ * HTML &lt;a href="whatever"&gt;&lt;/a&gt; tags. You must g_free()
+ * this string when finished with it.
+ */
+char *purple_markup_linkify(const char *str);
+
+/**
+ * purple_unescape_text:
+ * @text: The string in which to unescape any HTML entities
+ *
+ * Unescapes HTML entities to their literal characters in the text.
+ * For example "&amp;amp;" is replaced by '&amp;' and so on. Also converts
+ * numerical entities (e.g. "&amp;\#38;" is also '&amp;').
+ *
+ * This function currently supports the following named entities:
+ * "&amp;amp;", "&amp;lt;", "&amp;gt;", "&amp;copy;", "&amp;quot;",
+ * "&amp;reg;", "&amp;apos;"
+ *
+ * purple_unescape_html() is similar, but also converts "&lt;br&gt;" into "\n".
+ *
+ * See purple_unescape_html()
+ *
+ * Returns: The text with HTML entities literalized. You must g_free
+ * this string when finished with it.
+ */
+char *purple_unescape_text(const char *text);
+
+/**
+ * purple_unescape_html:
+ * @html: The string in which to unescape any HTML entities
+ *
+ * Unescapes HTML entities to their literal characters and converts
+ * "&lt;br&gt;" to "\n". See purple_unescape_text() for more details.
+ *
+ * See purple_unescape_text()
+ *
+ * Returns: The text with HTML entities literalized. You must g_free
+ * this string when finished with it.
+ */
+char *purple_unescape_html(const char *html);
+
+/**
+ * purple_markup_slice:
+ * @str: The input NUL terminated, HTML, UTF-8 (or ASCII) string.
+ * @x: The character offset into an unformatted version of str to
+ * begin at.
+ * @y: The character offset (into an unformatted version of str) of
+ * one past the last character to include in the slice.
+ *
+ * Returns a newly allocated substring of the HTML UTF-8 string "str".
+ * The markup is preserved such that the substring will have the same
+ * formatting as original string, even though some tags may have been
+ * opened before "x", or may close after "y". All open tags are closed
+ * at the end of the returned string, in the proper order.
+ *
+ * Note that x and y are in character offsets, not byte offsets, and
+ * are offsets into an unformatted version of str. Because of this,
+ * this function may be sensitive to changes in GtkIMHtml and may break
+ * when used with other UIs. libpurple users are encouraged to report and
+ * work out any problems encountered.
+ *
+ * Returns: The HTML slice of string, with all formatting retained.
+ */
+char *purple_markup_slice(const char *str, guint x, guint y);
+
+/**
+ * purple_markup_get_tag_name:
+ * @tag: The string starting an HTML tag.
+ *
+ * Returns a newly allocated string containing the name of the tag
+ * located at "tag". Tag is expected to point to a '<', and contain
+ * a '>' sometime after that. If there is no '>' and the string is
+ * not NUL terminated, this function can be expected to segfault.
+ *
+ * Returns: A string containing the name of the tag.
+ */
+char *purple_markup_get_tag_name(const char *tag);
+
+/**
+ * purple_markup_unescape_entity:
+ * @text: A string containing an HTML entity.
+ * @length: If not %NULL, the string length of the entity is stored in this location.
+ *
+ * Returns a constant string of the character representation of the HTML
+ * entity pointed to by @text. For example, purple_markup_unescape_entity("&amp;amp;", NULL)
+ * will return "&amp;". The @text variable is expected to point to an '&amp;',
+ * the first character of the entity. If given an unrecognized entity, the function
+ * returns %NULL.
+ *
+ * Note that this function, unlike purple_unescape_html(), does not search
+ * the string for the entity, does not replace the entity, and does not
+ * return a newly allocated string.
+ *
+ * Returns: A constant string containing the character representation of the given entity.
+ */
+const char * purple_markup_unescape_entity(const char *text, int *length);
+
+/**
+ * purple_markup_get_css_property:
+ * @style: A string containing the inline CSS text.
+ * @opt: The requested CSS property.
+ *
+ * Returns a newly allocated string containing the value of the CSS property specified
+ * in @opt. The @style argument is expected to point to an HTML inline CSS string.
+ * The function searches for the CSS property and returns its value.
+ *
+ * For example, purple_markup_get_css_property("direction:rtl;color:#dc4d1b;",
+ * "color") would return "#dc4d1b".
+ *
+ * On error or if the requested property was not found, the function returns
+ * %NULL.
+ *
+ * Returns: The value of the requested CSS property.
+ */
+char * purple_markup_get_css_property(const gchar *style, const gchar *opt);
+
+/**
+ * purple_markup_is_rtl:
+ * @html: The HTML text.
+ *
+ * Check if the given HTML contains RTL text.
+ *
+ * Returns: TRUE if the text contains RTL text, FALSE otherwise.
+ */
+gboolean purple_markup_is_rtl(const char *html);
+
+G_END_DECLS
+
+#endif /* PURPLE_MARKUP_H */
--- a/libpurple/request.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/request.c Fri Oct 16 02:27:21 2020 -0500
@@ -24,6 +24,7 @@
#include "internal.h"
#include "notify.h"
+#include "purplemarkup.h"
#include "request.h"
#include "debug.h"
#include "purplekeyvaluepair.h"
--- a/libpurple/savedstatuses.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/savedstatuses.c Fri Oct 16 02:27:21 2020 -0500
@@ -26,10 +26,10 @@
#include "debug.h"
#include "idle.h"
#include "notify.h"
+#include "purplemarkup.h"
#include "savedstatuses.h"
#include "request.h"
#include "status.h"
-#include "util.h"
#include "xmlnode.h"
/*
--- a/libpurple/tests/meson.build Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/tests/meson.build Fri Oct 16 02:27:21 2020 -0500
@@ -5,6 +5,7 @@
'credential_provider',
'image',
'keyvaluepair',
+ 'markup',
'protocol_action',
'protocol_attention',
'protocol_xfer',
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libpurple/tests/test_markup.c Fri Oct 16 02:27:21 2020 -0500
@@ -0,0 +1,234 @@
+/*
+ * Purple - Internet Messaging Library
+ * Copyright (C) Pidgin Developers <devel@pidgin.im>
+ *
+ * Purple is the legal property of its developers, whose names are too numerous
+ * to list here. Please refer to the COPYRIGHT file distributed with this
+ * source distribution.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+
+#include <purple.h>
+
+typedef struct {
+ gchar *markup;
+ gchar *xhtml;
+ gchar *plaintext;
+} MarkupTestData;
+
+static void
+test_util_markup_html_to_xhtml(void) {
+ gint i;
+ MarkupTestData data[] = {
+ {
+ "<a>",
+ "<a href=\"\"></a>",
+ "",
+ }, {
+ "<A href='URL'>ABOUT</a>",
+ "<a href=\"URL\">ABOUT</a>",
+ "ABOUT <URL>",
+ }, {
+ "<a href='URL'>URL</a>",
+ "<a href=\"URL\">URL</a>",
+ "URL",
+ }, {
+ "<a href='mailto:mail'>mail</a>",
+ "<a href=\"mailto:mail\">mail</a>",
+ "mail",
+ }, {
+ "<A href='\"U&apos;R&L'>ABOUT</a>",
+ "<a href=\"&quot;U&apos;R&amp;L\">ABOUT</a>",
+ "ABOUT <\"U'R&L>",
+ }, {
+ "<img src='SRC' alt='ALT'/>",
+ "<img src='SRC' alt='ALT' />",
+ "ALT",
+ }, {
+ "<img src=\"'S&apos;R&C\" alt=\"'A&apos;L&T\"/>",
+ "<img src='&apos;S&apos;R&amp;C' alt='&apos;A&apos;L&amp;T' />",
+ "'A'L&T",
+ }, {
+ "<unknown>",
+ "&lt;unknown>",
+ "<unknown>",
+ }, {
+ "&eacute;&amp;",
+ "&eacute;&amp;",
+ "&eacute;&",
+ }, {
+ "<h1>A<h2>B</h2>C</h1>",
+ "<h1>A<h2>B</h2>C</h1>",
+ "ABC",
+ }, {
+ "<h1><h2><h3><h4>",
+ "<h1><h2><h3><h4></h4></h3></h2></h1>",
+ "",
+ }, {
+ "<italic/>",
+ "<em/>",
+ "",
+ }, {
+ "</",
+ "&lt;/",
+ "</",
+ }, {
+ "</div>",
+ "",
+ "",
+ }, {
+ "<hr/>",
+ "<br/>",
+ "\n",
+ }, {
+ "<hr>",
+ "<br/>",
+ "\n",
+ }, {
+ "<br />",
+ "<br/>",
+ "\n",
+ }, {
+ "<br>INSIDE</br>",
+ "<br/>INSIDE",
+ "\nINSIDE",
+ }, {
+ "<div></div>",
+ "<div></div>",
+ "",
+ }, {
+ "<div/>",
+ "<div/>",
+ "",
+ }, {
+ "<div attr='\"&<>'/>",
+ "<div attr='&quot;&amp;&lt;&gt;'/>",
+ "",
+ }, {
+ "<div attr=\"'\"/>",
+ "<div attr=\"&apos;\"/>",
+ "",
+ }, {
+ "<div/> < <div/>",
+ "<div/> &lt; <div/>",
+ " < ",
+ }, {
+ "<div>x</div>",
+ "<div>x</div>",
+ "x",
+ }, {
+ "<b>x</b>",
+ "<span style='font-weight: bold;'>x</span>",
+ "x",
+ }, {
+ "<bold>x</bold>",
+ "<span style='font-weight: bold;'>x</span>",
+ "x",
+ }, {
+ "<strong>x</strong>",
+ "<span style='font-weight: bold;'>x</span>",
+ "x",
+ }, {
+ "<u>x</u>",
+ "<span style='text-decoration: underline;'>x</span>",
+ "x",
+ }, {
+ "<underline>x</underline>",
+ "<span style='text-decoration: underline;'>x</span>",
+ "x",
+ }, {
+ "<s>x</s>",
+ "<span style='text-decoration: line-through;'>x</span>",
+ "x",
+ }, {
+ "<strike>x</strike>",
+ "<span style='text-decoration: line-through;'>x</span>",
+ "x",
+ }, {
+ "<sub>x</sub>",
+ "<span style='vertical-align:sub;'>x</span>",
+ "x",
+ }, {
+ "<sup>x</sup>",
+ "<span style='vertical-align:super;'>x</span>",
+ "x",
+ }, {
+ "<FONT>x</FONT>",
+ "x",
+ "x",
+ }, {
+ "<font face=\"'Times&gt;New & Roman'\">x</font>",
+ "<span style='font-family: \"Times&gt;New &amp; Roman\";'>x</span>",
+ "x",
+ }, {
+ "<font back=\"'color&gt;blue&red'\">x</font>",
+ "<span style='background: \"color&gt;blue&amp;red\";'>x</span>",
+ "x",
+ }, {
+ "<font color=\"'color&gt;blue&red'\">x</font>",
+ "<span style='color: \"color&gt;blue&amp;red\";'>x</span>",
+ "x",
+ }, {
+ "<font size=1>x</font>",
+ "<span style='font-size: xx-small;'>x</span>",
+ "x",
+ }, {
+ "<font size=432>x</font>",
+ "<span style='font-size: medium;'>x</span>",
+ "x",
+ }, {
+ "<!--COMMENT-->",
+ "<!--COMMENT-->",
+ "COMMENT-->",
+ }, {
+ "<br />",
+ "&lt;br />",
+ "<br />",
+ }, {
+ "<hr />",
+ "&lt;hr />",
+ "<hr />"
+ }, {
+ NULL, NULL, NULL,
+ }
+ };
+
+ for(i = 0; data[i].markup; i++) {
+ gchar *xhtml = NULL, *plaintext = NULL;
+
+ purple_markup_html_to_xhtml(data[i].markup, &xhtml, &plaintext);
+
+ g_assert_cmpstr(data[i].xhtml, ==, xhtml);
+ g_free(xhtml);
+
+ g_assert_cmpstr(data[i].plaintext, ==, plaintext);
+ g_free(plaintext);
+ }
+}
+
+/******************************************************************************
+ * Main
+ *****************************************************************************/
+gint
+main(gint argc, gchar **argv) {
+ g_test_init(&argc, &argv, NULL);
+
+ g_test_add_func("/util/markup/html to xhtml",
+ test_util_markup_html_to_xhtml);
+
+ return g_test_run();
+}
\ No newline at end of file
--- a/libpurple/tests/test_util.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/tests/test_util.c Fri Oct 16 02:27:21 2020 -0500
@@ -196,205 +196,6 @@
}
/******************************************************************************
- * Markup tests
- *****************************************************************************/
-typedef struct {
- gchar *markup;
- gchar *xhtml;
- gchar *plaintext;
-} MarkupTestData;
-
-static void
-test_util_markup_html_to_xhtml(void) {
- gint i;
- MarkupTestData data[] = {
- {
- "<a>",
- "<a href=\"\"></a>",
- "",
- }, {
- "<A href='URL'>ABOUT</a>",
- "<a href=\"URL\">ABOUT</a>",
- "ABOUT <URL>",
- }, {
- "<a href='URL'>URL</a>",
- "<a href=\"URL\">URL</a>",
- "URL",
- }, {
- "<a href='mailto:mail'>mail</a>",
- "<a href=\"mailto:mail\">mail</a>",
- "mail",
- }, {
- "<A href='\"U&apos;R&L'>ABOUT</a>",
- "<a href=\"&quot;U&apos;R&amp;L\">ABOUT</a>",
- "ABOUT <\"U'R&L>",
- }, {
- "<img src='SRC' alt='ALT'/>",
- "<img src='SRC' alt='ALT' />",
- "ALT",
- }, {
- "<img src=\"'S&apos;R&C\" alt=\"'A&apos;L&T\"/>",
- "<img src='&apos;S&apos;R&amp;C' alt='&apos;A&apos;L&amp;T' />",
- "'A'L&T",
- }, {
- "<unknown>",
- "&lt;unknown>",
- "<unknown>",
- }, {
- "&eacute;&amp;",
- "&eacute;&amp;",
- "&eacute;&",
- }, {
- "<h1>A<h2>B</h2>C</h1>",
- "<h1>A<h2>B</h2>C</h1>",
- "ABC",
- }, {
- "<h1><h2><h3><h4>",
- "<h1><h2><h3><h4></h4></h3></h2></h1>",
- "",
- }, {
- "<italic/>",
- "<em/>",
- "",
- }, {
- "</",
- "&lt;/",
- "</",
- }, {
- "</div>",
- "",
- "",
- }, {
- "<hr/>",
- "<br/>",
- "\n",
- }, {
- "<hr>",
- "<br/>",
- "\n",
- }, {
- "<br />",
- "<br/>",
- "\n",
- }, {
- "<br>INSIDE</br>",
- "<br/>INSIDE",
- "\nINSIDE",
- }, {
- "<div></div>",
- "<div></div>",
- "",
- }, {
- "<div/>",
- "<div/>",
- "",
- }, {
- "<div attr='\"&<>'/>",
- "<div attr='&quot;&amp;&lt;&gt;'/>",
- "",
- }, {
- "<div attr=\"'\"/>",
- "<div attr=\"&apos;\"/>",
- "",
- }, {
- "<div/> < <div/>",
- "<div/> &lt; <div/>",
- " < ",
- }, {
- "<div>x</div>",
- "<div>x</div>",
- "x",
- }, {
- "<b>x</b>",
- "<span style='font-weight: bold;'>x</span>",
- "x",
- }, {
- "<bold>x</bold>",
- "<span style='font-weight: bold;'>x</span>",
- "x",
- }, {
- "<strong>x</strong>",
- "<span style='font-weight: bold;'>x</span>",
- "x",
- }, {
- "<u>x</u>",
- "<span style='text-decoration: underline;'>x</span>",
- "x",
- }, {
- "<underline>x</underline>",
- "<span style='text-decoration: underline;'>x</span>",
- "x",
- }, {
- "<s>x</s>",
- "<span style='text-decoration: line-through;'>x</span>",
- "x",
- }, {
- "<strike>x</strike>",
- "<span style='text-decoration: line-through;'>x</span>",
- "x",
- }, {
- "<sub>x</sub>",
- "<span style='vertical-align:sub;'>x</span>",
- "x",
- }, {
- "<sup>x</sup>",
- "<span style='vertical-align:super;'>x</span>",
- "x",
- }, {
- "<FONT>x</FONT>",
- "x",
- "x",
- }, {
- "<font face=\"'Times&gt;New & Roman'\">x</font>",
- "<span style='font-family: \"Times&gt;New &amp; Roman\";'>x</span>",
- "x",
- }, {
- "<font back=\"'color&gt;blue&red'\">x</font>",
- "<span style='background: \"color&gt;blue&amp;red\";'>x</span>",
- "x",
- }, {
- "<font color=\"'color&gt;blue&red'\">x</font>",
- "<span style='color: \"color&gt;blue&amp;red\";'>x</span>",
- "x",
- }, {
- "<font size=1>x</font>",
- "<span style='font-size: xx-small;'>x</span>",
- "x",
- }, {
- "<font size=432>x</font>",
- "<span style='font-size: medium;'>x</span>",
- "x",
- }, {
- "<!--COMMENT-->",
- "<!--COMMENT-->",
- "COMMENT-->",
- }, {
- "<br />",
- "&lt;br />",
- "<br />",
- }, {
- "<hr />",
- "&lt;hr />",
- "<hr />"
- }, {
- NULL, NULL, NULL,
- }
- };
-
- for(i = 0; data[i].markup; i++) {
- gchar *xhtml = NULL, *plaintext = NULL;
-
- purple_markup_html_to_xhtml(data[i].markup, &xhtml, &plaintext);
-
- g_assert_cmpstr(data[i].xhtml, ==, xhtml);
- g_free(xhtml);
-
- g_assert_cmpstr(data[i].plaintext, ==, plaintext);
- g_free(plaintext);
- }
-}
-
-/******************************************************************************
* UTF8 tests
*****************************************************************************/
typedef struct {
@@ -515,9 +316,6 @@
g_test_add_func("/util/str to date time",
test_util_str_to_date_time);
- g_test_add_func("/util/markup/html to xhtml",
- test_util_markup_html_to_xhtml);
-
g_test_add_func("/util/utf8/strip unprintables",
test_util_utf8_strip_unprintables);
--- a/libpurple/util.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/util.c Fri Oct 16 02:27:21 2020 -0500
@@ -449,1546 +449,6 @@
}
/**************************************************************************
- * Markup Functions
- **************************************************************************/
-
-/*
- * This function is stolen from glib's gmarkup.c and modified to not
- * replace ' with &apos;
- */
-static void append_escaped_text(GString *str,
- const gchar *text, gssize length)
-{
- const gchar *p;
- const gchar *end;
- gunichar c;
-
- p = text;
- end = text + length;
-
- while (p != end)
- {
- const gchar *next;
- next = g_utf8_next_char (p);
-
- switch (*p)
- {
- case '&':
- g_string_append (str, "&amp;");
- break;
-
- case '<':
- g_string_append (str, "&lt;");
- break;
-
- case '>':
- g_string_append (str, "&gt;");
- break;
-
- case '"':
- g_string_append (str, "&quot;");
- break;
-
- default:
- c = g_utf8_get_char (p);
- if ((0x1 <= c && c <= 0x8) ||
- (0xb <= c && c <= 0xc) ||
- (0xe <= c && c <= 0x1f) ||
- (0x7f <= c && c <= 0x84) ||
- (0x86 <= c && c <= 0x9f))
- g_string_append_printf (str, "&#x%x;", c);
- else
- g_string_append_len (str, p, next - p);
- break;
- }
-
- p = next;
- }
-}
-
-/* This function is stolen from glib's gmarkup.c */
-gchar *purple_markup_escape_text(const gchar *text, gssize length)
-{
- GString *str;
-
- g_return_val_if_fail(text != NULL, NULL);
-
- if (length < 0)
- length = strlen(text);
-
- /* prealloc at least as long as original text */
- str = g_string_sized_new(length);
- append_escaped_text(str, text, length);
-
- return g_string_free(str, FALSE);
-}
-
-const char *
-purple_markup_unescape_entity(const char *text, int *length)
-{
- const char *pln;
- int len;
-
- if (!text || *text != '&')
- return NULL;
-
-#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
-
- if(IS_ENTITY("&amp;"))
- pln = "&";
- else if(IS_ENTITY("&lt;"))
- pln = "<";
- else if(IS_ENTITY("&gt;"))
- pln = ">";
- else if(IS_ENTITY("&nbsp;"))
- pln = " ";
- else if(IS_ENTITY("&copy;"))
- pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */
- else if(IS_ENTITY("&quot;"))
- pln = "\"";
- else if(IS_ENTITY("&reg;"))
- pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */
- else if(IS_ENTITY("&apos;"))
- pln = "\'";
- else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
- static char buf[7];
- const char *start = text + 2;
- char *end;
- guint64 pound;
- int base = 10;
- int buflen;
-
- if (*start == 'x') {
- base = 16;
- start++;
- }
-
- pound = g_ascii_strtoull(start, &end, base);
- if (pound == 0 || pound > INT_MAX || *end != ';') {
- return NULL;
- }
-
- len = (end - text) + 1;
-
- buflen = g_unichar_to_utf8((gunichar)pound, buf);
- buf[buflen] = '\0';
- pln = buf;
- }
- else
- return NULL;
-
- if (length)
- *length = len;
- return pln;
-}
-
-char *
-purple_markup_get_css_property(const gchar *style,
- const gchar *opt)
-{
- const gchar *css_str = style;
- const gchar *css_value_start;
- const gchar *css_value_end;
- gchar *tmp;
- gchar *ret;
-
- g_return_val_if_fail(opt != NULL, NULL);
-
- if (!css_str)
- return NULL;
-
- /* find the CSS property */
- while (1)
- {
- /* skip whitespace characters */
- while (*css_str && g_ascii_isspace(*css_str))
- css_str++;
- if (!g_ascii_isalpha(*css_str))
- return NULL;
- if (g_ascii_strncasecmp(css_str, opt, strlen(opt)))
- {
- /* go to next css property positioned after the next ';' */
- while (*css_str && *css_str != '"' && *css_str != ';')
- css_str++;
- if(*css_str != ';')
- return NULL;
- css_str++;
- }
- else
- break;
- }
-
- /* find the CSS value position in the string */
- css_str += strlen(opt);
- while (*css_str && g_ascii_isspace(*css_str))
- css_str++;
- if (*css_str != ':')
- return NULL;
- css_str++;
- while (*css_str && g_ascii_isspace(*css_str))
- css_str++;
- if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
- return NULL;
-
- /* mark the CSS value */
- css_value_start = css_str;
- while (*css_str && *css_str != '"' && *css_str != ';')
- css_str++;
- css_value_end = css_str - 1;
-
- /* Removes trailing whitespace */
- while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
- css_value_end--;
-
- tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
- ret = purple_unescape_html(tmp);
- g_free(tmp);
-
- return ret;
-}
-
-gboolean purple_markup_is_rtl(const char *html)
-{
- GData *attributes;
- const gchar *start, *end;
- gboolean res = FALSE;
-
- if (purple_markup_find_tag("span", html, &start, &end, &attributes))
- {
- /* tmp is a member of attributes and is free with g_datalist_clear call */
- const char *tmp = g_datalist_get_data(&attributes, "dir");
- if (tmp && !g_ascii_strcasecmp(tmp, "RTL"))
- res = TRUE;
- if (!res)
- {
- tmp = g_datalist_get_data(&attributes, "style");
- if (tmp)
- {
- char *tmp2 = purple_markup_get_css_property(tmp, "direction");
- if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL"))
- res = TRUE;
- g_free(tmp2);
- }
-
- }
- g_datalist_clear(&attributes);
- }
- return res;
-}
-
-gboolean
-purple_markup_find_tag(const char *needle, const char *haystack,
- const char **start, const char **end, GData **attributes)
-{
- GData *attribs;
- const char *cur = haystack;
- char *name = NULL;
- gboolean found = FALSE;
- gboolean in_tag = FALSE;
- gboolean in_attr = FALSE;
- const char *in_quotes = NULL;
- size_t needlelen;
-
- g_return_val_if_fail( needle != NULL, FALSE);
- g_return_val_if_fail( *needle != '\0', FALSE);
- g_return_val_if_fail( haystack != NULL, FALSE);
- g_return_val_if_fail( start != NULL, FALSE);
- g_return_val_if_fail( end != NULL, FALSE);
- g_return_val_if_fail(attributes != NULL, FALSE);
-
- needlelen = strlen(needle);
- g_datalist_init(&attribs);
-
- while (*cur && !found) {
- if (in_tag) {
- if (in_quotes) {
- const char *close = cur;
-
- while (*close && *close != *in_quotes)
- close++;
-
- /* if we got the close quote, store the value and carry on from *
- * after it. if we ran to the end of the string, point to the NULL *
- * and we're outta here */
- if (*close) {
- /* only store a value if we have an attribute name */
- if (name) {
- size_t len = close - cur;
- char *val = g_strndup(cur, len);
-
- g_datalist_set_data_full(&attribs, name, val, g_free);
- g_free(name);
- name = NULL;
- }
-
- in_quotes = NULL;
- cur = close + 1;
- } else {
- cur = close;
- }
- } else if (in_attr) {
- const char *close = cur;
-
- while (*close && *close != '>' && *close != '"' &&
- *close != '\'' && *close != ' ' && *close != '=')
- close++;
-
- /* if we got the equals, store the name of the attribute. if we got
- * the quote, save the attribute and go straight to quote mode.
- * otherwise the tag closed or we reached the end of the string,
- * so we can get outta here */
- switch (*close) {
- case '"':
- case '\'':
- in_quotes = close;
- /* fall through */
- case '=':
- {
- size_t len = close - cur;
-
- /* don't store a blank attribute name */
- if (len) {
- g_free(name);
- name = g_ascii_strdown(cur, len);
- }
-
- in_attr = FALSE;
- cur = close + 1;
- }
- break;
- case ' ':
- case '>':
- in_attr = FALSE;
- /* fall through */
- default:
- cur = close;
- break;
- }
- } else {
- switch (*cur) {
- case ' ':
- /* swallow extra spaces inside tag */
- while (*cur && *cur == ' ') cur++;
- in_attr = TRUE;
- break;
- case '>':
- found = TRUE;
- *end = cur;
- break;
- case '"':
- case '\'':
- in_quotes = cur;
- /* fall through */
- default:
- cur++;
- break;
- }
- }
- } else {
- /* if we hit a < followed by the name of our tag... */
- if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
- *start = cur;
- cur = cur + needlelen + 1;
-
- /* if we're pointing at a space or a >, we found the right tag. if *
- * we're not, we've found a longer tag, so we need to skip to the *
- * >, but not being distracted by >s inside quotes. */
- if (*cur == ' ' || *cur == '>') {
- in_tag = TRUE;
- } else {
- while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') {
- if (*cur == '"') {
- cur++;
- while (*cur && *cur != '"')
- cur++;
- } else if (*cur == '\'') {
- cur++;
- while (*cur && *cur != '\'')
- cur++;
- } else {
- cur++;
- }
- }
- }
- } else {
- cur++;
- }
- }
- }
-
- /* clean up any attribute name from a premature termination */
- g_free(name);
-
- if (found) {
- *attributes = attribs;
- } else {
- *start = NULL;
- *end = NULL;
- *attributes = NULL;
- }
-
- return found;
-}
-
-struct purple_parse_tag {
- char *src_tag;
- char *dest_tag;
- gboolean ignore;
-};
-
-/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method
- recommended in the GCC docs). It contains 'continue's that should
- affect the while-loop in purple_markup_html_to_xhtml and doing the
- above would break that.
- Also, remember to put braces in constructs that require them for
- multiple statements when using this macro. */
-#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
- const char *o = c + strlen("<" x); \
- const char *p = NULL, *q = NULL, *r = NULL; \
- /* o = iterating over full tag \
- * p = > (end of tag) \
- * q = start of quoted bit \
- * r = < inside tag \
- */ \
- GString *innards = g_string_new(""); \
- while(o && *o) { \
- if(!q && (*o == '\"' || *o == '\'') ) { \
- q = o; \
- } else if(q) { \
- if(*o == *q) { /* end of quoted bit */ \
- char *unescaped = g_strndup(q+1, o-q-1); \
- char *escaped = g_markup_escape_text(unescaped, -1); \
- g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
- g_free(unescaped); \
- g_free(escaped); \
- q = NULL; \
- } else if(*c == '\\') { \
- o++; \
- } \
- } else if(*o == '<') { \
- r = o; \
- } else if(*o == '>') { \
- p = o; \
- break; \
- } else { \
- innards = g_string_append_c(innards, *o); \
- } \
- o++; \
- } \
- if(p && !r) { /* got an end of tag and no other < earlier */\
- if(*(p-1) != '/') { \
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
- pt->src_tag = x; \
- pt->dest_tag = y; \
- tags = g_list_prepend(tags, pt); \
- } \
- if(xhtml) { \
- xhtml = g_string_append(xhtml, "<" y); \
- xhtml = g_string_append(xhtml, innards->str); \
- xhtml = g_string_append_c(xhtml, '>'); \
- } \
- c = p + 1; \
- } else { /* got end of tag with earlier < *or* didn't get anything */ \
- if(xhtml) \
- xhtml = g_string_append(xhtml, "&lt;"); \
- if(plain) \
- plain = g_string_append_c(plain, '<'); \
- c++; \
- } \
- g_string_free(innards, TRUE); \
- continue; \
- } \
- if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
- (*(c+strlen("<" x)) == '>' || \
- !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
- if(xhtml) \
- xhtml = g_string_append(xhtml, "<" y); \
- c += strlen("<" x); \
- if(*c != '/') { \
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
- pt->src_tag = x; \
- pt->dest_tag = y; \
- tags = g_list_prepend(tags, pt); \
- if(xhtml) \
- xhtml = g_string_append_c(xhtml, '>'); \
- } else { \
- if(xhtml) \
- xhtml = g_string_append(xhtml, "/>");\
- } \
- c = strchr(c, '>') + 1; \
- continue; \
- }
-/* Don't forget to check the note above for ALLOW_TAG_ALT. */
-#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
-void
-purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
- char **plain_out)
-{
- GString *xhtml = NULL;
- GString *plain = NULL;
- GString *url = NULL;
- GString *cdata = NULL;
- GList *tags = NULL, *tag;
- const char *c = html;
- char quote = '\0';
-
-#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
- quote = *(ptr++); \
- else \
- quote = '\0';
-
-#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
-
- g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
-
- if(xhtml_out)
- xhtml = g_string_new("");
- if(plain_out)
- plain = g_string_new("");
-
- while(c && *c) {
- if(*c == '<') {
- if(*(c+1) == '/') { /* closing tag */
- tag = tags;
- while(tag) {
- struct purple_parse_tag *pt = tag->data;
- if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
- c += strlen(pt->src_tag) + 3;
- break;
- }
- tag = tag->next;
- }
- if(tag) {
- while(tags) {
- struct purple_parse_tag *pt = tags->data;
- if(xhtml && !pt->ignore)
- g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
- if(plain && purple_strequal(pt->src_tag, "a")) {
- /* if this is a link, we have to add the url to the plaintext, too */
- if (cdata && url &&
- (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
- g_utf8_collate(url->str + 7, cdata->str) != 0)))
- g_string_append_printf(plain, " <%s>", g_strstrip(purple_unescape_html(url->str)));
- if (cdata) {
- g_string_free(cdata, TRUE);
- cdata = NULL;
- }
-
- }
- if(tags == tag)
- break;
- tags = g_list_delete_link(tags, tags);
- g_free(pt);
- }
- g_free(tag->data);
- tags = g_list_delete_link(tags, tag);
- } else {
- /* a closing tag we weren't expecting...
- * we'll let it slide, if it's really a tag...if it's
- * just a </ we'll escape it properly */
- const char *end = c+2;
- while(*end && g_ascii_isalpha(*end))
- end++;
- if(*end == '>') {
- c = end+1;
- } else {
- if(xhtml)
- xhtml = g_string_append(xhtml, "&lt;");
- if(plain)
- plain = g_string_append_c(plain, '<');
- c++;
- }
- }
- } else { /* opening tag */
- ALLOW_TAG("blockquote");
- ALLOW_TAG("cite");
- ALLOW_TAG("div");
- ALLOW_TAG("em");
- ALLOW_TAG("h1");
- ALLOW_TAG("h2");
- ALLOW_TAG("h3");
- ALLOW_TAG("h4");
- ALLOW_TAG("h5");
- ALLOW_TAG("h6");
- /* we only allow html to start the message */
- if(c == html) {
- ALLOW_TAG("html");
- }
- ALLOW_TAG_ALT("i", "em");
- ALLOW_TAG_ALT("italic", "em");
- ALLOW_TAG("li");
- ALLOW_TAG("ol");
- ALLOW_TAG("p");
- ALLOW_TAG("pre");
- ALLOW_TAG("q");
- ALLOW_TAG("span");
- ALLOW_TAG("ul");
-
-
- /* we skip <HR> because it's not legal in XHTML-IM. However,
- * we still want to send something sensible, so we put a
- * linebreak in its place. <BR> also needs special handling
- * because putting a </BR> to close it would just be dumb. */
- if((!g_ascii_strncasecmp(c, "<br", 3)
- || !g_ascii_strncasecmp(c, "<hr", 3))
- && (*(c+3) == '>' ||
- !g_ascii_strncasecmp(c+3, "/>", 2) ||
- !g_ascii_strncasecmp(c+3, " />", 3))) {
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<br/>");
- if(plain && *c != '\n')
- plain = g_string_append_c(plain, '\n');
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- if (*(c+2) == '>')
- pt->src_tag = "b";
- else if (*(c+2) == 'o')
- pt->src_tag = "bold";
- else
- pt->src_tag = "strong";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = *(c+2) == '>' ? "u" : "underline";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if (xhtml)
- xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = *(c+2) == '>' ? "s" : "strike";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "sub";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
- continue;
- }
- if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "sup";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- c = strchr(c, '>') + 1;
- if(xhtml)
- xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
- continue;
- }
- if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
- const char *p = c + 4;
- GString *src = NULL, *alt = NULL;
-#define ESCAPE(from, to) \
- CHECK_QUOTE(from); \
- while (VALID_CHAR(from)) { \
- int len; \
- if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
- to = g_string_append(to, "&amp;"); \
- else if (*from == '\'') \
- to = g_string_append(to, "&apos;"); \
- else \
- to = g_string_append_c(to, *from); \
- from++; \
- }
-
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "src=", 4)) {
- const char *q = p + 4;
- if (src)
- g_string_free(src, TRUE);
- src = g_string_new("");
- ESCAPE(q, src);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
- const char *q = p + 4;
- if (alt)
- g_string_free(alt, TRUE);
- alt = g_string_new("");
- ESCAPE(q, alt);
- p = q;
- } else {
- p++;
- }
- }
-#undef ESCAPE
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- /* src and alt are required! */
- if(src && xhtml)
- g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
- if(alt) {
- if(plain)
- plain = g_string_append(plain, purple_unescape_html(alt->str));
- if(!src && xhtml)
- xhtml = g_string_append(xhtml, alt->str);
- g_string_free(alt, TRUE);
- }
- g_string_free(src, TRUE);
- continue;
- }
- if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
- const char *p = c + 2;
- struct purple_parse_tag *pt;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "href=", 5)) {
- const char *q = p + 5;
- if (url)
- g_string_free(url, TRUE);
- url = g_string_new("");
- if (cdata)
- g_string_free(cdata, TRUE);
- cdata = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
- int len;
- if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
- url = g_string_append(url, "&amp;");
- else if (*q == '"')
- url = g_string_append(url, "&quot;");
- else
- url = g_string_append_c(url, *q);
- q++;
- }
- p = q;
- } else {
- p++;
- }
- }
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "a";
- pt->dest_tag = "a";
- tags = g_list_prepend(tags, pt);
- if(xhtml)
- g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
- continue;
- }
-#define ESCAPE(from, to) \
- CHECK_QUOTE(from); \
- while (VALID_CHAR(from)) { \
- int len; \
- if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
- to = g_string_append(to, "&amp;"); \
- else if (*from == '\'') \
- to = g_string_append_c(to, '\"'); \
- else \
- to = g_string_append_c(to, *from); \
- from++; \
- }
- if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
- const char *p = c + 5;
- GString *style = g_string_new("");
- struct purple_parse_tag *pt;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "back=", 5)) {
- const char *q = p + 5;
- GString *color = g_string_new("");
- ESCAPE(q, color);
- g_string_append_printf(style, "background: %s; ", color->str);
- g_string_free(color, TRUE);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "color=", 6)) {
- const char *q = p + 6;
- GString *color = g_string_new("");
- ESCAPE(q, color);
- g_string_append_printf(style, "color: %s; ", color->str);
- g_string_free(color, TRUE);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "face=", 5)) {
- const char *q = p + 5;
- GString *face = g_string_new("");
- ESCAPE(q, face);
- g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
- g_string_free(face, TRUE);
- p = q;
- } else if (!g_ascii_strncasecmp(p, "size=", 5)) {
- const char *q = p + 5;
- int sz;
- const char *size = "medium";
- CHECK_QUOTE(q);
- sz = atoi(q);
- switch (sz)
- {
- case 1:
- size = "xx-small";
- break;
- case 2:
- size = "small";
- break;
- case 3:
- size = "medium";
- break;
- case 4:
- size = "large";
- break;
- case 5:
- size = "x-large";
- break;
- case 6:
- case 7:
- size = "xx-large";
- break;
- default:
- break;
- }
- g_string_append_printf(style, "font-size: %s; ", size);
- p = q;
- } else {
- p++;
- }
- }
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- pt = g_new0(struct purple_parse_tag, 1);
- pt->src_tag = "font";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- if(style->len && xhtml)
- g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
- else
- pt->ignore = TRUE;
- g_string_free(style, TRUE);
- continue;
- }
-#undef ESCAPE
- if (!g_ascii_strncasecmp(c, "<body ", 6)) {
- const char *p = c + 6;
- gboolean did_something = FALSE;
- while (*p && *p != '>') {
- if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
- const char *q = p + 8;
- struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
- GString *color = g_string_new("");
- CHECK_QUOTE(q);
- while (VALID_CHAR(q)) {
- color = g_string_append_c(color, *q);
- q++;
- }
- if (xhtml)
- g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
- g_string_free(color, TRUE);
- if ((c = strchr(p, '>')) != NULL)
- c++;
- else
- c = p;
- pt->src_tag = "body";
- pt->dest_tag = "span";
- tags = g_list_prepend(tags, pt);
- did_something = TRUE;
- break;
- }
- p++;
- }
- if (did_something) continue;
- }
- /* this has to come after the special case for bgcolor */
- ALLOW_TAG("body");
- if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
- char *p = strstr(c + strlen("<!--"), "-->");
- if(p) {
- if(xhtml)
- xhtml = g_string_append(xhtml, "<!--");
- c += strlen("<!--");
- continue;
- }
- }
-
- if(xhtml)
- xhtml = g_string_append(xhtml, "&lt;");
- if(plain)
- plain = g_string_append_c(plain, '<');
- c++;
- }
- } else if(*c == '&') {
- char buf[7];
- const char *pln;
- int len;
-
- if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
- len = 1;
- g_snprintf(buf, sizeof(buf), "%c", *c);
- pln = buf;
- }
- if(xhtml)
- xhtml = g_string_append_len(xhtml, c, len);
- if(plain)
- plain = g_string_append(plain, pln);
- if(cdata)
- cdata = g_string_append_len(cdata, c, len);
- c += len;
- } else {
- if(xhtml)
- xhtml = g_string_append_c(xhtml, *c);
- if(plain)
- plain = g_string_append_c(plain, *c);
- if(cdata)
- cdata = g_string_append_c(cdata, *c);
- c++;
- }
- }
- if(xhtml) {
- for (tag = tags; tag ; tag = tag->next) {
- struct purple_parse_tag *pt = tag->data;
- if(!pt->ignore)
- g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
- }
- }
- g_list_free(tags);
- if(xhtml_out)
- *xhtml_out = g_string_free(xhtml, FALSE);
- if(plain_out)
- *plain_out = g_string_free(plain, FALSE);
- if(url)
- g_string_free(url, TRUE);
- if (cdata)
- g_string_free(cdata, TRUE);
-#undef CHECK_QUOTE
-#undef VALID_CHAR
-}
-
-/* The following are probably reasonable changes:
- * - \n should be converted to a normal space
- * - in addition to <br>, <p> and <div> etc. should also be converted into \n
- * - We want to turn </td>#whitespace<td> sequences into a single tab
- * - We want to turn </tr>#whitespace<tr> sequences into a single \n
- * - <script>...</script> and <style>...</style> should be completely removed
- */
-
-char *
-purple_markup_strip_html(const char *str)
-{
- int i, j, k, entlen;
- gboolean visible = TRUE;
- gboolean closing_td_p = FALSE;
- gchar *str2;
- const gchar *cdata_close_tag = NULL, *ent;
- gchar *href = NULL;
- int href_st = 0;
-
- if(!str)
- return NULL;
-
- str2 = g_strdup(str);
-
- for (i = 0, j = 0; str2[i]; i++)
- {
- if (str2[i] == '<')
- {
- if (cdata_close_tag)
- {
- /* Note: Don't even assume any other tag is a tag in CDATA */
- if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
- strlen(cdata_close_tag)) == 0)
- {
- i += strlen(cdata_close_tag) - 1;
- cdata_close_tag = NULL;
- }
- continue;
- }
- else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
- {
- str2[j++] = '\t';
- visible = TRUE;
- }
- else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
- {
- closing_td_p = TRUE;
- visible = FALSE;
- }
- else
- {
- closing_td_p = FALSE;
- visible = TRUE;
- }
-
- k = i + 1;
-
- if(g_ascii_isspace(str2[k]))
- visible = TRUE;
- else if (str2[k])
- {
- /* Scan until we end the tag either implicitly (closed start
- * tag) or explicitly, using a sloppy method (i.e., < or >
- * inside quoted attributes will screw us up)
- */
- while (str2[k] && str2[k] != '<' && str2[k] != '>')
- {
- k++;
- }
-
- /* If we've got an <a> tag with an href, save the address
- * to print later. */
- if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
- g_ascii_isspace(str2[i+2]))
- {
- int st; /* start of href, inclusive [ */
- int end; /* end of href, exclusive ) */
- char delim = ' ';
- /* Find start of href */
- for (st = i + 3; st < k; st++)
- {
- if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
- {
- st += 5;
- if (str2[st] == '"' || str2[st] == '\'')
- {
- delim = str2[st];
- st++;
- }
- break;
- }
- }
- /* find end of address */
- for (end = st; end < k && str2[end] != delim; end++)
- {
- /* All the work is done in the loop construct above. */
- }
-
- /* If there's an address, save it. If there was
- * already one saved, kill it. */
- if (st < k)
- {
- char *tmp;
- g_free(href);
- tmp = g_strndup(str2 + st, end - st);
- href = purple_unescape_html(tmp);
- g_free(tmp);
- href_st = j;
- }
- }
-
- /* Replace </a> with an ascii representation of the
- * address the link was pointing to. */
- else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
- {
- size_t hrlen = strlen(href);
-
- /* Only insert the href if it's different from the CDATA. */
- if ((hrlen != (gsize)(j - href_st) ||
- strncmp(str2 + href_st, href, hrlen)) &&
- (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
- strncmp(str2 + href_st, href + 7, hrlen - 7)))
- {
- str2[j++] = ' ';
- str2[j++] = '(';
- memmove(str2 + j, href, hrlen);
- j += hrlen;
- str2[j++] = ')';
- g_free(href);
- href = NULL;
- }
- }
-
- /* Check for tags which should be mapped to newline (but ignore some of
- * the tags at the beginning of the text) */
- else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
- || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
- || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
- {
- str2[j++] = '\n';
- }
- /* Check for tags which begin CDATA and need to be closed */
- else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
- {
- cdata_close_tag = "</script>";
- }
- else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
- {
- cdata_close_tag = "</style>";
- }
- /* Update the index and continue checking after the tag */
- i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
- continue;
- }
- }
- else if (cdata_close_tag)
- {
- continue;
- }
- else if (!g_ascii_isspace(str2[i]))
- {
- visible = TRUE;
- }
-
- if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
- {
- while (*ent)
- str2[j++] = *ent++;
- i += entlen - 1;
- continue;
- }
-
- if (visible)
- str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
- }
-
- g_free(href);
-
- str2[j] = '\0';
-
- return str2;
-}
-
-static gboolean
-badchar(char c)
-{
- switch (c) {
- case ' ':
- case ',':
- case '\0':
- case '\n':
- case '\r':
- case '<':
- case '>':
- case '"':
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static gboolean
-badentity(const char *c)
-{
- if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
- !g_ascii_strncasecmp(c, "&gt;", 4) ||
- !g_ascii_strncasecmp(c, "&quot;", 6)) {
- return TRUE;
- }
- return FALSE;
-}
-
-static const char *
-process_link(GString *ret,
- const char *start, const char *c,
- int matchlen,
- const char *urlprefix,
- int inside_paren)
-{
- char *url_buf, *tmpurlbuf;
- const char *t;
-
- for (t = c;; t++) {
- if (!badchar(*t) && !badentity(t))
- continue;
-
- if (t - c == matchlen)
- break;
-
- if (*t == ',' && *(t + 1) != ' ') {
- continue;
- }
-
- if (t > start && *(t - 1) == '.')
- t--;
- if (t > start && *(t - 1) == ')' && inside_paren > 0)
- t--;
-
- url_buf = g_strndup(c, t - c);
- tmpurlbuf = purple_unescape_html(url_buf);
- g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
- urlprefix,
- tmpurlbuf, url_buf);
- g_free(tmpurlbuf);
- g_free(url_buf);
- return t;
- }
-
- return c;
-}
-
-char *
-purple_markup_linkify(const char *text)
-{
- const char *c, *t, *q = NULL;
- char *tmpurlbuf, *url_buf;
- gunichar g;
- gboolean inside_html = FALSE;
- int inside_paren = 0;
- GString *ret;
-
- if (text == NULL)
- return NULL;
-
- ret = g_string_new("");
-
- c = text;
- while (*c) {
-
- if(*c == '(' && !inside_html) {
- inside_paren++;
- ret = g_string_append_c(ret, *c);
- c++;
- }
-
- if(inside_html) {
- if(*c == '>') {
- inside_html = FALSE;
- } else if(!q && (*c == '\"' || *c == '\'')) {
- q = c;
- } else if(q) {
- if(*c == *q)
- q = NULL;
- }
- } else if(*c == '<') {
- inside_html = TRUE;
- if (!g_ascii_strncasecmp(c, "<A", 2)) {
- while (1) {
- if (!g_ascii_strncasecmp(c, "/A>", 3)) {
- inside_html = FALSE;
- break;
- }
- ret = g_string_append_c(ret, *c);
- c++;
- if (!(*c))
- break;
- }
- }
- } else if (!g_ascii_strncasecmp(c, "http://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "https://", 8)) {
- c = process_link(ret, text, c, 8, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
- c = process_link(ret, text, c, 6, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "file://", 7)) {
- c = process_link(ret, text, c, 7, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 4, "http://", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 4, "ftp://", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
- c = process_link(ret, text, c, 5, "", inside_paren);
- } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
- t = c;
- while (1) {
- if (badchar(*t) || badentity(t)) {
- char *d;
- if (t - c == 7) {
- break;
- }
- if (t > text && *(t - 1) == '.')
- t--;
- if ((d = strstr(c + 7, "?")) != NULL && d < t)
- url_buf = g_strndup(c + 7, d - c - 7);
- else
- url_buf = g_strndup(c + 7, t - c - 7);
- if (!purple_email_is_valid(url_buf)) {
- g_free(url_buf);
- break;
- }
- g_free(url_buf);
- url_buf = g_strndup(c, t - c);
- tmpurlbuf = purple_unescape_html(url_buf);
- g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
- tmpurlbuf, url_buf);
- g_free(url_buf);
- g_free(tmpurlbuf);
- c = t;
- break;
- }
- t++;
- }
- } else if (c != text && (*c == '@')) {
- int flag;
- GString *gurl_buf = NULL;
- const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
-
- if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
- flag = 0;
- else {
- flag = 1;
- gurl_buf = g_string_new("");
- }
-
- t = c;
- while (flag) {
- /* iterate backwards grabbing the local part of an email address */
- g = g_utf8_get_char(t);
- if (badchar(*t) || (g >= 127) || (*t == '(') ||
- ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
- !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
- (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
- /* local part will already be part of ret, strip it out */
- ret = g_string_truncate(ret, ret->len - (c - t));
- ret = g_string_append_unichar(ret, g);
- break;
- } else {
- g_string_prepend_unichar(gurl_buf, g);
- t = g_utf8_find_prev_char(text, t);
- if (t < text) {
- ret = g_string_assign(ret, "");
- break;
- }
- }
- }
-
- t = g_utf8_find_next_char(c, NULL);
-
- while (flag) {
- /* iterate forwards grabbing the domain part of an email address */
- g = g_utf8_get_char(t);
- if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
- char *d;
-
- url_buf = g_string_free(gurl_buf, FALSE);
- gurl_buf = NULL;
-
- /* strip off trailing periods */
- if (*url_buf) {
- for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
- *d = '\0';
- }
-
- tmpurlbuf = purple_unescape_html(url_buf);
- if (purple_email_is_valid(tmpurlbuf)) {
- g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
- tmpurlbuf, url_buf);
- } else {
- g_string_append(ret, url_buf);
- }
- g_free(url_buf);
- g_free(tmpurlbuf);
- c = t;
-
- break;
- } else {
- g_string_append_unichar(gurl_buf, g);
- t = g_utf8_find_next_char(t, NULL);
- }
- }
-
- if (gurl_buf) {
- g_string_free(gurl_buf, TRUE);
- }
- }
-
- if(*c == ')' && !inside_html) {
- inside_paren--;
- ret = g_string_append_c(ret, *c);
- c++;
- }
-
- if (*c == 0)
- break;
-
- ret = g_string_append_c(ret, *c);
- c++;
-
- }
- return g_string_free(ret, FALSE);
-}
-
-char *purple_unescape_text(const char *in)
-{
- GString *ret;
- const char *c = in;
-
- if (in == NULL)
- return NULL;
-
- ret = g_string_new("");
- while (*c) {
- int len;
- const char *ent;
-
- if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
- g_string_append(ret, ent);
- c += len;
- } else {
- g_string_append_c(ret, *c);
- c++;
- }
- }
-
- return g_string_free(ret, FALSE);
-}
-
-char *purple_unescape_html(const char *html)
-{
- GString *ret;
- const char *c = html;
-
- if (html == NULL)
- return NULL;
-
- ret = g_string_new("");
- while (*c) {
- int len;
- const char *ent;
-
- if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
- g_string_append(ret, ent);
- c += len;
- } else if (!strncmp(c, "<br>", 4)) {
- g_string_append_c(ret, '\n');
- c += 4;
- } else {
- g_string_append_c(ret, *c);
- c++;
- }
- }
-
- return g_string_free(ret, FALSE);
-}
-
-char *
-purple_markup_slice(const char *str, guint x, guint y)
-{
- GString *ret;
- GQueue *q;
- guint z = 0;
- gboolean appended = FALSE;
- gunichar c;
- char *tag;
-
- g_return_val_if_fail(str != NULL, NULL);
- g_return_val_if_fail(x <= y, NULL);
-
- if (x == y)
- return g_strdup("");
-
- ret = g_string_new("");
- q = g_queue_new();
-
- while (*str && (z < y)) {
- c = g_utf8_get_char(str);
-
- if (c == '<') {
- char *end = strchr(str, '>');
-
- if (!end) {
- g_string_free(ret, TRUE);
- while ((tag = g_queue_pop_head(q)))
- g_free(tag);
- g_queue_free(q);
- return NULL;
- }
-
- if (!g_ascii_strncasecmp(str, "<img ", 5)) {
- z += strlen("[Image]");
- } else if (!g_ascii_strncasecmp(str, "<br", 3)) {
- z += 1;
- } else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
- z += strlen("\n---\n");
- } else if (!g_ascii_strncasecmp(str, "</", 2)) {
- /* pop stack */
- char *tmp;
-
- tmp = g_queue_pop_head(q);
- g_free(tmp);
- /* z += 0; */
- } else {
- /* push it unto the stack */
- char *tmp;
-
- tmp = g_strndup(str, end - str + 1);
- g_queue_push_head(q, tmp);
- /* z += 0; */
- }
-
- if (z >= x) {
- g_string_append_len(ret, str, end - str + 1);
- }
-
- str = end;
- } else if (c == '&') {
- char *end = strchr(str, ';');
- if (!end) {
- g_string_free(ret, TRUE);
- while ((tag = g_queue_pop_head(q)))
- g_free(tag);
- g_queue_free(q);
-
- return NULL;
- }
-
- if (z >= x)
- g_string_append_len(ret, str, end - str + 1);
-
- z++;
- str = end;
- } else {
- if (z == x && z > 0 && !appended) {
- GList *l = q->tail;
-
- while (l) {
- tag = l->data;
- g_string_append(ret, tag);
- l = l->prev;
- }
- appended = TRUE;
- }
-
- if (z >= x)
- g_string_append_unichar(ret, c);
- z++;
- }
-
- str = g_utf8_next_char(str);
- }
-
- while ((tag = g_queue_pop_head(q))) {
- char *name;
-
- name = purple_markup_get_tag_name(tag);
- g_string_append_printf(ret, "</%s>", name);
- g_free(name);
- g_free(tag);
- }
-
- g_queue_free(q);
- return g_string_free(ret, FALSE);
-}
-
-char *
-purple_markup_get_tag_name(const char *tag)
-{
- int i;
- g_return_val_if_fail(tag != NULL, NULL);
- g_return_val_if_fail(*tag == '<', NULL);
-
- for (i = 1; tag[i]; i++)
- if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/')
- break;
-
- return g_strndup(tag+1, i-1);
-}
-
-/**************************************************************************
* Path/Filename Functions
**************************************************************************/
const char *
--- a/libpurple/util.h Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/util.h Fri Oct 16 02:27:21 2020 -0500
@@ -196,201 +196,6 @@
gint purple_time_parse_month(const char *month_abbr);
/**************************************************************************/
-/* Markup Functions */
-/**************************************************************************/
-
-/**
- * purple_markup_escape_text:
- * @text: The text to escape
- * @length: The length of the text, or -1 if #NULL terminated
- *
- * Escapes special characters in a plain-text string so they display
- * correctly as HTML. For example, &amp; is replaced with &amp;amp; and &lt; is
- * replaced with &amp;lt;
- *
- * This is exactly the same as g_markup_escape_text(), except that it
- * does not change ' to &amp;apos; because &amp;apos; is not a valid HTML 4 entity,
- * and is displayed literally in IE7.
- */
-gchar *purple_markup_escape_text(const gchar *text, gssize length);
-
-/**
- * purple_markup_find_tag:
- * @needle: The name of the tag
- * @haystack: The null-delimited string to search in
- * @start: A pointer to the start of the tag if found
- * @end: A pointer to the end of the tag if found
- * @attributes: The attributes, if the tag was found. This should
- * be freed with g_datalist_clear().
- *
- * Finds an HTML tag matching the given name.
- *
- * This locates an HTML tag's start and end, and stores its attributes
- * in a GData hash table. The names of the attributes are lower-cased
- * in the hash table, and the name of the tag is case insensitive.
- *
- * Returns: TRUE if the tag was found
- */
-gboolean purple_markup_find_tag(const char *needle, const char *haystack,
- const char **start, const char **end,
- GData **attributes);
-
-/**
- * purple_markup_html_to_xhtml:
- * @html: The HTML markup.
- * @dest_xhtml: The destination XHTML output.
- * @dest_plain: The destination plain-text output.
- *
- * Converts HTML markup to XHTML.
- */
-void purple_markup_html_to_xhtml(const char *html, char **dest_xhtml,
- char **dest_plain);
-
-/**
- * purple_markup_strip_html:
- * @str: The string to strip HTML from.
- *
- * Strips HTML tags from a string.
- *
- * Returns: The new string without HTML. You must g_free this string
- * when finished with it.
- */
-char *purple_markup_strip_html(const char *str);
-
-/**
- * purple_markup_linkify:
- * @str: The string to linkify.
- *
- * Adds the necessary HTML code to turn URIs into HTML links in a string.
- *
- * Returns: The new string with all URIs surrounded in standard
- * HTML &lt;a href="whatever"&gt;&lt;/a&gt; tags. You must g_free()
- * this string when finished with it.
- */
-char *purple_markup_linkify(const char *str);
-
-/**
- * purple_unescape_text:
- * @text: The string in which to unescape any HTML entities
- *
- * Unescapes HTML entities to their literal characters in the text.
- * For example "&amp;amp;" is replaced by '&amp;' and so on. Also converts
- * numerical entities (e.g. "&amp;\#38;" is also '&amp;').
- *
- * This function currently supports the following named entities:
- * "&amp;amp;", "&amp;lt;", "&amp;gt;", "&amp;copy;", "&amp;quot;",
- * "&amp;reg;", "&amp;apos;"
- *
- * purple_unescape_html() is similar, but also converts "&lt;br&gt;" into "\n".
- *
- * See purple_unescape_html()
- *
- * Returns: The text with HTML entities literalized. You must g_free
- * this string when finished with it.
- */
-char *purple_unescape_text(const char *text);
-
-/**
- * purple_unescape_html:
- * @html: The string in which to unescape any HTML entities
- *
- * Unescapes HTML entities to their literal characters and converts
- * "&lt;br&gt;" to "\n". See purple_unescape_text() for more details.
- *
- * See purple_unescape_text()
- *
- * Returns: The text with HTML entities literalized. You must g_free
- * this string when finished with it.
- */
-char *purple_unescape_html(const char *html);
-
-/**
- * purple_markup_slice:
- * @str: The input NUL terminated, HTML, UTF-8 (or ASCII) string.
- * @x: The character offset into an unformatted version of str to
- * begin at.
- * @y: The character offset (into an unformatted vesion of str) of
- * one past the last character to include in the slice.
- *
- * Returns a newly allocated substring of the HTML UTF-8 string "str".
- * The markup is preserved such that the substring will have the same
- * formatting as original string, even though some tags may have been
- * opened before "x", or may close after "y". All open tags are closed
- * at the end of the returned string, in the proper order.
- *
- * Note that x and y are in character offsets, not byte offsets, and
- * are offsets into an unformatted version of str. Because of this,
- * this function may be sensitive to changes in GtkIMHtml and may break
- * when used with other UI's. libpurple users are encouraged to report and
- * work out any problems encountered.
- *
- * Returns: The HTML slice of string, with all formatting retained.
- */
-char *purple_markup_slice(const char *str, guint x, guint y);
-
-/**
- * purple_markup_get_tag_name:
- * @tag: The string starting a HTML tag.
- *
- * Returns a newly allocated string containing the name of the tag
- * located at "tag". Tag is expected to point to a '<', and contain
- * a '>' sometime after that. If there is no '>' and the string is
- * not NUL terminated, this function can be expected to segfault.
- *
- * Returns: A string containing the name of the tag.
- */
-char *purple_markup_get_tag_name(const char *tag);
-
-/**
- * purple_markup_unescape_entity:
- * @text: A string containing an HTML entity.
- * @length: If not %NULL, the string length of the entity is stored in this location.
- *
- * Returns a constant string of the character representation of the HTML
- * entity pointed to by @text. For example, purple_markup_unescape_entity("&amp;amp;")
- * will return "&amp;". The @text variable is expected to point to an '&amp;',
- * the first character of the entity. If given an unrecognized entity, the function
- * returns %NULL.
- *
- * Note that this function, unlike purple_unescape_html(), does not search
- * the string for the entity, does not replace the entity, and does not
- * return a newly allocated string.
- *
- * Returns: A constant string containing the character representation of the given entity.
- */
-const char * purple_markup_unescape_entity(const char *text, int *length);
-
-/**
- * purple_markup_get_css_property:
- * @style: A string containing the inline CSS text.
- * @opt: The requested CSS property.
- *
- * Returns a newly allocated string containing the value of the CSS property specified
- * in opt. The @style argument is expected to point to a HTML inline CSS.
- * The function will seek for the CSS property and return its value.
- *
- * For example, purple_markup_get_css_property("direction:rtl;color:#dc4d1b;",
- * "color") would return "#dc4d1b".
- *
- * On error or if the requested property was not found, the function returns
- * %NULL.
- *
- * Returns: The value of the requested CSS property.
- */
-char * purple_markup_get_css_property(const gchar *style, const gchar *opt);
-
-/**
- * purple_markup_is_rtl:
- * @html: The HTML text.
- *
- * Check if the given HTML contains RTL text.
- *
- * Returns: TRUE if the text contains RTL text, FALSE otherwise.
- */
-gboolean purple_markup_is_rtl(const char *html);
-
-
-/**************************************************************************/
/* Path/Filename Functions */
/**************************************************************************/
--- a/libpurple/xmlnode.c Thu Oct 15 20:25:08 2020 -0500
+++ b/libpurple/xmlnode.c Fri Oct 16 02:27:21 2020 -0500
@@ -33,6 +33,7 @@
#include <string.h>
#include <glib.h>
+#include "purplemarkup.h"
#include "util.h"
#include "xmlnode.h"
--- a/po/POTFILES.in Thu Oct 15 20:25:08 2020 -0500
+++ b/po/POTFILES.in Fri Oct 16 02:27:21 2020 -0500
@@ -272,6 +272,7 @@
libpurple/purplechatuser.c
libpurple/purplecredentialprovider.c
libpurple/purpleimconversation.c
+libpurple/purplemarkup.c
libpurple/purpleprotocolim.c
libpurple/purpleprotocolmedia.c
libpurple/purpleprotocolprivacy.c