pidgin/pidgin

1896a80ff8e3
Route GLib debug logging directly to the Finch debug window

Instead of flowing through purple debug, this merges some bits of the existing GLib log handler, and the purple debug printer.

Testing Done:
Open the Debug window and see some `GLib-*` outputs.

Reviewed at https://reviews.imfreedom.org/r/1057/
/*
* Purple - Internet Messaging Library
* Copyright (C) Pidgin Developers <devel@pidgin.im>
*
* Purple is the legal property of its developers, whose names are too numerous
* to list here. Please refer to the COPYRIGHT file distributed with this
* source distribution.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include "purplemarkup.h"
#include "util.h"
/*
 * Appends the first @length bytes of @text to @str, escaping markup
 * characters on the way.
 *
 * This function is stolen from glib's gmarkup.c and modified to not
 * replace ' with &apos;
 *
 * '&', '<', '>' and '"' are written as named entities; the control
 * characters in the ranges tested below are written as hexadecimal numeric
 * references; everything else is copied through unchanged.
 */
static void append_escaped_text(GString *str,
		const gchar *text, gssize length)
{
	const gchar *p = text;
	const gchar *end = text + length;

	/* Compare with '<' rather than '!=': g_utf8_next_char() advances by the
	 * length implied by the lead byte, so a truncated or malformed trailing
	 * sequence would step over `end` and the loop would keep reading. */
	while (p < end)
	{
		const gchar *next = g_utf8_next_char (p);
		gunichar c;

		switch (*p)
		{
		case '&':
			g_string_append (str, "&amp;");
			break;
		case '<':
			g_string_append (str, "&lt;");
			break;
		case '>':
			g_string_append (str, "&gt;");
			break;
		case '"':
			g_string_append (str, "&quot;");
			break;
		default:
			if (next > end)
			{
				/* Incomplete multi-byte sequence at the end of the
				 * buffer: copy the bytes we were given without decoding
				 * past `end`. */
				next = end;
				g_string_append_len (str, p, next - p);
				break;
			}
			c = g_utf8_get_char (p);
			if ((0x1 <= c && c <= 0x8) ||
			    (0xb <= c && c <= 0xc) ||
			    (0xe <= c && c <= 0x1f) ||
			    (0x7f <= c && c <= 0x84) ||
			    (0x86 <= c && c <= 0x9f))
				g_string_append_printf (str, "&#x%x;", c);
			else
				g_string_append_len (str, p, next - p);
			break;
		}
		p = next;
	}
}
/*
 * This function is stolen from glib's gmarkup.c
 *
 * Returns a newly allocated copy of @text with markup characters escaped by
 * append_escaped_text().  A negative @length means @text is NUL-terminated.
 */
gchar *purple_markup_escape_text(const gchar *text, gssize length)
{
	GString *escaped;
	gssize nbytes = length;

	g_return_val_if_fail(text != NULL, NULL);

	if (nbytes < 0)
		nbytes = strlen(text);

	/* reserve room for at least the unescaped input up front */
	escaped = g_string_sized_new(nbytes);
	append_escaped_text(escaped, text, nbytes);

	/* hand the character data to the caller, dropping only the wrapper */
	return g_string_free(escaped, FALSE);
}
const char *
purple_markup_unescape_entity(const char *text, int *length)
{
const char *pln;
int len;
if (!text || *text != '&')
return NULL;
#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
if(IS_ENTITY("&amp;"))
pln = "&";
else if(IS_ENTITY("&lt;"))
pln = "<";
else if(IS_ENTITY("&gt;"))
pln = ">";
else if(IS_ENTITY("&nbsp;"))
pln = " ";
else if(IS_ENTITY("&copy;"))
pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */
else if(IS_ENTITY("&quot;"))
pln = "\"";
else if(IS_ENTITY("&reg;"))
pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */
else if(IS_ENTITY("&apos;"))
pln = "\'";
else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
static char buf[7];
const char *start = text + 2;
char *end;
guint64 pound;
int base = 10;
int buflen;
if (*start == 'x') {
base = 16;
start++;
}
pound = g_ascii_strtoull(start, &end, base);
if (pound == 0 || pound > INT_MAX || *end != ';') {
return NULL;
}
len = (end - text) + 1;
buflen = g_unichar_to_utf8((gunichar)pound, buf);
buf[buflen] = '\0';
pln = buf;
}
else
return NULL;
if (length)
*length = len;
return pln;
}
char *
purple_markup_get_css_property(const gchar *style,
const gchar *opt)
{
const gchar *css_str = style;
const gchar *css_value_start;
const gchar *css_value_end;
gchar *tmp;
gchar *ret;
g_return_val_if_fail(opt != NULL, NULL);
if (!css_str)
return NULL;
/* find the CSS property */
while (1)
{
/* skip whitespace characters */
while (*css_str && g_ascii_isspace(*css_str))
css_str++;
if (!g_ascii_isalpha(*css_str))
return NULL;
if (g_ascii_strncasecmp(css_str, opt, strlen(opt)))
{
/* go to next css property positioned after the next ';' */
while (*css_str && *css_str != '"' && *css_str != ';')
css_str++;
if(*css_str != ';')
return NULL;
css_str++;
}
else
break;
}
/* find the CSS value position in the string */
css_str += strlen(opt);
while (*css_str && g_ascii_isspace(*css_str))
css_str++;
if (*css_str != ':')
return NULL;
css_str++;
while (*css_str && g_ascii_isspace(*css_str))
css_str++;
if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
return NULL;
/* mark the CSS value */
css_value_start = css_str;
while (*css_str && *css_str != '"' && *css_str != ';')
css_str++;
css_value_end = css_str - 1;
/* Removes trailing whitespace */
while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
css_value_end--;
tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
ret = purple_unescape_html(tmp);
g_free(tmp);
return ret;
}
gboolean purple_markup_is_rtl(const char *html)
{
GData *attributes;
const gchar *start, *end;
gboolean res = FALSE;
if (purple_markup_find_tag("span", html, &start, &end, &attributes))
{
/* tmp is a member of attributes and is free with g_datalist_clear call */
const char *tmp = g_datalist_get_data(&attributes, "dir");
if (tmp && !g_ascii_strcasecmp(tmp, "RTL"))
res = TRUE;
if (!res)
{
tmp = g_datalist_get_data(&attributes, "style");
if (tmp)
{
char *tmp2 = purple_markup_get_css_property(tmp, "direction");
if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL"))
res = TRUE;
g_free(tmp2);
}
}
g_datalist_clear(&attributes);
}
return res;
}
/*
 * Finds the first <needle ...> opening tag in @haystack and collects its
 * attributes.
 *
 * The tag name is matched case-insensitively and must be followed by a space
 * or '>'.  On success the function returns TRUE, points *start at the tag's
 * '<' and *end at its '>', and stores in *attributes a datalist that maps
 * lower-cased attribute names to newly allocated copies of their quoted
 * values; the caller releases it with g_datalist_clear().
 *
 * When no complete tag is found the function returns FALSE and sets *start,
 * *end and *attributes to NULL.
 */
gboolean
purple_markup_find_tag(const char *needle, const char *haystack,
		const char **start, const char **end, GData **attributes)
{
	GData *attribs;
	const char *cur = haystack;
	char *name = NULL;
	gboolean found = FALSE;
	gboolean in_tag = FALSE;
	gboolean in_attr = FALSE;
	const char *in_quotes = NULL;
	size_t needlelen;

	g_return_val_if_fail(    needle != NULL, FALSE);
	g_return_val_if_fail(   *needle != '\0', FALSE);
	g_return_val_if_fail(  haystack != NULL, FALSE);
	g_return_val_if_fail(     start != NULL, FALSE);
	g_return_val_if_fail(       end != NULL, FALSE);
	g_return_val_if_fail(attributes != NULL, FALSE);

	needlelen = strlen(needle);
	g_datalist_init(&attribs);

	while (*cur && !found) {
		if (in_tag) {
			if (in_quotes) {
				const char *close = cur;

				while (*close && *close != *in_quotes)
					close++;

				/* if we got the close quote, store the value and carry on from
				 * after it. if we ran to the end of the string, point to the
				 * NUL and we're outta here */
				if (*close) {
					/* only store a value if we have an attribute name */
					if (name) {
						size_t len = close - cur;
						char *val = g_strndup(cur, len);

						g_datalist_set_data_full(&attribs, name, val, g_free);
						g_free(name);
						name = NULL;
					}

					in_quotes = NULL;
					cur = close + 1;
				} else {
					cur = close;
				}
			} else if (in_attr) {
				const char *close = cur;

				while (*close && *close != '>' && *close != '"' &&
						*close != '\'' && *close != ' ' && *close != '=')
					close++;

				/* if we got the equals, store the name of the attribute. if we got
				 * the quote, save the attribute and go straight to quote mode.
				 * otherwise the tag closed or we reached the end of the string,
				 * so we can get outta here */
				switch (*close) {
				case '"':
				case '\'':
					in_quotes = close;
					/* fall through */
				case '=':
					{
						size_t len = close - cur;

						/* don't store a blank attribute name */
						if (len) {
							g_free(name);
							name = g_ascii_strdown(cur, len);
						}

						in_attr = FALSE;
						cur = close + 1;
					}
					break;
				case ' ':
				case '>':
					in_attr = FALSE;
					/* fall through */
				default:
					cur = close;
					break;
				}
			} else {
				switch (*cur) {
				case ' ':
					/* swallow extra spaces inside tag */
					while (*cur && *cur == ' ') cur++;
					in_attr = TRUE;
					break;
				case '>':
					found = TRUE;
					*end = cur;
					break;
				case '"':
				case '\'':
					in_quotes = cur;
					/* fall through */
				default:
					cur++;
					break;
				}
			}
		} else {
			/* if we hit a < followed by the name of our tag... */
			if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
				*start = cur;
				cur = cur + needlelen + 1;

				/* if we're pointing at a space or a >, we found the right tag. if
				 * we're not, we've found a longer tag, so we need to skip to the
				 * >, but not being distracted by >s inside quotes. */
				if (*cur == ' ' || *cur == '>') {
					in_tag = TRUE;
				} else {
					/* The loop must not stop on a quote character, or the
					 * quoted section is never skipped and a '>' or
					 * "<needle" inside an attribute value gets treated
					 * as markup. */
					while (*cur && *cur != '>') {
						if (*cur == '"' || *cur == '\'') {
							const char quote = *cur++;

							while (*cur && *cur != quote)
								cur++;
							/* step over the closing quote, if any */
							if (*cur)
								cur++;
						} else {
							cur++;
						}
					}
				}
			} else {
				cur++;
			}
		}
	}

	/* clean up any attribute name from a premature termination */
	g_free(name);

	if (found) {
		*attributes = attribs;
	} else {
		/* the string ended inside the tag: drop any attributes gathered so
		 * far instead of leaking them */
		g_datalist_clear(&attribs);
		*start = NULL;
		*end = NULL;
		*attributes = NULL;
	}

	return found;
}
/* One entry on the stack of currently open tags kept by
 * purple_markup_html_to_xhtml(). */
struct purple_parse_tag {
/* tag name as written in the input; compared against closing tags */
char *src_tag;
/* tag name written to the XHTML output when the tag is closed */
char *dest_tag;
/* when TRUE, no closing tag is written for this entry */
gboolean ignore;
};
/* ALLOW_TAG_ALT(x, y): if the input at `c` is an opening <x> tag, copy it to
the output as <y> and `continue` the enclosing loop.
The macro expands inside the while-loop of purple_markup_html_to_xhtml
and uses that function's locals directly: `c` (input cursor), `tags`
(open-tag stack), `xhtml` and `plain` (output buffers, either may be
NULL). `x` and `y` must be string literals because they are pasted
next to other literals.
Two forms are handled:
- "<x " with attributes: everything up to the closing '>' is copied,
with quoted attribute values re-escaped through
g_markup_escape_text(). If another '<' shows up before the '>', or
no '>' is found, only the leading '<' is emitted (escaped as "&lt;"
in xhtml) and the cursor advances by one.
- "<x>" or "<x/>" without attributes.
Unless the tag is self-closing, a purple_parse_tag entry is pushed onto
`tags` so that the matching closing tag can be emitted later.
NOTE(review): the backslash test inside a quoted bit reads *c rather
than *o; c still points at the tag's '<' there, so that branch looks
unreachable - confirm the intent before changing it.
NOTE: Do not put `do {} while(0)` around this macro (as this is the method
recommended in the GCC docs). It contains 'continue's that should
affect the while-loop in purple_markup_html_to_xhtml and doing the
above would break that.
Also, remember to put braces in constructs that require them for
multiple statements when using this macro. */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
const char *o = c + strlen("<" x); \
const char *p = NULL, *q = NULL, *r = NULL; \
/* o = iterating over full tag \
* p = > (end of tag) \
* q = start of quoted bit \
* r = < inside tag \
*/ \
GString *innards = g_string_new(""); \
while(o && *o) { \
if(!q && (*o == '\"' || *o == '\'') ) { \
q = o; \
} else if(q) { \
if(*o == *q) { /* end of quoted bit */ \
char *unescaped = g_strndup(q+1, o-q-1); \
char *escaped = g_markup_escape_text(unescaped, -1); \
g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
g_free(unescaped); \
g_free(escaped); \
q = NULL; \
} else if(*c == '\\') { \
o++; \
} \
} else if(*o == '<') { \
r = o; \
} else if(*o == '>') { \
p = o; \
break; \
} else { \
innards = g_string_append_c(innards, *o); \
} \
o++; \
} \
if(p && !r) { /* got an end of tag and no other < earlier */\
if(*(p-1) != '/') { \
struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
pt->src_tag = x; \
pt->dest_tag = y; \
tags = g_list_prepend(tags, pt); \
} \
if(xhtml) { \
xhtml = g_string_append(xhtml, "<" y); \
xhtml = g_string_append(xhtml, innards->str); \
xhtml = g_string_append_c(xhtml, '>'); \
} \
c = p + 1; \
} else { /* got end of tag with earlier < *or* didn't get anything */ \
if(xhtml) \
xhtml = g_string_append(xhtml, "&lt;"); \
if(plain) \
plain = g_string_append_c(plain, '<'); \
c++; \
} \
g_string_free(innards, TRUE); \
continue; \
} \
if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
(*(c+strlen("<" x)) == '>' || \
!g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
if(xhtml) \
xhtml = g_string_append(xhtml, "<" y); \
c += strlen("<" x); \
if(*c != '/') { \
struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
pt->src_tag = x; \
pt->dest_tag = y; \
tags = g_list_prepend(tags, pt); \
if(xhtml) \
xhtml = g_string_append_c(xhtml, '>'); \
} else { \
if(xhtml) \
xhtml = g_string_append(xhtml, "/>");\
} \
c = strchr(c, '>') + 1; \
continue; \
}
/* ALLOW_TAG(x): pass tag x through under its own name. The restrictions
described for ALLOW_TAG_ALT (no do/while wrapper, brace multi-statement
constructs) apply here as well. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
/*
 * Converts loosely formatted HTML into XHTML and/or plain text.
 *
 * @html:      the input markup; a NULL input yields empty output strings.
 * @xhtml_out: if non-NULL, receives a newly allocated XHTML rendering.
 * @plain_out: if non-NULL, receives a newly allocated plain-text rendering.
 * At least one of the two outputs must be requested.
 *
 * A fixed set of tags is passed through (see the ALLOW_TAG lines), the
 * presentational tags <b>, <u>, <s>, <sub>, <sup>, <font> and <body bgcolor>
 * are rewritten as styled <span>s, <br> and <hr> become "<br/>", and a '<'
 * that does not start a recognised tag is escaped.  Tags still open at the
 * end of the input are closed in the XHTML output.  In the plain-text output
 * a link's target is appended as " <url>" when it differs from the link text.
 */
void
purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
		char **plain_out)
{
	GString *xhtml = NULL;
	GString *plain = NULL;
	GString *url = NULL;
	GString *cdata = NULL;
	GList *tags = NULL, *tag;
	const char *c = html;
	char quote = '\0';

#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
			quote = *(ptr++); \
		else \
			quote = '\0';

#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))

	g_return_if_fail(xhtml_out != NULL || plain_out != NULL);

	if(xhtml_out)
		xhtml = g_string_new("");
	if(plain_out)
		plain = g_string_new("");

	while(c && *c) {
		if(*c == '<') {
			if(*(c+1) == '/') { /* closing tag */
				tag = tags;
				while(tag) {
					struct purple_parse_tag *pt = tag->data;
					if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
						c += strlen(pt->src_tag) + 3;
						break;
					}
					tag = tag->next;
				}
				if(tag) {
					/* close everything opened after the matching tag, then
					 * the matching tag itself */
					while(tags) {
						struct purple_parse_tag *pt = tags->data;
						if(xhtml && !pt->ignore)
							g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
						if(plain && purple_strequal(pt->src_tag, "a")) {
							/* if this is a link, we have to add the url to the plaintext, too */
							if (cdata && url &&
									(!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
									g_utf8_collate(url->str + 7, cdata->str) != 0))) {
								/* purple_unescape_html() allocates; keep the
								 * pointer so it can be released */
								char *unescaped = purple_unescape_html(url->str);
								g_string_append_printf(plain, " <%s>", g_strstrip(unescaped));
								g_free(unescaped);
							}
							if (cdata) {
								g_string_free(cdata, TRUE);
								cdata = NULL;
							}
						}
						if(tags == tag)
							break;
						tags = g_list_delete_link(tags, tags);
						g_free(pt);
					}
					g_free(tag->data);
					tags = g_list_delete_link(tags, tag);
				} else {
					/* a closing tag we weren't expecting...
					 * we'll let it slide, if it's really a tag...if it's
					 * just a </ we'll escape it properly */
					const char *end = c+2;
					while(*end && g_ascii_isalpha(*end))
						end++;
					if(*end == '>') {
						c = end+1;
					} else {
						if(xhtml)
							xhtml = g_string_append(xhtml, "&lt;");
						if(plain)
							plain = g_string_append_c(plain, '<');
						c++;
					}
				}
			} else { /* opening tag */
				ALLOW_TAG("blockquote");
				ALLOW_TAG("cite");
				ALLOW_TAG("div");
				ALLOW_TAG("em");
				ALLOW_TAG("h1");
				ALLOW_TAG("h2");
				ALLOW_TAG("h3");
				ALLOW_TAG("h4");
				ALLOW_TAG("h5");
				ALLOW_TAG("h6");
				/* we only allow html to start the message */
				if(c == html) {
					ALLOW_TAG("html");
				}
				ALLOW_TAG_ALT("i", "em");
				ALLOW_TAG_ALT("italic", "em");
				ALLOW_TAG("li");
				ALLOW_TAG("ol");
				ALLOW_TAG("p");
				ALLOW_TAG("pre");
				ALLOW_TAG("q");
				ALLOW_TAG("span");
				ALLOW_TAG("ul");

				/* we skip <HR> because it's not legal in XHTML-IM.  However,
				 * we still want to send something sensible, so we put a
				 * linebreak in its place. <BR> also needs special handling
				 * because putting a </BR> to close it would just be dumb. */
				if((!g_ascii_strncasecmp(c, "<br", 3)
							|| !g_ascii_strncasecmp(c, "<hr", 3))
						&& (*(c+3) == '>' ||
							!g_ascii_strncasecmp(c+3, "/>", 2) ||
							!g_ascii_strncasecmp(c+3, " />", 3))) {
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<br/>");
					if(plain && *c != '\n')
						plain = g_string_append_c(plain, '\n');
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					if (*(c+2) == '>')
						pt->src_tag = "b";
					else if (*(c+2) == 'o')
						pt->src_tag = "bold";
					else
						pt->src_tag = "strong";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "u" : "underline";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if (xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "s" : "strike";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sub";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sup";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
					const char *p = c + 4;
					GString *src = NULL, *alt = NULL;
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append(to, "&apos;"); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}

					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "src=", 4)) {
							const char *q = p + 4;
							if (src)
								g_string_free(src, TRUE);
							src = g_string_new("");
							ESCAPE(q, src);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
							const char *q = p + 4;
							if (alt)
								g_string_free(alt, TRUE);
							alt = g_string_new("");
							ESCAPE(q, alt);
							p = q;
						} else {
							p++;
						}
					}
#undef ESCAPE
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					/* src and alt are required! */
					if(src && xhtml)
						g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
					if(alt) {
						if(plain) {
							/* the unescaped copy is ours to free */
							char *unescaped = purple_unescape_html(alt->str);
							plain = g_string_append(plain, unescaped);
							g_free(unescaped);
						}
						if(!src && xhtml)
							xhtml = g_string_append(xhtml, alt->str);
						g_string_free(alt, TRUE);
					}
					/* an <img> without src= leaves src NULL, which
					 * g_string_free() must not be handed */
					if(src)
						g_string_free(src, TRUE);
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
					const char *p = c + 2;
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "href=", 5)) {
							const char *q = p + 5;
							if (url)
								g_string_free(url, TRUE);
							url = g_string_new("");
							if (cdata)
								g_string_free(cdata, TRUE);
							cdata = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								int len;
								if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
									url = g_string_append(url, "&amp;");
								else if (*q == '"')
									url = g_string_append(url, "&quot;");
								else
									url = g_string_append_c(url, *q);
								q++;
							}
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "a";
					pt->dest_tag = "a";
					tags = g_list_prepend(tags, pt);
					if(xhtml)
						g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
					continue;
				}
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append_c(to, '\"'); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}
				if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
					const char *p = c + 5;
					GString *style = g_string_new("");
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "back=", 5)) {
							const char *q = p + 5;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "background: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "color=", 6)) {
							const char *q = p + 6;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "color: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "face=", 5)) {
							const char *q = p + 5;
							GString *face = g_string_new("");
							ESCAPE(q, face);
							g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
							g_string_free(face, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "size=", 5)) {
							const char *q = p + 5;
							int sz;
							const char *size = "medium";
							CHECK_QUOTE(q);
							sz = atoi(q);
							switch (sz)
							{
							case 1:
								size = "xx-small";
								break;
							case 2:
								size = "small";
								break;
							case 3:
								size = "medium";
								break;
							case 4:
								size = "large";
								break;
							case 5:
								size = "x-large";
								break;
							case 6:
							case 7:
								size = "xx-large";
								break;
							default:
								break;
							}
							g_string_append_printf(style, "font-size: %s; ", size);
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "font";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					if(style->len && xhtml)
						g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
					else
						pt->ignore = TRUE;
					g_string_free(style, TRUE);
					continue;
				}
#undef ESCAPE
				if (!g_ascii_strncasecmp(c, "<body ", 6)) {
					const char *p = c + 6;
					gboolean did_something = FALSE;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
							const char *q = p + 8;
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
							GString *color = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								color = g_string_append_c(color, *q);
								q++;
							}
							if (xhtml)
								g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
							g_string_free(color, TRUE);
							if ((c = strchr(p, '>')) != NULL)
								c++;
							else
								c = p;
							pt->src_tag = "body";
							pt->dest_tag = "span";
							tags = g_list_prepend(tags, pt);
							did_something = TRUE;
							break;
						}
						p++;
					}
					if (did_something) continue;
				}
				/* this has to come after the special case for bgcolor */
				ALLOW_TAG("body");
				if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
					char *p = strstr(c + strlen("<!--"), "-->");
					if(p) {
						if(xhtml)
							xhtml = g_string_append(xhtml, "<!--");
						c += strlen("<!--");
						continue;
					}
				}

				/* not a tag we know: emit the '<' as literal text */
				if(xhtml)
					xhtml = g_string_append(xhtml, "&lt;");
				if(plain)
					plain = g_string_append_c(plain, '<');
				c++;
			}
		} else if(*c == '&') {
			char buf[7];
			const char *pln;
			int len;

			if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
				len = 1;
				g_snprintf(buf, sizeof(buf), "%c", *c);
				pln = buf;
			}
			if(xhtml)
				xhtml = g_string_append_len(xhtml, c, len);
			if(plain)
				plain = g_string_append(plain, pln);
			if(cdata)
				cdata = g_string_append_len(cdata, c, len);
			c += len;
		} else {
			if(xhtml)
				xhtml = g_string_append_c(xhtml, *c);
			if(plain)
				plain = g_string_append_c(plain, *c);
			if(cdata)
				cdata = g_string_append_c(cdata, *c);
			c++;
		}
	}

	/* Close whatever is still open.  Each tag record is released as it is
	 * taken off the stack; freeing only the list nodes would leak them. */
	while (tags) {
		struct purple_parse_tag *pt = tags->data;
		if(xhtml && !pt->ignore)
			g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
		g_free(pt);
		tags = g_list_delete_link(tags, tags);
	}

	if(xhtml_out)
		*xhtml_out = g_string_free(xhtml, FALSE);
	if(plain_out)
		*plain_out = g_string_free(plain, FALSE);
	if(url)
		g_string_free(url, TRUE);
	if (cdata)
		g_string_free(cdata, TRUE);
#undef CHECK_QUOTE
#undef VALID_CHAR
}
/*
* Returns a newly allocated copy of str with the HTML tags removed, or NULL
* when str is NULL.
*
* The copy is rewritten in place: i is the read position, j the write
* position. Recognised entities are decoded, whitespace characters are
* written as a single space each, a few tags are replaced by a tab or a
* newline, the content of <script> and <style> elements is dropped, and the
* target of an <a href> is appended as " (address)" at the closing </a> when
* it differs from the link text.
*
* The following are probably reasonable changes:
* - \n should be converted to a normal space
* - in addition to <br>, <p> and <div> etc. should also be converted into \n
* - We want to turn </td>#whitespace<td> sequences into a single tab
* - We want to turn </tr>#whitespace<tr> sequences into a single \n
* - <script>...</script> and <style>...</style> should be completely removed
*/
char *
purple_markup_strip_html(const char *str)
{
int i, j, k, entlen;
gboolean visible = TRUE;
gboolean closing_td_p = FALSE;
gchar *str2;
/* non-NULL while inside <script>/<style>; holds the tag that ends it */
const gchar *cdata_close_tag = NULL, *ent;
/* target of the most recent <a href=...>, and where its text starts in the output */
gchar *href = NULL;
int href_st = 0;
if(!str)
return NULL;
str2 = g_strdup(str);
for (i = 0, j = 0; str2[i]; i++)
{
if (str2[i] == '<')
{
if (cdata_close_tag)
{
/* Note: Don't even assume any other tag is a tag in CDATA */
if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
strlen(cdata_close_tag)) == 0)
{
i += strlen(cdata_close_tag) - 1;
cdata_close_tag = NULL;
}
continue;
}
else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
{
/* a cell opening right after a closed cell becomes a tab */
str2[j++] = '\t';
visible = TRUE;
}
else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
{
/* hide whatever sits between </td> and the next tag */
closing_td_p = TRUE;
visible = FALSE;
}
else
{
closing_td_p = FALSE;
visible = TRUE;
}
k = i + 1;
/* a '<' followed by whitespace is not treated as a tag; it falls
* through and is copied like ordinary text */
if(g_ascii_isspace(str2[k]))
visible = TRUE;
else if (str2[k])
{
/* Scan until we end the tag either implicitly (closed start
* tag) or explicitly, using a sloppy method (i.e., < or >
* inside quoted attributes will screw us up)
*/
while (str2[k] && str2[k] != '<' && str2[k] != '>')
{
k++;
}
/* If we've got an <a> tag with an href, save the address
* to print later. */
if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
g_ascii_isspace(str2[i+2]))
{
int st; /* start of href, inclusive [ */
int end; /* end of href, exclusive ) */
char delim = ' ';
/* Find start of href */
for (st = i + 3; st < k; st++)
{
if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
{
st += 5;
if (str2[st] == '"' || str2[st] == '\'')
{
delim = str2[st];
st++;
}
break;
}
}
/* find end of address */
for (end = st; end < k && str2[end] != delim; end++)
{
/* All the work is done in the loop construct above. */
}
/* If there's an address, save it. If there was
* already one saved, kill it. */
if (st < k)
{
char *tmp;
g_free(href);
tmp = g_strndup(str2 + st, end - st);
href = purple_unescape_html(tmp);
g_free(tmp);
href_st = j;
}
}
/* Replace </a> with an ascii representation of the
* address the link was pointing to. */
else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
{
size_t hrlen = strlen(href);
/* Only insert the href if it's different from the CDATA. */
if ((hrlen != (gsize)(j - href_st) ||
strncmp(str2 + href_st, href, hrlen)) &&
(hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
strncmp(str2 + href_st, href + 7, hrlen - 7)))
{
str2[j++] = ' ';
str2[j++] = '(';
memmove(str2 + j, href, hrlen);
j += hrlen;
str2[j++] = ')';
g_free(href);
href = NULL;
}
}
/* Check for tags which should be mapped to newline (but ignore some of
* the tags at the beginning of the text) */
else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
|| g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
{
str2[j++] = '\n';
}
/* Check for tags which begin CDATA and need to be closed */
else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
{
cdata_close_tag = "</script>";
}
else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
{
cdata_close_tag = "</style>";
}
/* Update the index and continue checking after the tag */
/* when the scan stopped on '<' or the terminator, back up one so the
* loop's i++ lands on that character */
i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
continue;
}
}
else if (cdata_close_tag)
{
/* inside <script>/<style>: drop everything */
continue;
}
else if (!g_ascii_isspace(str2[i]))
{
visible = TRUE;
}
if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
{
/* write the decoded entity and skip the rest of its source text */
while (*ent)
str2[j++] = *ent++;
i += entlen - 1;
continue;
}
if (visible)
str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
}
g_free(href);
str2[j] = '\0';
return str2;
}
/* Returns TRUE for the characters that end a URL or e-mail address while
 * linkifying: space, comma, NUL, newline, carriage return, '<', '>' and '"'. */
static gboolean
badchar(char c)
{
	if (c == '\0' || c == ' ' || c == ',')
		return TRUE;
	if (c == '\n' || c == '\r')
		return TRUE;
	if (c == '<' || c == '>' || c == '"')
		return TRUE;

	return FALSE;
}
static gboolean
badentity(const char *c)
{
if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
!g_ascii_strncasecmp(c, "&gt;", 4) ||
!g_ascii_strncasecmp(c, "&quot;", 6)) {
return TRUE;
}
return FALSE;
}
/*
 * Tries to turn the URL starting at c into an <A HREF> element appended to
 * ret.
 *
 * start is the beginning of the whole text, matchlen the length of the
 * prefix that was recognised at c, and urlprefix a string placed in front
 * of the link target.  Returns the position right after the URL, or c
 * unchanged when nothing follows the prefix.
 */
static const char *
process_link(GString *ret,
		const char *start, const char *c,
		int matchlen,
		const char *urlprefix,
		int inside_paren)
{
	const char *stop = c;
	char *escaped_url;
	char *plain_url;

	for (;;) {
		/* run forward to the next terminating character or entity */
		while (!badchar(*stop) && !badentity(stop))
			stop++;

		/* nothing but the prefix itself: not a link */
		if (stop - c == matchlen)
			return c;

		/* a comma only ends the URL when a space follows it */
		if (*stop != ',' || stop[1] == ' ')
			break;
		stop++;
	}

	/* leave a trailing full stop, and a closing paren we are inside of,
	 * out of the link */
	if (stop > start && stop[-1] == '.')
		stop--;
	if (stop > start && stop[-1] == ')' && inside_paren > 0)
		stop--;

	escaped_url = g_strndup(c, stop - c);
	plain_url = purple_unescape_html(escaped_url);
	g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
			urlprefix,
			plain_url, escaped_url);
	g_free(plain_url);
	g_free(escaped_url);

	return stop;
}
/*
* Returns a newly allocated copy of text in which URLs and e-mail addresses
* are wrapped in <A HREF="..."> elements, or NULL when text is NULL.
*
* Recognised forms: http://, https://, ftp://, sftp://, file://, xmpp:,
* mailto:, bare "www." and "ftp." host names (given an http:// or ftp://
* target), and bare addresses around an '@'. Text inside existing markup,
* including whole <A ...>...</A> elements, is copied through untouched.
*/
char *
purple_markup_linkify(const char *text)
{
/* c = read cursor, t = scratch cursor, q = opening quote inside a tag */
const char *c, *t, *q = NULL;
char *tmpurlbuf, *url_buf;
gunichar g;
gboolean inside_html = FALSE;
/* nesting depth of '(' seen outside markup; lets process_link() leave a
* trailing ')' out of a link */
int inside_paren = 0;
GString *ret;
if (text == NULL)
return NULL;
ret = g_string_new("");
c = text;
while (*c) {
if(*c == '(' && !inside_html) {
inside_paren++;
ret = g_string_append_c(ret, *c);
c++;
}
if(inside_html) {
if(*c == '>') {
inside_html = FALSE;
} else if(!q && (*c == '\"' || *c == '\'')) {
q = c;
} else if(q) {
if(*c == *q)
q = NULL;
}
} else if(*c == '<') {
inside_html = TRUE;
if (!g_ascii_strncasecmp(c, "<A", 2)) {
/* copy an existing anchor verbatim up to its "/A>" */
while (1) {
if (!g_ascii_strncasecmp(c, "/A>", 3)) {
inside_html = FALSE;
break;
}
ret = g_string_append_c(ret, *c);
c++;
if (!(*c))
break;
}
}
} else if (!g_ascii_strncasecmp(c, "http://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "https://", 8)) {
c = process_link(ret, text, c, 8, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
c = process_link(ret, text, c, 6, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "file://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
/* bare host names and xmpp: only count at the start of the text or
* right after a terminating character */
} else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 4, "http://", inside_paren);
} else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 4, "ftp://", inside_paren);
} else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 5, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
t = c;
while (1) {
if (badchar(*t) || badentity(t)) {
char *d;
/* "mailto:" with nothing after it is left as plain text */
if (t - c == 7) {
break;
}
if (t > text && *(t - 1) == '.')
t--;
/* validate only the address part, without any ?query */
if ((d = strstr(c + 7, "?")) != NULL && d < t)
url_buf = g_strndup(c + 7, d - c - 7);
else
url_buf = g_strndup(c + 7, t - c - 7);
if (!purple_email_is_valid(url_buf)) {
g_free(url_buf);
break;
}
g_free(url_buf);
url_buf = g_strndup(c, t - c);
tmpurlbuf = purple_unescape_html(url_buf);
g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
tmpurlbuf, url_buf);
g_free(url_buf);
g_free(tmpurlbuf);
c = t;
break;
}
t++;
}
} else if (c != text && (*c == '@')) {
/* flag is 1 when the characters on both sides of the '@' could belong
* to an address */
int flag;
GString *gurl_buf = NULL;
const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
flag = 0;
else {
flag = 1;
gurl_buf = g_string_new("");
}
t = c;
while (flag) {
/* iterate backwards grabbing the local part of an email address */
g = g_utf8_get_char(t);
if (badchar(*t) || (g >= 127) || (*t == '(') ||
((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
!g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
(t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
/* local part will already be part of ret, strip it out */
ret = g_string_truncate(ret, ret->len - (c - t));
ret = g_string_append_unichar(ret, g);
break;
} else {
g_string_prepend_unichar(gurl_buf, g);
t = g_utf8_find_prev_char(text, t);
/* the local part reached the start of the text, so everything
* written so far belongs to the address */
if (t < text) {
ret = g_string_assign(ret, "");
break;
}
}
}
t = g_utf8_find_next_char(c, NULL);
while (flag) {
/* iterate forwards grabbing the domain part of an email address */
g = g_utf8_get_char(t);
if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
char *d;
url_buf = g_string_free(gurl_buf, FALSE);
gurl_buf = NULL;
/* strip off trailing periods */
if (*url_buf) {
for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
*d = '\0';
}
tmpurlbuf = purple_unescape_html(url_buf);
if (purple_email_is_valid(tmpurlbuf)) {
g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
tmpurlbuf, url_buf);
} else {
g_string_append(ret, url_buf);
}
g_free(url_buf);
g_free(tmpurlbuf);
c = t;
break;
} else {
g_string_append_unichar(gurl_buf, g);
t = g_utf8_find_next_char(t, NULL);
}
}
if (gurl_buf) {
g_string_free(gurl_buf, TRUE);
}
}
if(*c == ')' && !inside_html) {
inside_paren--;
ret = g_string_append_c(ret, *c);
c++;
}
if (*c == 0)
break;
/* copy the current character; after a link this is the character that
* ended it */
ret = g_string_append_c(ret, *c);
c++;
}
return g_string_free(ret, FALSE);
}
/*
 * Returns a newly allocated copy of in with every entity recognised by
 * purple_markup_unescape_entity() replaced by the text it stands for.
 * Everything else is copied unchanged.  Returns NULL when in is NULL.
 */
char *purple_unescape_text(const char *in)
{
	GString *out;
	const char *cursor;

	if (in == NULL)
		return NULL;

	out = g_string_new("");

	for (cursor = in; *cursor != '\0'; ) {
		int consumed;
		const char *plain = purple_markup_unescape_entity(cursor, &consumed);

		if (plain == NULL) {
			/* ordinary byte */
			g_string_append_c(out, *cursor);
			cursor++;
			continue;
		}

		g_string_append(out, plain);
		cursor += consumed;
	}

	return g_string_free(out, FALSE);
}
/*
 * Returns a newly allocated copy of html with every entity recognised by
 * purple_markup_unescape_entity() decoded and every literal "<br>" replaced
 * by a newline.  Everything else is copied unchanged.  Returns NULL when
 * html is NULL.
 */
char *purple_unescape_html(const char *html)
{
	GString *out;
	const char *cursor;

	if (html == NULL)
		return NULL;

	out = g_string_new("");

	for (cursor = html; *cursor != '\0'; ) {
		int consumed;
		const char *plain = purple_markup_unescape_entity(cursor, &consumed);

		if (plain != NULL) {
			g_string_append(out, plain);
			cursor += consumed;
			continue;
		}

		if (strncmp(cursor, "<br>", 4) == 0) {
			g_string_append_c(out, '\n');
			cursor += 4;
			continue;
		}

		g_string_append_c(out, *cursor);
		cursor++;
	}

	return g_string_free(out, FALSE);
}
/*
 * Returns a newly allocated slice of the marked-up string str covering the
 * visible positions [x, y).
 *
 * Ordinary characters and entities each count as one position; <img ...>,
 * <br...> and <hr> count as the length of "[Image]", 1 and the length of
 * "\n---\n" respectively; other tags count as zero.  Tags that are open
 * where the slice starts are re-emitted in front of it, and tags still open
 * where it ends are closed.
 *
 * Returns NULL when a '<' has no matching '>' or an '&' has no following
 * ';'.
 */
char *
purple_markup_slice(const char *str, guint x, guint y)
{
	GString *out;
	GQueue *open_tags;
	guint pos = 0;
	gboolean reopened = FALSE;
	char *tag;

	g_return_val_if_fail(str != NULL, NULL);
	g_return_val_if_fail(x <= y, NULL);

	if (x == y)
		return g_strdup("");

	out = g_string_new("");
	open_tags = g_queue_new();

	for (; *str && (pos < y); str = g_utf8_next_char(str)) {
		gunichar ch = g_utf8_get_char(str);

		if (ch == '<') {
			char *close = strchr(str, '>');

			if (close == NULL)
				goto malformed;

			if (!g_ascii_strncasecmp(str, "<img ", 5)) {
				pos += strlen("[Image]");
			} else if (!g_ascii_strncasecmp(str, "<br", 3)) {
				pos += 1;
			} else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
				pos += strlen("\n---\n");
			} else if (!g_ascii_strncasecmp(str, "</", 2)) {
				/* closing tag: forget the most recently opened one */
				g_free(g_queue_pop_head(open_tags));
			} else {
				/* opening tag: remember its full text */
				g_queue_push_head(open_tags, g_strndup(str, close - str + 1));
			}

			if (pos >= x)
				g_string_append_len(out, str, close - str + 1);

			/* the loop increment steps past the '>' */
			str = close;
		} else if (ch == '&') {
			char *semi = strchr(str, ';');

			if (semi == NULL)
				goto malformed;

			if (pos >= x)
				g_string_append_len(out, str, semi - str + 1);
			pos++;

			/* the loop increment steps past the ';' */
			str = semi;
		} else {
			if (pos == x && pos > 0 && !reopened) {
				GList *l;

				/* oldest tag first, so nesting comes out in order */
				for (l = open_tags->tail; l != NULL; l = l->prev)
					g_string_append(out, l->data);
				reopened = TRUE;
			}

			if (pos >= x)
				g_string_append_unichar(out, ch);
			pos++;
		}
	}

	/* close whatever is still open, innermost first */
	while ((tag = g_queue_pop_head(open_tags))) {
		char *name = purple_markup_get_tag_name(tag);

		g_string_append_printf(out, "</%s>", name);
		g_free(name);
		g_free(tag);
	}
	g_queue_free(open_tags);

	return g_string_free(out, FALSE);

malformed:
	g_string_free(out, TRUE);
	while ((tag = g_queue_pop_head(open_tags)))
		g_free(tag);
	g_queue_free(open_tags);

	return NULL;
}
/*
 * Returns a newly allocated copy of the name of tag, i.e. the text between
 * the leading '<' and the first '>', ' ' or '/' (or the end of the string).
 * tag must be non-NULL and start with '<'.
 */
char *
purple_markup_get_tag_name(const char *tag)
{
	const char *name;
	const char *name_end;

	g_return_val_if_fail(tag != NULL, NULL);
	g_return_val_if_fail(*tag == '<', NULL);

	name = tag + 1;
	name_end = name;
	while (*name_end != '\0' && *name_end != '>' &&
			*name_end != ' ' && *name_end != '/')
		name_end++;

	return g_strndup(name, name_end - name);
}