pidgin/pidgin

Use G_DECLARE_DERIVABLE_TYPE for PurpleConversation and additional cleanups

The setters for PurpleConversation are kind of crappy because of the
conversation cache in the purple_conversations api, which we'll address at a
later time.

Testing Done:
Compiled and ran locally, parted a chat and just signed out to verify no new issues.

Reviewed at https://reviews.imfreedom.org/r/613/
/*
* Purple - Internet Messaging Library
* Copyright (C) Pidgin Developers <devel@pidgin.im>
*
* Purple is the legal property of its developers, whose names are too numerous
* to list here. Please refer to the COPYRIGHT file distributed with this
* source distribution.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include "purplemarkup.h"
#include "util.h"
/*
 * Appends `text` to `str`, replacing the characters that are special in
 * markup (&, <, >, ") with entities and a set of control characters with
 * numeric character references.
 *
 * This function is stolen from glib's gmarkup.c and modified to not
 * replace ' with &apos;
 *
 * str:    string the escaped output is appended to.
 * text:   UTF-8 input.
 * length: number of bytes of `text` to process.
 */
static void
append_escaped_text(GString *str, const gchar *text, gssize length)
{
	const gchar *p = text;
	const gchar *end = text + length;

	/* Compare with '<' rather than '!=': when `length` cuts a multi-byte
	 * sequence in half, g_utf8_next_char() steps past `end`, and a '!='
	 * test would keep walking through memory that is not ours. */
	while (p < end) {
		const gchar *next = g_utf8_next_char(p);
		gunichar c;

		/* Never copy bytes that lie beyond the requested length. */
		if (next > end)
			next = end;

		switch (*p) {
		case '&':
			g_string_append(str, "&amp;");
			break;
		case '<':
			g_string_append(str, "&lt;");
			break;
		case '>':
			g_string_append(str, "&gt;");
			break;
		case '"':
			g_string_append(str, "&quot;");
			break;
		default:
			c = g_utf8_get_char(p);
			if ((0x1 <= c && c <= 0x8) ||
			    (0xb <= c && c <= 0xc) ||
			    (0xe <= c && c <= 0x1f) ||
			    (0x7f <= c && c <= 0x84) ||
			    (0x86 <= c && c <= 0x9f)) {
				/* control characters become numeric references */
				g_string_append_printf(str, "&#x%x;", c);
			} else {
				g_string_append_len(str, p, next - p);
			}
			break;
		}

		p = next;
	}
}
/*
 * Returns a newly allocated copy of `text` escaped by
 * append_escaped_text(). A negative `length` means `text` is
 * NUL-terminated. Returns NULL (via g_return_val_if_fail) when `text` is
 * NULL.
 *
 * This function is stolen from glib's gmarkup.c
 */
gchar *
purple_markup_escape_text(const gchar *text, gssize length)
{
	GString *escaped;
	gssize nbytes = length;

	g_return_val_if_fail(text != NULL, NULL);

	if (nbytes < 0) {
		nbytes = strlen(text);
	}

	/* The escaped text is at least as long as the input, so reserve that. */
	escaped = g_string_sized_new(nbytes);
	append_escaped_text(escaped, text, nbytes);

	return g_string_free(escaped, FALSE);
}
/*
 * Decodes the entity that starts at `text`.
 *
 * text:   string whose first character should be '&'.
 * length: if non-NULL, receives the number of bytes of `text` the entity
 *         occupies (including the '&' and the ';').
 *
 * Returns the UTF-8 replacement text, or NULL when `text` is NULL, does not
 * start with '&', or does not start with a recognised entity. Named
 * entities are matched case-insensitively. Numeric references
 * (&#NNN; / &#xHHH;) are returned in a static buffer that the next call
 * overwrites, so the result must be consumed before calling again.
 */
const char *
purple_markup_unescape_entity(const char *text, int *length)
{
	const char *pln;
	int len;

	if (!text || *text != '&')
		return NULL;

#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))

	if (IS_ENTITY("&amp;"))
		pln = "&";
	else if (IS_ENTITY("&lt;"))
		pln = "<";
	else if (IS_ENTITY("&gt;"))
		pln = ">";
	else if (IS_ENTITY("&nbsp;"))
		pln = " ";
	else if (IS_ENTITY("&copy;"))
		pln = "\302\251";      /* or use g_unichar_to_utf8(0xa9); */
	else if (IS_ENTITY("&quot;"))
		pln = "\"";
	else if (IS_ENTITY("&reg;"))
		pln = "\302\256";      /* or use g_unichar_to_utf8(0xae); */
	else if (IS_ENTITY("&apos;"))
		pln = "\'";
	else if (text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
		static char buf[7];
		const char *start = text + 2;
		char *end;
		guint64 pound;
		int base = 10;
		int buflen;

		if (*start == 'x') {
			base = 16;
			start++;
		}

		pound = g_ascii_strtoull(start, &end, base);
		if (pound == 0 || pound > INT_MAX || *end != ';') {
			return NULL;
		}

		/* Surrogates and values above U+10FFFF are not valid code points;
		 * encoding them would hand invalid UTF-8 to the caller, so treat
		 * such references as unrecognised instead. */
		if (!g_unichar_validate((gunichar)pound)) {
			return NULL;
		}

		len = (end - text) + 1;

		buflen = g_unichar_to_utf8((gunichar)pound, buf);
		buf[buflen] = '\0';
		pln = buf;
	}
	else
		return NULL;

	if (length)
		*length = len;

	return pln;
}
char *
purple_markup_get_css_property(const gchar *style,
const gchar *opt)
{
const gchar *css_str = style;
const gchar *css_value_start;
const gchar *css_value_end;
gchar *tmp;
gchar *ret;
g_return_val_if_fail(opt != NULL, NULL);
if (!css_str)
return NULL;
/* find the CSS property */
while (1)
{
/* skip whitespace characters */
while (*css_str && g_ascii_isspace(*css_str))
css_str++;
if (!g_ascii_isalpha(*css_str))
return NULL;
if (g_ascii_strncasecmp(css_str, opt, strlen(opt)))
{
/* go to next css property positioned after the next ';' */
while (*css_str && *css_str != '"' && *css_str != ';')
css_str++;
if(*css_str != ';')
return NULL;
css_str++;
}
else
break;
}
/* find the CSS value position in the string */
css_str += strlen(opt);
while (*css_str && g_ascii_isspace(*css_str))
css_str++;
if (*css_str != ':')
return NULL;
css_str++;
while (*css_str && g_ascii_isspace(*css_str))
css_str++;
if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
return NULL;
/* mark the CSS value */
css_value_start = css_str;
while (*css_str && *css_str != '"' && *css_str != ';')
css_str++;
css_value_end = css_str - 1;
/* Removes trailing whitespace */
while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
css_value_end--;
tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
ret = purple_unescape_html(tmp);
g_free(tmp);
return ret;
}
gboolean purple_markup_is_rtl(const char *html)
{
GData *attributes;
const gchar *start, *end;
gboolean res = FALSE;
if (purple_markup_find_tag("span", html, &start, &end, &attributes))
{
/* tmp is a member of attributes and is free with g_datalist_clear call */
const char *tmp = g_datalist_get_data(&attributes, "dir");
if (tmp && !g_ascii_strcasecmp(tmp, "RTL"))
res = TRUE;
if (!res)
{
tmp = g_datalist_get_data(&attributes, "style");
if (tmp)
{
char *tmp2 = purple_markup_get_css_property(tmp, "direction");
if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL"))
res = TRUE;
g_free(tmp2);
}
}
g_datalist_clear(&attributes);
}
return res;
}
/*
 * Finds the first `<needle` tag in `haystack` and collects its attributes.
 *
 * needle:     tag name to look for (non-empty); matched case-insensitively
 *             and only when followed by a space or '>'.
 * haystack:   markup to search.
 * start:      receives a pointer to the tag's '<'.
 * end:        receives a pointer to the tag's closing '>'.
 * attributes: receives a datalist mapping lower-cased attribute names to
 *             newly allocated values. Only quoted values are stored. The
 *             caller releases it with g_datalist_clear().
 *
 * Returns TRUE when a complete tag was found. Otherwise returns FALSE and
 * sets *start, *end and *attributes to NULL.
 */
gboolean
purple_markup_find_tag(const char *needle, const char *haystack,
                       const char **start, const char **end, GData **attributes)
{
	GData *attribs;
	const char *cur = haystack;
	char *name = NULL;
	gboolean found = FALSE;
	gboolean in_tag = FALSE;
	gboolean in_attr = FALSE;
	const char *in_quotes = NULL;
	size_t needlelen;

	g_return_val_if_fail(    needle != NULL, FALSE);
	g_return_val_if_fail(   *needle != '\0', FALSE);
	g_return_val_if_fail(  haystack != NULL, FALSE);
	g_return_val_if_fail(     start != NULL, FALSE);
	g_return_val_if_fail(       end != NULL, FALSE);
	g_return_val_if_fail(attributes != NULL, FALSE);

	needlelen = strlen(needle);
	g_datalist_init(&attribs);

	while (*cur && !found) {
		if (in_tag) {
			if (in_quotes) {
				const char *close = cur;

				while (*close && *close != *in_quotes)
					close++;

				/* if we got the close quote, store the value and carry on from    *
				 * after it. if we ran to the end of the string, point to the NULL *
				 * and we're outta here */
				if (*close) {
					/* only store a value if we have an attribute name */
					if (name) {
						size_t len = close - cur;
						char *val = g_strndup(cur, len);

						g_datalist_set_data_full(&attribs, name, val, g_free);
						g_free(name);
						name = NULL;
					}

					in_quotes = NULL;
					cur = close + 1;
				} else {
					cur = close;
				}
			} else if (in_attr) {
				const char *close = cur;

				while (*close && *close != '>' && *close != '"' &&
				       *close != '\'' && *close != ' ' && *close != '=')
					close++;

				/* if we got the equals, store the name of the attribute. if we got
				 * the quote, save the attribute and go straight to quote mode.
				 * otherwise the tag closed or we reached the end of the string,
				 * so we can get outta here */
				switch (*close) {
				case '"':
				case '\'':
					in_quotes = close;
					/* fall through */
				case '=':
					{
						size_t len = close - cur;

						/* don't store a blank attribute name */
						if (len) {
							g_free(name);
							name = g_ascii_strdown(cur, len);
						}

						in_attr = FALSE;
						cur = close + 1;
					}
					break;
				case ' ':
				case '>':
					in_attr = FALSE;
					/* fall through */
				default:
					cur = close;
					break;
				}
			} else {
				switch (*cur) {
				case ' ':
					/* swallow extra spaces inside tag */
					while (*cur && *cur == ' ') cur++;
					in_attr = TRUE;
					break;
				case '>':
					found = TRUE;
					*end = cur;
					break;
				case '"':
				case '\'':
					in_quotes = cur;
					/* fall through */
				default:
					cur++;
					break;
				}
			}
		} else {
			/* if we hit a < followed by the name of our tag... */
			if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
				*start = cur;
				cur = cur + needlelen + 1;

				/* if we're pointing at a space or a >, we found the right tag. if *
				 * we're not, we've found a longer tag, so we need to skip to the  *
				 * >, but not being distracted by >s inside quotes.                */
				if (*cur == ' ' || *cur == '>') {
					in_tag = TRUE;
				} else {
					/* NOTE(review): the loop condition already stops at a
					 * quote, so the two quote branches below never run. */
					while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') {
						if (*cur == '"') {
							cur++;
							while (*cur && *cur != '"')
								cur++;
						} else if (*cur == '\'') {
							cur++;
							while (*cur && *cur != '\'')
								cur++;
						} else {
							cur++;
						}
					}
				}
			} else {
				cur++;
			}
		}
	}

	/* clean up any attribute name from a premature termination */
	g_free(name);

	if (found) {
		*attributes = attribs;
	} else {
		/* Attributes may have been collected before the input ran out
		 * without a closing '>'. Nobody else will ever see this list, so
		 * release it here instead of leaking the stored values. */
		g_datalist_clear(&attribs);

		*start = NULL;
		*end = NULL;
		*attributes = NULL;
	}

	return found;
}
/* Bookkeeping for one element that purple_markup_html_to_xhtml() has opened
 * and not yet closed. */
struct purple_parse_tag {
char *src_tag; /* tag name as matched in the input, e.g. "b" */
char *dest_tag; /* tag name used for the closing tag in the XHTML output, e.g. "span" */
gboolean ignore; /* when TRUE, no closing tag is written to the XHTML output */
};
/* ALLOW_TAG_ALT(x, y): if the cursor `c` is at an opening tag named x,
emit it to the XHTML output as tag y and record it on the `tags`
list so that it can be closed later.
The macro expands inside purple_markup_html_to_xhtml and relies on
that function's locals `c`, `tags`, `xhtml` and `plain`. x and y must
be string literals because they are pasted next to "<".
The first `if` handles a tag with attributes ("<x "): quoted
attribute values are re-escaped with g_markup_escape_text and copied
along. If no '>' is found, or a '<' shows up before it, only a
literal '<' is emitted. The second `if` handles the bare forms "<x>"
and "<x/>". A tag that ends in "/>" is not pushed onto `tags`.
NOTE(review): the `*c == '\\'` test in the quoted-value branch
looks like it was meant to test `*o`; `c` is on the tag's '<' when
the macro is entered, so that branch presumably never runs. Confirm
before changing it.
NOTE: Do not put `do {} while(0)` around this macro (as this is the method
recommended in the GCC docs). It contains 'continue's that should
affect the while-loop in purple_markup_html_to_xhtml and doing the
above would break that.
Also, remember to put braces in constructs that require them for
multiple statements when using this macro. */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
const char *o = c + strlen("<" x); \
const char *p = NULL, *q = NULL, *r = NULL; \
/* o = iterating over full tag \
* p = > (end of tag) \
* q = start of quoted bit \
* r = < inside tag \
*/ \
GString *innards = g_string_new(""); \
while(o && *o) { \
if(!q && (*o == '\"' || *o == '\'') ) { \
q = o; \
} else if(q) { \
if(*o == *q) { /* end of quoted bit */ \
char *unescaped = g_strndup(q+1, o-q-1); \
char *escaped = g_markup_escape_text(unescaped, -1); \
g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
g_free(unescaped); \
g_free(escaped); \
q = NULL; \
} else if(*c == '\\') { \
o++; \
} \
} else if(*o == '<') { \
r = o; \
} else if(*o == '>') { \
p = o; \
break; \
} else { \
innards = g_string_append_c(innards, *o); \
} \
o++; \
} \
if(p && !r) { /* got an end of tag and no other < earlier */\
if(*(p-1) != '/') { \
struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
pt->src_tag = x; \
pt->dest_tag = y; \
tags = g_list_prepend(tags, pt); \
} \
if(xhtml) { \
xhtml = g_string_append(xhtml, "<" y); \
xhtml = g_string_append(xhtml, innards->str); \
xhtml = g_string_append_c(xhtml, '>'); \
} \
c = p + 1; \
} else { /* got end of tag with earlier < *or* didn't get anything */ \
if(xhtml) \
xhtml = g_string_append(xhtml, "&lt;"); \
if(plain) \
plain = g_string_append_c(plain, '<'); \
c++; \
} \
g_string_free(innards, TRUE); \
continue; \
} \
if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
(*(c+strlen("<" x)) == '>' || \
!g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
if(xhtml) \
xhtml = g_string_append(xhtml, "<" y); \
c += strlen("<" x); \
if(*c != '/') { \
struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
pt->src_tag = x; \
pt->dest_tag = y; \
tags = g_list_prepend(tags, pt); \
if(xhtml) \
xhtml = g_string_append_c(xhtml, '>'); \
} else { \
if(xhtml) \
xhtml = g_string_append(xhtml, "/>");\
} \
c = strchr(c, '>') + 1; \
continue; \
}
/* Don't forget to check the note above for ALLOW_TAG_ALT. */
/* ALLOW_TAG(x): ALLOW_TAG_ALT with the same tag name on input and output. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
/*
 * Converts loosely formatted HTML into XHTML and/or plain text.
 *
 * html:      input markup; may be NULL, in which case the outputs are empty
 *            strings.
 * xhtml_out: if non-NULL, receives a newly allocated XHTML rendering.
 * plain_out: if non-NULL, receives a newly allocated plain-text rendering.
 *
 * At least one of the two output pointers must be non-NULL. The caller
 * frees the returned strings with g_free().
 *
 * Tags on the allow list are passed through (with b/u/s/sub/sup/font and
 * body bgcolor rewritten as <span style='...'>), <br> and <hr> become
 * <br/>, and any other '<' is escaped as &lt;. Tags that are still open at
 * the end of the input are closed in the XHTML output.
 */
void
purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
                            char **plain_out)
{
	GString *xhtml = NULL;
	GString *plain = NULL;
	GString *url = NULL;
	GString *cdata = NULL;
	GList *tags = NULL, *tag;
	const char *c = html;
	char quote = '\0';

#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
			quote = *(ptr++); \
		else \
			quote = '\0';

#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))

	g_return_if_fail(xhtml_out != NULL || plain_out != NULL);

	if(xhtml_out)
		xhtml = g_string_new("");
	if(plain_out)
		plain = g_string_new("");

	while(c && *c) {
		if(*c == '<') {
			if(*(c+1) == '/') { /* closing tag */
				/* look for the innermost open tag this one closes */
				tag = tags;
				while(tag) {
					struct purple_parse_tag *pt = tag->data;
					if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
						c += strlen(pt->src_tag) + 3;
						break;
					}
					tag = tag->next;
				}
				if(tag) {
					/* close every tag opened after the matching one, then
					 * the matching one itself */
					while(tags) {
						struct purple_parse_tag *pt = tags->data;
						if(xhtml && !pt->ignore)
							g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
						if(plain && purple_strequal(pt->src_tag, "a")) {
							/* if this is a link, we have to add the url to the plaintext, too */
							if (cdata && url &&
									(!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
									g_utf8_collate(url->str + 7, cdata->str) != 0))) {
								/* purple_unescape_html() allocates; keep the
								 * pointer so it can be freed after use */
								char *unescaped = purple_unescape_html(url->str);
								g_string_append_printf(plain, " <%s>", g_strstrip(unescaped));
								g_free(unescaped);
							}
							if (cdata) {
								g_string_free(cdata, TRUE);
								cdata = NULL;
							}
						}
						if(tags == tag)
							break;
						tags = g_list_delete_link(tags, tags);
						g_free(pt);
					}
					g_free(tag->data);
					tags = g_list_delete_link(tags, tag);
				} else {
					/* a closing tag we weren't expecting...
					 * we'll let it slide, if it's really a tag...if it's
					 * just a </ we'll escape it properly */
					const char *end = c+2;
					while(*end && g_ascii_isalpha(*end))
						end++;
					if(*end == '>') {
						c = end+1;
					} else {
						if(xhtml)
							xhtml = g_string_append(xhtml, "&lt;");
						if(plain)
							plain = g_string_append_c(plain, '<');
						c++;
					}
				}
			} else { /* opening tag */
				ALLOW_TAG("blockquote");
				ALLOW_TAG("cite");
				ALLOW_TAG("div");
				ALLOW_TAG("em");
				ALLOW_TAG("h1");
				ALLOW_TAG("h2");
				ALLOW_TAG("h3");
				ALLOW_TAG("h4");
				ALLOW_TAG("h5");
				ALLOW_TAG("h6");
				/* we only allow html to start the message */
				if(c == html) {
					ALLOW_TAG("html");
				}
				ALLOW_TAG_ALT("i", "em");
				ALLOW_TAG_ALT("italic", "em");
				ALLOW_TAG("li");
				ALLOW_TAG("ol");
				ALLOW_TAG("p");
				ALLOW_TAG("pre");
				ALLOW_TAG("q");
				ALLOW_TAG("span");
				ALLOW_TAG("ul");

				/* we skip <HR> because it's not legal in XHTML-IM.  However,
				 * we still want to send something sensible, so we put a
				 * linebreak in its place. <BR> also needs special handling
				 * because putting a </BR> to close it would just be dumb. */
				if((!g_ascii_strncasecmp(c, "<br", 3)
							|| !g_ascii_strncasecmp(c, "<hr", 3))
						&& (*(c+3) == '>' ||
							!g_ascii_strncasecmp(c+3, "/>", 2) ||
							!g_ascii_strncasecmp(c+3, " />", 3))) {
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<br/>");
					if(plain && *c != '\n')
						plain = g_string_append_c(plain, '\n');
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					if (*(c+2) == '>')
						pt->src_tag = "b";
					else if (*(c+2) == 'o')
						pt->src_tag = "bold";
					else
						pt->src_tag = "strong";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "u" : "underline";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if (xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "s" : "strike";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sub";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sup";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
					const char *p = c + 4;
					GString *src = NULL, *alt = NULL;
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append(to, "&apos;"); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}

					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "src=", 4)) {
							const char *q = p + 4;
							if (src)
								g_string_free(src, TRUE);
							src = g_string_new("");
							ESCAPE(q, src);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
							const char *q = p + 4;
							if (alt)
								g_string_free(alt, TRUE);
							alt = g_string_new("");
							ESCAPE(q, alt);
							p = q;
						} else {
							p++;
						}
					}
#undef ESCAPE
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					/* src and alt are required! */
					if(src && xhtml)
						g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
					if(alt) {
						if(plain) {
							/* free the unescaped copy once it is appended */
							char *unescaped = purple_unescape_html(alt->str);
							plain = g_string_append(plain, unescaped);
							g_free(unescaped);
						}
						if(!src && xhtml)
							xhtml = g_string_append(xhtml, alt->str);
						g_string_free(alt, TRUE);
					}
					/* an <img> without src leaves this NULL, and
					 * g_string_free() must not be called with NULL */
					if(src)
						g_string_free(src, TRUE);
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
					const char *p = c + 2;
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "href=", 5)) {
							const char *q = p + 5;
							if (url)
								g_string_free(url, TRUE);
							url = g_string_new("");
							if (cdata)
								g_string_free(cdata, TRUE);
							cdata = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								int len;
								if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
									url = g_string_append(url, "&amp;");
								else if (*q == '"')
									url = g_string_append(url, "&quot;");
								else
									url = g_string_append_c(url, *q);
								q++;
							}
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "a";
					pt->dest_tag = "a";
					tags = g_list_prepend(tags, pt);
					if(xhtml)
						g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
					continue;
				}
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append_c(to, '\"'); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}
				if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
					const char *p = c + 5;
					GString *style = g_string_new("");
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "back=", 5)) {
							const char *q = p + 5;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "background: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "color=", 6)) {
							const char *q = p + 6;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "color: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "face=", 5)) {
							const char *q = p + 5;
							GString *face = g_string_new("");
							ESCAPE(q, face);
							g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
							g_string_free(face, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "size=", 5)) {
							const char *q = p + 5;
							int sz;
							const char *size = "medium";
							CHECK_QUOTE(q);
							sz = atoi(q);
							switch (sz)
							{
							case 1:
								size = "xx-small";
								break;
							case 2:
								size = "small";
								break;
							case 3:
								size = "medium";
								break;
							case 4:
								size = "large";
								break;
							case 5:
								size = "x-large";
								break;
							case 6:
							case 7:
								size = "xx-large";
								break;
							default:
								break;
							}
							g_string_append_printf(style, "font-size: %s; ", size);
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "font";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					if(style->len && xhtml)
						g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
					else
						pt->ignore = TRUE;
					g_string_free(style, TRUE);
					continue;
				}
#undef ESCAPE
				if (!g_ascii_strncasecmp(c, "<body ", 6)) {
					const char *p = c + 6;
					gboolean did_something = FALSE;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
							const char *q = p + 8;
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
							GString *color = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								color = g_string_append_c(color, *q);
								q++;
							}
							if (xhtml)
								g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
							g_string_free(color, TRUE);
							if ((c = strchr(p, '>')) != NULL)
								c++;
							else
								c = p;
							pt->src_tag = "body";
							pt->dest_tag = "span";
							tags = g_list_prepend(tags, pt);
							did_something = TRUE;
							break;
						}
						p++;
					}
					if (did_something) continue;
				}
				/* this has to come after the special case for bgcolor */
				ALLOW_TAG("body");
				if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
					char *p = strstr(c + strlen("<!--"), "-->");
					if(p) {
						if(xhtml)
							xhtml = g_string_append(xhtml, "<!--");
						c += strlen("<!--");
						continue;
					}
				}

				/* not a tag we know: emit the '<' as literal text */
				if(xhtml)
					xhtml = g_string_append(xhtml, "&lt;");
				if(plain)
					plain = g_string_append_c(plain, '<');
				c++;
			}
		} else if(*c == '&') {
			char buf[7];
			const char *pln;
			int len;

			if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
				len = 1;
				g_snprintf(buf, sizeof(buf), "%c", *c);
				pln = buf;
			}
			if(xhtml)
				xhtml = g_string_append_len(xhtml, c, len);
			if(plain)
				plain = g_string_append(plain, pln);
			if(cdata)
				cdata = g_string_append_len(cdata, c, len);
			c += len;
		} else {
			if(xhtml)
				xhtml = g_string_append_c(xhtml, *c);
			if(plain)
				plain = g_string_append_c(plain, *c);
			if(cdata)
				cdata = g_string_append_c(cdata, *c);
			c++;
		}
	}

	/* close whatever the input left open */
	if(xhtml) {
		for (tag = tags; tag ; tag = tag->next) {
			struct purple_parse_tag *pt = tag->data;
			if(!pt->ignore)
				g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
		}
	}
	/* the list owns its purple_parse_tag structs, so free them with it */
	g_list_free_full(tags, g_free);
	if(xhtml_out)
		*xhtml_out = g_string_free(xhtml, FALSE);
	if(plain_out)
		*plain_out = g_string_free(plain, FALSE);
	if(url)
		g_string_free(url, TRUE);
	if (cdata)
		g_string_free(cdata, TRUE);
#undef CHECK_QUOTE
#undef VALID_CHAR
}
/* The following are probably reasonable changes:
* - \n should be converted to a normal space
* - in addition to <br>, <p> and <div> etc. should also be converted into \n
* - We want to turn </td>#whitespace<td> sequences into a single tab
* - We want to turn </tr>#whitespace<tr> sequences into a single \n
* - <script>...</script> and <style>...</style> should be completely removed
*/
/*
* Returns a newly allocated copy of str with the HTML tags removed and
* recognised entities decoded, or NULL when str is NULL.
*
* The copy is rewritten in place: i is the read index and j the write index
* into the same buffer.
*/
char *
purple_markup_strip_html(const char *str)
{
int i, j, k, entlen;
gboolean visible = TRUE;
gboolean closing_td_p = FALSE;
gchar *str2;
const gchar *cdata_close_tag = NULL, *ent;
gchar *href = NULL;
int href_st = 0;
if(!str)
return NULL;
str2 = g_strdup(str);
for (i = 0, j = 0; str2[i]; i++)
{
if (str2[i] == '<')
{
if (cdata_close_tag)
{
/* Note: Don't even assume any other tag is a tag in CDATA */
if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
strlen(cdata_close_tag)) == 0)
{
i += strlen(cdata_close_tag) - 1;
cdata_close_tag = NULL;
}
continue;
}
else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
{
/* a <td that follows a </td> becomes a single tab */
str2[j++] = '\t';
visible = TRUE;
}
else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
{
/* hide whitespace until the next tag or visible character */
closing_td_p = TRUE;
visible = FALSE;
}
else
{
closing_td_p = FALSE;
visible = TRUE;
}
k = i + 1;
/* a '<' followed by whitespace or by the end of the string is not
* treated as a tag; it falls through to the copy step at the bottom
* of the loop */
if(g_ascii_isspace(str2[k]))
visible = TRUE;
else if (str2[k])
{
/* Scan until we end the tag either implicitly (closed start
* tag) or explicitly, using a sloppy method (i.e., < or >
* inside quoted attributes will screw us up)
*/
while (str2[k] && str2[k] != '<' && str2[k] != '>')
{
k++;
}
/* If we've got an <a> tag with an href, save the address
* to print later. */
if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
g_ascii_isspace(str2[i+2]))
{
int st; /* start of href, inclusive [ */
int end; /* end of href, exclusive ) */
char delim = ' ';
/* Find start of href */
for (st = i + 3; st < k; st++)
{
if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
{
st += 5;
if (str2[st] == '"' || str2[st] == '\'')
{
delim = str2[st];
st++;
}
break;
}
}
/* find end of address */
for (end = st; end < k && str2[end] != delim; end++)
{
/* All the work is done in the loop construct above. */
}
/* If there's an address, save it. If there was
* already one saved, kill it. */
if (st < k)
{
char *tmp;
g_free(href);
tmp = g_strndup(str2 + st, end - st);
href = purple_unescape_html(tmp);
g_free(tmp);
/* remember where the link text starts in the output */
href_st = j;
}
}
/* Replace </a> with an ascii representation of the
* address the link was pointing to. */
else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
{
size_t hrlen = strlen(href);
/* Only insert the href if it's different from the CDATA. */
if ((hrlen != (gsize)(j - href_st) ||
strncmp(str2 + href_st, href, hrlen)) &&
(hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
strncmp(str2 + href_st, href + 7, hrlen - 7)))
{
str2[j++] = ' ';
str2[j++] = '(';
memmove(str2 + j, href, hrlen);
j += hrlen;
str2[j++] = ')';
g_free(href);
href = NULL;
}
}
/* Check for tags which should be mapped to newline (but ignore some of
* the tags at the beginning of the text) */
else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
|| g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
{
str2[j++] = '\n';
}
/* Check for tags which begin CDATA and need to be closed */
else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
{
cdata_close_tag = "</script>";
}
else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
{
cdata_close_tag = "</style>";
}
/* Update the index and continue checking after the tag */
i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
continue;
}
}
else if (cdata_close_tag)
{
/* inside <script> or <style>: drop everything */
continue;
}
else if (!g_ascii_isspace(str2[i]))
{
visible = TRUE;
}
/* decode a recognised entity into the output */
if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
{
while (*ent)
str2[j++] = *ent++;
i += entlen - 1;
continue;
}
/* copy the character, turning any whitespace into a plain space */
if (visible)
str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
}
g_free(href);
str2[j] = '\0';
return str2;
}
/*
 * Returns TRUE for the characters the linkifier treats as the end of a
 * URL or address: space, comma, NUL, newline, carriage return, '<', '>'
 * and '"'.
 */
static gboolean
badchar(char c)
{
	if (c == '\0' || c == ' ' || c == ',')
		return TRUE;

	if (c == '\n' || c == '\r')
		return TRUE;

	if (c == '<' || c == '>' || c == '"')
		return TRUE;

	return FALSE;
}
/*
 * Returns TRUE when `c` starts with one of the entities &lt;, &gt; or
 * &quot; (compared case-insensitively).
 */
static gboolean
badentity(const char *c)
{
	static const char *const terminators[] = { "&lt;", "&gt;", "&quot;" };
	size_t i;

	for (i = 0; i < sizeof(terminators) / sizeof(terminators[0]); i++) {
		if (g_ascii_strncasecmp(c, terminators[i], strlen(terminators[i])) == 0)
			return TRUE;
	}

	return FALSE;
}
/*
 * Turns the URL that starts at `c` into an <A HREF> element appended to
 * `ret`.
 *
 * ret:          output being built by purple_markup_linkify().
 * start:        beginning of the whole text, used as the lower bound when
 *               stepping back over a trailing '.' or ')'.
 * c:            first character of the matched scheme/prefix.
 * matchlen:     length of that prefix; a URL consisting of nothing but the
 *               prefix is not linkified.
 * urlprefix:    text put in front of the URL inside HREF (e.g. "http://").
 * inside_paren: parenthesis depth tracked by the caller; when positive, a
 *               trailing ')' is left out of the URL.
 *
 * Returns the position just past the URL, or `c` unchanged when nothing was
 * appended.
 */
static const char *
process_link(GString *ret,
             const char *start, const char *c,
             int matchlen,
             const char *urlprefix,
             int inside_paren)
{
	const char *stop = c;
	char *shown, *target;

	/* Walk forward to the first character that terminates the URL. */
	while (TRUE) {
		if (badchar(*stop) || badentity(stop)) {
			/* nothing but the prefix itself: not a link */
			if (stop - c == matchlen)
				return c;

			/* a comma only ends the URL when a space follows it */
			if (!(*stop == ',' && *(stop + 1) != ' '))
				break;
		}
		stop++;
	}

	if (stop > start && *(stop - 1) == '.')
		stop--;
	if (stop > start && *(stop - 1) == ')' && inside_paren > 0)
		stop--;

	shown = g_strndup(c, stop - c);
	target = purple_unescape_html(shown);
	g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
	                       urlprefix,
	                       target, shown);
	g_free(target);
	g_free(shown);

	return stop;
}
/*
* Returns a newly allocated copy of text in which URLs and e-mail addresses
* are wrapped in <A HREF="..."> elements, or NULL when text is NULL.
*
* Recognised prefixes are http://, https://, ftp://, sftp://, file://,
* www., ftp., xmpp: and mailto:. A bare address around an '@' is turned
* into a mailto: link when purple_email_is_valid() accepts it. Text inside
* existing tags is copied through unchanged.
*/
char *
purple_markup_linkify(const char *text)
{
const char *c, *t, *q = NULL;
char *tmpurlbuf, *url_buf;
gunichar g;
gboolean inside_html = FALSE;
int inside_paren = 0;
GString *ret;
if (text == NULL)
return NULL;
ret = g_string_new("");
c = text;
while (*c) {
/* track parenthesis depth so process_link can drop a trailing ')' */
if(*c == '(' && !inside_html) {
inside_paren++;
ret = g_string_append_c(ret, *c);
c++;
}
if(inside_html) {
/* inside a tag: only watch for its end and for quoted sections */
if(*c == '>') {
inside_html = FALSE;
} else if(!q && (*c == '\"' || *c == '\'')) {
q = c;
} else if(q) {
if(*c == *q)
q = NULL;
}
} else if(*c == '<') {
inside_html = TRUE;
if (!g_ascii_strncasecmp(c, "<A", 2)) {
/* copy everything up to "/A>" verbatim */
while (1) {
if (!g_ascii_strncasecmp(c, "/A>", 3)) {
inside_html = FALSE;
break;
}
ret = g_string_append_c(ret, *c);
c++;
if (!(*c))
break;
}
}
} else if (!g_ascii_strncasecmp(c, "http://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "https://", 8)) {
c = process_link(ret, text, c, 8, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
c = process_link(ret, text, c, 6, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "file://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 4, "http://", inside_paren);
} else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 4, "ftp://", inside_paren);
} else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 5, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
t = c;
while (1) {
if (badchar(*t) || badentity(t)) {
char *d;
/* "mailto:" with nothing after it is not a link */
if (t - c == 7) {
break;
}
if (t > text && *(t - 1) == '.')
t--;
/* validate only the address part, without any "?..." suffix */
if ((d = strstr(c + 7, "?")) != NULL && d < t)
url_buf = g_strndup(c + 7, d - c - 7);
else
url_buf = g_strndup(c + 7, t - c - 7);
if (!purple_email_is_valid(url_buf)) {
g_free(url_buf);
break;
}
g_free(url_buf);
url_buf = g_strndup(c, t - c);
tmpurlbuf = purple_unescape_html(url_buf);
g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
tmpurlbuf, url_buf);
g_free(url_buf);
g_free(tmpurlbuf);
c = t;
break;
}
t++;
}
} else if (c != text && (*c == '@')) {
int flag;
GString *gurl_buf = NULL;
const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
/* an address needs a legal character on both sides of the '@' */
if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
flag = 0;
else {
flag = 1;
gurl_buf = g_string_new("");
}
t = c;
while (flag) {
/* iterate backwards grabbing the local part of an email address */
g = g_utf8_get_char(t);
if (badchar(*t) || (g >= 127) || (*t == '(') ||
((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
!g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
(t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
/* local part will already be part of ret, strip it out */
ret = g_string_truncate(ret, ret->len - (c - t));
ret = g_string_append_unichar(ret, g);
break;
} else {
g_string_prepend_unichar(gurl_buf, g);
t = g_utf8_find_prev_char(text, t);
/* walked back past the start of the text: everything copied
* so far belongs to the address, so start the output over */
if (t < text) {
ret = g_string_assign(ret, "");
break;
}
}
}
t = g_utf8_find_next_char(c, NULL);
while (flag) {
/* iterate forwards grabbing the domain part of an email address */
g = g_utf8_get_char(t);
if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
char *d;
url_buf = g_string_free(gurl_buf, FALSE);
gurl_buf = NULL;
/* strip off trailing periods */
if (*url_buf) {
for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
*d = '\0';
}
tmpurlbuf = purple_unescape_html(url_buf);
if (purple_email_is_valid(tmpurlbuf)) {
g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
tmpurlbuf, url_buf);
} else {
/* not an address after all: put the text back as it was */
g_string_append(ret, url_buf);
}
g_free(url_buf);
g_free(tmpurlbuf);
c = t;
break;
} else {
g_string_append_unichar(gurl_buf, g);
t = g_utf8_find_next_char(t, NULL);
}
}
if (gurl_buf) {
g_string_free(gurl_buf, TRUE);
}
}
if(*c == ')' && !inside_html) {
inside_paren--;
ret = g_string_append_c(ret, *c);
c++;
}
/* the branches above may have moved c onto the terminator */
if (*c == 0)
break;
ret = g_string_append_c(ret, *c);
c++;
}
return g_string_free(ret, FALSE);
}
/*
 * Returns a newly allocated copy of `in` with every entity recognised by
 * purple_markup_unescape_entity() replaced by its text. Everything else is
 * copied unchanged. Returns NULL when `in` is NULL.
 */
char *
purple_unescape_text(const char *in)
{
	GString *out;
	const char *cursor;

	if (in == NULL)
		return NULL;

	out = g_string_new("");

	for (cursor = in; *cursor != '\0'; ) {
		int consumed;
		const char *plain = purple_markup_unescape_entity(cursor, &consumed);

		if (plain == NULL) {
			/* not an entity: copy the byte through */
			g_string_append_c(out, *cursor);
			cursor++;
			continue;
		}

		g_string_append(out, plain);
		cursor += consumed;
	}

	return g_string_free(out, FALSE);
}
/*
 * Returns a newly allocated copy of `html` with every entity recognised by
 * purple_markup_unescape_entity() replaced by its text and every literal
 * "<br>" (exact, lower-case match) replaced by a newline. Returns NULL
 * when `html` is NULL.
 */
char *
purple_unescape_html(const char *html)
{
	GString *out;
	const char *cursor;

	if (html == NULL)
		return NULL;

	out = g_string_new("");

	for (cursor = html; *cursor != '\0'; ) {
		int consumed;
		const char *plain = purple_markup_unescape_entity(cursor, &consumed);

		if (plain != NULL) {
			g_string_append(out, plain);
			cursor += consumed;
			continue;
		}

		if (strncmp(cursor, "<br>", 4) == 0) {
			g_string_append_c(out, '\n');
			cursor += 4;
			continue;
		}

		g_string_append_c(out, *cursor);
		cursor++;
	}

	return g_string_free(out, FALSE);
}
/*
 * Returns a newly allocated slice of the markup in `str` covering the
 * visible positions [x, y).
 *
 * Positions count visible characters: an entity counts as one, "<img "
 * as strlen("[Image]"), "<br" as one, "<hr>" as strlen("\n---\n"), and
 * any other tag as zero. Tags that were opened before position x are
 * written again at the start of the slice, and tags still open at the end
 * are closed.
 *
 * Returns NULL when `str` is NULL, when x > y, or when a '<' has no
 * matching '>' or a '&' has no following ';'. Returns an empty string when
 * x == y.
 */
char *
purple_markup_slice(const char *str, guint x, guint y)
{
	GString *out;
	GQueue *open_tags;
	guint pos = 0;
	gboolean reopened = FALSE;
	char *tag;

	g_return_val_if_fail(str != NULL, NULL);
	g_return_val_if_fail(x <= y, NULL);

	if (x == y)
		return g_strdup("");

	out = g_string_new("");
	open_tags = g_queue_new();

	for (; *str && (pos < y); str = g_utf8_next_char(str)) {
		gunichar ch = g_utf8_get_char(str);

		if (ch == '<') {
			const char *close = strchr(str, '>');
			gssize taglen;

			if (close == NULL)
				goto malformed;

			taglen = close - str + 1;

			if (!g_ascii_strncasecmp(str, "<img ", 5)) {
				pos += strlen("[Image]");
			} else if (!g_ascii_strncasecmp(str, "<br", 3)) {
				pos += 1;
			} else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
				pos += strlen("\n---\n");
			} else if (!g_ascii_strncasecmp(str, "</", 2)) {
				/* a closing tag pops the most recently opened one */
				g_free(g_queue_pop_head(open_tags));
			} else {
				/* any other tag is remembered as open */
				g_queue_push_head(open_tags, g_strndup(str, taglen));
			}

			if (pos >= x)
				g_string_append_len(out, str, taglen);

			/* the loop increment steps over the '>' */
			str = close;
		} else if (ch == '&') {
			const char *close = strchr(str, ';');

			if (close == NULL)
				goto malformed;

			if (pos >= x)
				g_string_append_len(out, str, close - str + 1);
			pos++;

			/* the loop increment steps over the ';' */
			str = close;
		} else {
			if (pos == x && pos > 0 && !reopened) {
				GList *l;

				/* write the still-open tags again, oldest first */
				for (l = open_tags->tail; l != NULL; l = l->prev)
					g_string_append(out, l->data);
				reopened = TRUE;
			}

			if (pos >= x)
				g_string_append_unichar(out, ch);
			pos++;
		}
	}

	/* close whatever is still open, innermost first */
	while ((tag = g_queue_pop_head(open_tags)) != NULL) {
		char *name = purple_markup_get_tag_name(tag);

		g_string_append_printf(out, "</%s>", name);
		g_free(name);
		g_free(tag);
	}

	g_queue_free(open_tags);
	return g_string_free(out, FALSE);

malformed:
	g_string_free(out, TRUE);
	while ((tag = g_queue_pop_head(open_tags)) != NULL)
		g_free(tag);
	g_queue_free(open_tags);
	return NULL;
}
/*
 * Returns a newly allocated copy of the name of `tag`, i.e. the text
 * between the leading '<' and the first '>', ' ' or '/' (or the end of the
 * string). Returns NULL (via g_return_val_if_fail) when `tag` is NULL or
 * does not start with '<'.
 */
char *
purple_markup_get_tag_name(const char *tag)
{
	const char *name;
	const char *stop;

	g_return_val_if_fail(tag != NULL, NULL);
	g_return_val_if_fail(*tag == '<', NULL);

	name = tag + 1;
	stop = name;
	while (*stop != '\0' && *stop != '>' && *stop != ' ' && *stop != '/')
		stop++;

	return g_strndup(name, stop - name);
}