pidgin/pidgin

Really disable some plugins
default tip
43 hours ago, Gary Kramlich
44125b8e3b27
Really disable some plugins

Previously, we disabled some plugins that we want to keep around but can't port
yet by setting `build_by_default` to `false`. However, this isn't working on
many machines. I haven't figured out why, so instead let's just make their
meson.build files exit early.

Testing Done:
Ran the turtles on a machine that was still building these plugins.

Reviewed at https://reviews.imfreedom.org/r/3142/
/*
* Purple - Internet Messaging Library
* Copyright (C) Pidgin Developers <devel@pidgin.im>
*
* Purple is the legal property of its developers, whose names are too numerous
* to list here. Please refer to the COPYRIGHT file distributed with this
* source distribution.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this library; if not, see <https://www.gnu.org/licenses/>.
*/
#include "purplemarkup.h"
#include "util.h"
/**
 * purple_markup_unescape_entity:
 * @text:   Text positioned at a possible entity; must start with '&' to match.
 * @length: (out) (optional): On success, receives the number of bytes of
 *          @text that make up the entity (including '&' and ';').
 *
 * Recognizes the named entities &amp; &lt; &gt; &nbsp; &copy; &quot; &reg;
 * &apos; (case-insensitively) and numeric references of the form &#NNN; or
 * &#xHHH;.
 *
 * Numeric references are only accepted when they name a valid Unicode
 * character: zero, surrogates and values above U+10FFFF are rejected so that
 * the returned string is always well-formed UTF-8.
 *
 * Returns: the replacement text, or NULL if @text is NULL or does not start
 *          with a recognized entity. The result must not be freed. For
 *          numeric references it points into a static buffer that is
 *          overwritten by the next numeric lookup.
 */
const char *
purple_markup_unescape_entity(const char *text, int *length)
{
	const char *pln;
	int len;

	if (!text || *text != '&')
		return NULL;

/* Compares the start of text against the literal s and, as a side effect,
 * stores the literal's length in len. */
#define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))

	if(IS_ENTITY("&amp;"))
		pln = "&";
	else if(IS_ENTITY("&lt;"))
		pln = "<";
	else if(IS_ENTITY("&gt;"))
		pln = ">";
	else if(IS_ENTITY("&nbsp;"))
		pln = " ";
	else if(IS_ENTITY("&copy;"))
		pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */
	else if(IS_ENTITY("&quot;"))
		pln = "\"";
	else if(IS_ENTITY("&reg;"))
		pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */
	else if(IS_ENTITY("&apos;"))
		pln = "\'";
	else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
		static char buf[7];
		const char *start = text + 2;
		char *end;
		guint64 pound;
		int base = 10;
		int buflen;

		if (*start == 'x') {
			base = 16;
			start++;
		}

		pound = g_ascii_strtoull(start, &end, base);

		/* The range check must come before the cast to gunichar so that
		 * oversized values cannot wrap around into a valid code point. */
		if (pound == 0 || pound > INT_MAX || *end != ';' ||
		    !g_unichar_validate((gunichar)pound)) {
			return NULL;
		}

		len = (end - text) + 1;

		buflen = g_unichar_to_utf8((gunichar)pound, buf);
		buf[buflen] = '\0';
		pln = buf;
	}
	else
		return NULL;

	if (length)
		*length = len;
	return pln;
}
/* Record for one tag that purple_markup_html_to_xhtml() has opened and not
 * yet closed. Records are kept in a GList with the most recently opened tag
 * at the head. */
struct purple_parse_tag {
/* Tag name as matched in the input (e.g. "b", "font"); compared against
 * closing tags. Always assigned from string literals in this file. */
char *src_tag;
/* Tag name written to the XHTML output when the tag is closed. */
char *dest_tag;
/* When TRUE, no closing tag is written to the XHTML output for this entry. */
gboolean ignore;
};
/* ALLOW_TAG_ALT(x, y): pass an opening tag named x (a string literal) through
 * to the XHTML output under the name y (also a string literal).
 *
 * The macro expands inside the main loop of purple_markup_html_to_xhtml and
 * relies on these variables being in scope there: c (current input
 * position, pointing at '<'), tags (list of open tags), xhtml and plain
 * (output strings, either may be NULL).
 *
 * It expands to two consecutive if-blocks:
 *  1. "<x " followed by attributes: the attribute text is collected, with
 *     quoted values re-escaped via g_markup_escape_text. If a closing '>'
 *     is found and no stray '<' was seen first, "<y", the collected
 *     attributes and '>' are emitted and, unless the tag ends in "/>", the
 *     tag is pushed on the open-tag list. Otherwise the '<' is emitted as
 *     literal text and only one character is consumed.
 *  2. "<x>" or "<x/>": emitted as "<y>" or "<y/>"; only the non-self-closing
 *     form is pushed on the open-tag list.
 * Both blocks end with 'continue'.
 *
 * NOTE: Do not put `do {} while(0)` around this macro (as this is the method
 * recommended in the GCC docs). It contains 'continue's that should
 * affect the while-loop in purple_markup_html_to_xhtml and doing the
 * above would break that.
 * Also, remember to put braces in constructs that require them for
 * multiple statements when using this macro.
 *
 * NOTE(review): inside the quoted-value scan the backslash test reads `*c`
 * rather than `*o`. At that point c still points at the tag's '<', so the
 * branch looks unreachable - confirm whether `*o` was intended before
 * changing it (making it live would also need an end-of-string guard). */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
const char *o = c + strlen("<" x); \
const char *p = NULL, *q = NULL, *r = NULL; \
/* o = iterating over full tag \
* p = > (end of tag) \
* q = start of quoted bit \
* r = < inside tag \
*/ \
GString *innards = g_string_new(""); \
while(o && *o) { \
if(!q && (*o == '\"' || *o == '\'') ) { \
q = o; \
} else if(q) { \
if(*o == *q) { /* end of quoted bit */ \
char *unescaped = g_strndup(q+1, o-q-1); \
char *escaped = g_markup_escape_text(unescaped, -1); \
g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
g_free(unescaped); \
g_free(escaped); \
q = NULL; \
} else if(*c == '\\') { \
o++; \
} \
} else if(*o == '<') { \
r = o; \
} else if(*o == '>') { \
p = o; \
break; \
} else { \
innards = g_string_append_c(innards, *o); \
} \
o++; \
} \
if(p && !r) { /* got an end of tag and no other < earlier */\
if(*(p-1) != '/') { \
struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
pt->src_tag = x; \
pt->dest_tag = y; \
tags = g_list_prepend(tags, pt); \
} \
if(xhtml) { \
xhtml = g_string_append(xhtml, "<" y); \
xhtml = g_string_append(xhtml, innards->str); \
xhtml = g_string_append_c(xhtml, '>'); \
} \
c = p + 1; \
} else { /* got end of tag with earlier < *or* didn't get anything */ \
if(xhtml) \
xhtml = g_string_append(xhtml, "&lt;"); \
if(plain) \
plain = g_string_append_c(plain, '<'); \
c++; \
} \
g_string_free(innards, TRUE); \
continue; \
} \
if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
(*(c+strlen("<" x)) == '>' || \
!g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
if(xhtml) \
xhtml = g_string_append(xhtml, "<" y); \
c += strlen("<" x); \
if(*c != '/') { \
struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
pt->src_tag = x; \
pt->dest_tag = y; \
tags = g_list_prepend(tags, pt); \
if(xhtml) \
xhtml = g_string_append_c(xhtml, '>'); \
} else { \
if(xhtml) \
xhtml = g_string_append(xhtml, "/>");\
} \
c = strchr(c, '>') + 1; \
continue; \
}
/* Shorthand for a tag whose name is the same in the input and the output.
 * Don't forget to check the note above for ALLOW_TAG_ALT. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
/**
 * purple_markup_html_to_xhtml:
 * @html:      The HTML-ish markup to convert. May be NULL, in which case
 *             both outputs are empty strings.
 * @xhtml_out: (out) (optional): Receives the XHTML rendering.
 * @plain_out: (out) (optional): Receives the plain-text rendering.
 *
 * Converts loosely formatted HTML into XHTML and/or plain text in a single
 * pass. At least one of @xhtml_out and @plain_out must be non-NULL.
 *
 * A fixed set of tags is passed through or rewritten (for example <b>, <u>,
 * <s>, <sub>, <sup>, <font> and <body bgcolor=...> become styled <span>s,
 * <hr> and <br> become <br/>). Any other '<' is written as "&lt;" to the
 * XHTML output and as '<' to the plain output. Tags still open at the end
 * of the input are closed in the XHTML output. For links, the plain output
 * gets " <url>" appended after the link text when the URL differs from it.
 *
 * Each returned string is newly allocated; free it with g_free().
 */
void
purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
                            char **plain_out)
{
	GString *xhtml = NULL;
	GString *plain = NULL;
	GString *url = NULL;    /* href of the most recent <a href=...> */
	GString *cdata = NULL;  /* text seen since the most recent <a href=...> */
	GList *tags = NULL, *tag;
	const char *c = html;
	char quote = '\0';

/* If ptr is at a quote character, remember it in `quote` and step past it;
 * otherwise clear `quote`. */
#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
			quote = *(ptr++); \
		else \
			quote = '\0';

/* True while ptr is still inside an attribute value: up to the matching
 * quote when quoted, or up to a space or '>' when unquoted. */
#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))

	g_return_if_fail(xhtml_out != NULL || plain_out != NULL);

	if(xhtml_out)
		xhtml = g_string_new("");
	if(plain_out)
		plain = g_string_new("");

	while(c && *c) {
		if(*c == '<') {
			if(*(c+1) == '/') { /* closing tag */
				/* Find the innermost open tag this closing tag matches. */
				tag = tags;
				while(tag) {
					struct purple_parse_tag *pt = tag->data;
					if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
						c += strlen(pt->src_tag) + 3;
						break;
					}
					tag = tag->next;
				}
				if(tag) {
					/* Close every tag opened after the matched one, then
					 * the matched one itself. */
					while(tags) {
						struct purple_parse_tag *pt = tags->data;
						if(xhtml && !pt->ignore)
							g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
						if(plain && purple_strequal(pt->src_tag, "a")) {
							/* if this is a link, we have to add the url to the plaintext, too */
							if (cdata && url &&
									(!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
									                                 g_utf8_collate(url->str + 7, cdata->str) != 0)))
							{
								char *unescaped = purple_unescape_html(url->str);
								g_string_append_printf(plain, " <%s>",
										g_strstrip(unescaped));
								g_free(unescaped);
							}
							if (cdata) {
								g_string_free(cdata, TRUE);
								cdata = NULL;
							}
						}
						if(tags == tag)
							break;
						tags = g_list_delete_link(tags, tags);
						g_free(pt);
					}
					g_free(tag->data);
					tags = g_list_delete_link(tags, tag);
				} else {
					/* a closing tag we weren't expecting...
					 * we'll let it slide, if it's really a tag...if it's
					 * just a </ we'll escape it properly */
					const char *end = c+2;
					while(*end && g_ascii_isalpha(*end))
						end++;
					if(*end == '>') {
						c = end+1;
					} else {
						if(xhtml)
							xhtml = g_string_append(xhtml, "&lt;");
						if(plain)
							plain = g_string_append_c(plain, '<');
						c++;
					}
				}
			} else { /* opening tag */
				ALLOW_TAG("blockquote");
				ALLOW_TAG("cite");
				ALLOW_TAG("div");
				ALLOW_TAG("em");
				ALLOW_TAG("h1");
				ALLOW_TAG("h2");
				ALLOW_TAG("h3");
				ALLOW_TAG("h4");
				ALLOW_TAG("h5");
				ALLOW_TAG("h6");
				/* we only allow html to start the message */
				if(c == html) {
					ALLOW_TAG("html");
				}
				ALLOW_TAG_ALT("i", "em");
				ALLOW_TAG_ALT("italic", "em");
				ALLOW_TAG("li");
				ALLOW_TAG("ol");
				ALLOW_TAG("p");
				ALLOW_TAG("pre");
				ALLOW_TAG("q");
				ALLOW_TAG("span");
				ALLOW_TAG("ul");

				/* we skip <HR> because it's not legal in XHTML-IM.  However,
				 * we still want to send something sensible, so we put a
				 * linebreak in its place. <BR> also needs special handling
				 * because putting a </BR> to close it would just be dumb. */
				if((!g_ascii_strncasecmp(c, "<br", 3)
							|| !g_ascii_strncasecmp(c, "<hr", 3))
						&& (*(c+3) == '>' ||
							!g_ascii_strncasecmp(c+3, "/>", 2) ||
							!g_ascii_strncasecmp(c+3, " />", 3))) {
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<br/>");
					if(plain && *c != '\n')
						plain = g_string_append_c(plain, '\n');
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					/* The character at offset 2 tells the three spellings apart. */
					if (*(c+2) == '>')
						pt->src_tag = "b";
					else if (*(c+2) == 'o')
						pt->src_tag = "bold";
					else
						pt->src_tag = "strong";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "u" : "underline";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if (xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "s" : "strike";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sub";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sup";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
					const char *p = c + 4;
					GString *src = NULL, *alt = NULL;
/* Copy one attribute value from `from` into `to`, escaping bare '&' and
 * single quotes so the value can sit inside a single-quoted attribute. */
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append(to, "&apos;"); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}

					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "src=", 4)) {
							const char *q = p + 4;
							if (src)
								g_string_free(src, TRUE);
							src = g_string_new("");
							ESCAPE(q, src);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
							const char *q = p + 4;
							if (alt)
								g_string_free(alt, TRUE);
							alt = g_string_new("");
							ESCAPE(q, alt);
							p = q;
						} else {
							p++;
						}
					}
#undef ESCAPE
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					/* src and alt are required! */
					if(src && xhtml)
						g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
					if(alt) {
						if(plain) {
							char *unescaped = purple_unescape_html(alt->str);
							plain = g_string_append(plain, unescaped);
							g_free(unescaped);
						}
						if(!src && xhtml) {
							xhtml = g_string_append(xhtml, alt->str);
						}
						g_string_free(alt, TRUE);
					}
					/* src stays NULL when the tag has no src= attribute;
					 * g_string_free() must not be handed NULL. */
					if(src)
						g_string_free(src, TRUE);
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
					const char *p = c + 2;
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "href=", 5)) {
							const char *q = p + 5;
							if (url)
								g_string_free(url, TRUE);
							url = g_string_new("");
							if (cdata)
								g_string_free(cdata, TRUE);
							cdata = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								int len;
								if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
									url = g_string_append(url, "&amp;");
								else if (*q == '"')
									url = g_string_append(url, "&quot;");
								else
									url = g_string_append_c(url, *q);
								q++;
							}
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "a";
					pt->dest_tag = "a";
					tags = g_list_prepend(tags, pt);
					if(xhtml)
						g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
					continue;
				}
/* Same as the <img> variant, except that a single quote in the value is
 * replaced by a double quote (the value ends up inside style='...'). */
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append_c(to, '\"'); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}
				if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
					const char *p = c + 5;
					GString *style = g_string_new("");
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "back=", 5)) {
							const char *q = p + 5;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "background: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "color=", 6)) {
							const char *q = p + 6;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "color: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "face=", 5)) {
							const char *q = p + 5;
							GString *face = g_string_new("");
							ESCAPE(q, face);
							g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
							g_string_free(face, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "size=", 5)) {
							const char *q = p + 5;
							int sz;
							const char *size = "medium";
							CHECK_QUOTE(q);
							sz = atoi(q);
							switch (sz)
							{
							case 1:
								size = "xx-small";
								break;
							case 2:
								size = "small";
								break;
							case 3:
								size = "medium";
								break;
							case 4:
								size = "large";
								break;
							case 5:
								size = "x-large";
								break;
							case 6:
							case 7:
								size = "xx-large";
								break;
							default:
								break;
							}
							g_string_append_printf(style, "font-size: %s; ", size);
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "font";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					/* A <font> without usable attributes emits no <span>, so
					 * its closing tag must be suppressed as well. */
					if(style->len && xhtml)
						g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
					else
						pt->ignore = TRUE;
					g_string_free(style, TRUE);
					continue;
				}
#undef ESCAPE
				if (!g_ascii_strncasecmp(c, "<body ", 6)) {
					const char *p = c + 6;
					gboolean did_something = FALSE;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
							const char *q = p + 8;
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
							GString *color = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								color = g_string_append_c(color, *q);
								q++;
							}
							if (xhtml)
								g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
							g_string_free(color, TRUE);
							if ((c = strchr(p, '>')) != NULL)
								c++;
							else
								c = p;
							pt->src_tag = "body";
							pt->dest_tag = "span";
							tags = g_list_prepend(tags, pt);
							did_something = TRUE;
							break;
						}
						p++;
					}
					if (did_something) continue;
				}
				/* this has to come after the special case for bgcolor */
				ALLOW_TAG("body");
				if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
					char *p = strstr(c + strlen("<!--"), "-->");
					if(p) {
						/* Only the opening marker is emitted here; the rest
						 * of the comment flows through the text branches. */
						if(xhtml)
							xhtml = g_string_append(xhtml, "<!--");
						c += strlen("<!--");
						continue;
					}
				}

				/* Not a tag we know: treat the '<' as literal text. */
				if(xhtml)
					xhtml = g_string_append(xhtml, "&lt;");
				if(plain)
					plain = g_string_append_c(plain, '<');
				c++;
			}
		} else if(*c == '&') {
			char buf[7];
			const char *pln;
			int len;

			/* An unrecognized '&' is passed through as a single character. */
			if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
				len = 1;
				g_snprintf(buf, sizeof(buf), "%c", *c);
				pln = buf;
			}
			if(xhtml)
				xhtml = g_string_append_len(xhtml, c, len);
			if(plain)
				plain = g_string_append(plain, pln);
			if(cdata)
				cdata = g_string_append_len(cdata, c, len);
			c += len;
		} else {
			if(xhtml)
				xhtml = g_string_append_c(xhtml, *c);
			if(plain)
				plain = g_string_append_c(plain, *c);
			if(cdata)
				cdata = g_string_append_c(cdata, *c);
			c++;
		}
	}

	/* Close whatever is still open, innermost first. */
	if(xhtml) {
		for (tag = tags; tag ; tag = tag->next) {
			struct purple_parse_tag *pt = tag->data;
			if(!pt->ignore)
				g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
		}
	}
	g_clear_list(&tags, g_free);
	if(xhtml_out)
		*xhtml_out = g_string_free(xhtml, FALSE);
	if(plain_out)
		*plain_out = g_string_free(plain, FALSE);
	if(url)
		g_string_free(url, TRUE);
	if (cdata)
		g_string_free(cdata, TRUE);
#undef CHECK_QUOTE
#undef VALID_CHAR
}
/* The following are probably reasonable changes:
 * - \n should be converted to a normal space
 * - in addition to <br>, <p> and <div> etc. should also be converted into \n
 * - We want to turn </td>#whitespace<td> sequences into a single tab
 * - We want to turn </tr>#whitespace<tr> sequences into a single \n
 * - <script>...</script> and <style>...</style> should be completely removed
 */
/*
 * Removes HTML tags from str and returns the remaining text.
 *
 * Returns NULL when str is NULL. Otherwise the result is a modified copy of
 * str (made with g_strdup), which the caller owns.
 *
 * The copy is rewritten in place: i is the read index, j the write index.
 * Along the way:
 *  - recognized entities are replaced by their text;
 *  - certain tags (see the list in the body) produce a newline, and a <td
 *    that follows a </td> produces a tab;
 *  - the contents of <script> and <style> elements are dropped;
 *  - for <a href=...>text</a>, " (href)" is added after the text unless the
 *    href equals the text (or the text equals the href minus its first 7
 *    characters);
 *  - whitespace characters are written as a plain space.
 */
char *
purple_markup_strip_html(const char *str)
{
int i, j, k, entlen;
/* Whether ordinary characters are currently copied to the output. */
gboolean visible = TRUE;
/* TRUE right after a </td>, until some other tag is seen. */
gboolean closing_td_p = FALSE;
gchar *str2;
/* While non-NULL, input is skipped until this closing tag is found. */
const gchar *cdata_close_tag = NULL, *ent;
/* Unescaped target of the most recent <a href=...>, and the output offset
 * at which its link text starts. */
gchar *href = NULL;
int href_st = 0;
if(!str)
return NULL;
str2 = g_strdup(str);
for (i = 0, j = 0; str2[i]; i++)
{
if (str2[i] == '<')
{
if (cdata_close_tag)
{
/* Note: Don't even assume any other tag is a tag in CDATA */
if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
strlen(cdata_close_tag)) == 0)
{
i += strlen(cdata_close_tag) - 1;
cdata_close_tag = NULL;
}
continue;
}
else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
{
str2[j++] = '\t';
visible = TRUE;
}
else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
{
closing_td_p = TRUE;
visible = FALSE;
}
else
{
closing_td_p = FALSE;
visible = TRUE;
}
k = i + 1;
/* A '<' followed by whitespace (or by the end of the string) is not
 * treated as a tag; it falls through to the copy code below. */
if(g_ascii_isspace(str2[k]))
visible = TRUE;
else if (str2[k])
{
/* Scan until we end the tag either implicitly (closed start
* tag) or explicitly, using a sloppy method (i.e., < or >
* inside quoted attributes will screw us up)
*/
while (str2[k] && str2[k] != '<' && str2[k] != '>')
{
k++;
}
/* If we've got an <a> tag with an href, save the address
* to print later. */
if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
g_ascii_isspace(str2[i+2]))
{
int st; /* start of href, inclusive [ */
int end; /* end of href, exclusive ) */
char delim = ' ';
/* Find start of href */
for (st = i + 3; st < k; st++)
{
if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
{
st += 5;
if (str2[st] == '"' || str2[st] == '\'')
{
delim = str2[st];
st++;
}
break;
}
}
/* find end of address */
for (end = st; end < k && str2[end] != delim; end++)
{
/* All the work is done in the loop construct above. */
}
/* If there's an address, save it. If there was
* already one saved, kill it. */
if (st < k)
{
char *tmp;
g_free(href);
tmp = g_strndup(str2 + st, end - st);
href = purple_unescape_html(tmp);
g_free(tmp);
href_st = j;
}
}
/* Replace </a> with an ascii representation of the
* address the link was pointing to. */
else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
{
size_t hrlen = strlen(href);
/* Only insert the href if it's different from the CDATA. */
if ((hrlen != (gsize)(j - href_st) ||
strncmp(str2 + href_st, href, hrlen)) &&
(hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
strncmp(str2 + href_st, href + 7, hrlen - 7)))
{
str2[j++] = ' ';
str2[j++] = '(';
memmove(str2 + j, href, hrlen);
j += hrlen;
str2[j++] = ')';
g_free(href);
href = NULL;
}
}
/* Check for tags which should be mapped to newline (but ignore some of
* the tags at the beginning of the text) */
else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
|| g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
|| g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
{
str2[j++] = '\n';
}
/* Check for tags which begin CDATA and need to be closed */
else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
{
cdata_close_tag = "</script>";
}
else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
{
cdata_close_tag = "</style>";
}
/* Update the index and continue checking after the tag */
/* When the scan stopped at another '<' or at the terminator, step back
 * one so the loop's i++ lands on that character. */
i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
continue;
}
}
else if (cdata_close_tag)
{
continue;
}
else if (!g_ascii_isspace(str2[i]))
{
visible = TRUE;
}
/* Entities are expanded regardless of the visible flag. */
if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
{
while (*ent)
str2[j++] = *ent++;
i += entlen - 1;
continue;
}
if (visible)
str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
}
g_free(href);
str2[j] = '\0';
return str2;
}
/* Reports whether c ends a URL or e-mail address being scanned: a space,
 * comma, end of string, line break, angle bracket or double quote. */
static gboolean
badchar(char c)
{
	if (c == ' ' || c == ',' || c == '\0')
		return TRUE;
	if (c == '\n' || c == '\r')
		return TRUE;
	if (c == '<' || c == '>' || c == '"')
		return TRUE;

	return FALSE;
}
/* Reports whether the text at c begins with one of the escaped delimiters
 * &lt; &gt; or &quot; (compared case-insensitively). */
static gboolean
badentity(const char *c)
{
	if (g_ascii_strncasecmp(c, "&lt;", 4) == 0)
		return TRUE;
	if (g_ascii_strncasecmp(c, "&gt;", 4) == 0)
		return TRUE;
	if (g_ascii_strncasecmp(c, "&quot;", 6) == 0)
		return TRUE;

	return FALSE;
}
/*
 * Tries to turn the URL starting at c into an anchor appended to ret.
 *
 *  ret          - output buffer that receives <a href="...">...</a>
 *  start        - beginning of the whole text being scanned
 *  c            - position where the scheme/prefix match begins
 *  matchlen     - length of the matched prefix (e.g. 7 for "http://")
 *  urlprefix    - text placed in front of the URL inside href
 *  inside_paren - current parenthesis nesting depth of the caller
 *
 * Returns the position just past the consumed URL, or c unchanged when
 * nothing follows the prefix (in which case nothing is appended).
 */
static const char *
process_link(GString *ret,
             const char *start, const char *c,
             int matchlen,
             const char *urlprefix,
             int inside_paren)
{
	const char *stop = c;
	char *shown, *target;

	for (;;) {
		/* Advance to the next delimiter character or entity. */
		while (!badchar(*stop) && !badentity(stop))
			stop++;

		/* Only the bare prefix was found: not a link. */
		if (stop - c == matchlen)
			return c;

		/* A comma that is not followed by a space stays part of the URL. */
		if (*stop == ',' && *(stop + 1) != ' ') {
			stop++;
			continue;
		}

		break;
	}

	/* Leave a trailing period, and a closing parenthesis that belongs to
	 * the surrounding text, out of the link. */
	if (stop > start && *(stop - 1) == '.')
		stop--;
	if (stop > start && *(stop - 1) == ')' && inside_paren > 0)
		stop--;

	shown = g_strndup(c, stop - c);
	target = purple_unescape_html(shown);
	g_string_append_printf(ret, "<a href=\"%s%s\">%s</a>",
	                       urlprefix,
	                       target, shown);
	g_free(target);
	g_free(shown);

	return stop;
}
/*
 * Returns a copy of text in which things that look like links are wrapped
 * in <a href="...">...</a>.
 *
 * Recognized forms: http://, https://, ftp://, sftp://, file://, "www." and
 * "ftp." (prefixed with http:// and ftp:// respectively in the href),
 * "xmpp:", "mailto:" followed by a valid address, and bare e-mail addresses
 * found around an '@'.
 *
 * Text inside an HTML tag is copied through untouched, as is everything
 * from an "<a" up to the following "/a>".
 *
 * Returns NULL when text is NULL; otherwise the caller owns the result.
 */
char *
purple_markup_linkify(const char *text)
{
const char *c, *t, *q = NULL;
char *tmpurlbuf, *url_buf;
gunichar g;
gboolean inside_html = FALSE;
/* Nesting depth of '(' seen outside tags; passed to process_link. */
int inside_paren = 0;
GString *ret;
if (text == NULL)
return NULL;
ret = g_string_new("");
c = text;
while (*c) {
if(*c == '(' && !inside_html) {
inside_paren++;
ret = g_string_append_c(ret, *c);
c++;
}
if(inside_html) {
/* q tracks an open quote inside a tag; it does not influence what is
 * written to ret. */
if(*c == '>') {
inside_html = FALSE;
} else if(!q && (*c == '\"' || *c == '\'')) {
q = c;
} else if(q) {
if(*c == *q)
q = NULL;
}
} else if(*c == '<') {
inside_html = TRUE;
if (!g_ascii_strncasecmp(c, "<a", 2)) {
/* Copy an existing anchor verbatim up to its "/a>" (or the end of the
 * text) so that its contents are not linkified again. */
while (1) {
if (!g_ascii_strncasecmp(c, "/a>", 3)) {
inside_html = FALSE;
break;
}
ret = g_string_append_c(ret, *c);
c++;
if (!(*c))
break;
}
}
} else if (!g_ascii_strncasecmp(c, "http://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "https://", 8)) {
c = process_link(ret, text, c, 8, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
c = process_link(ret, text, c, 6, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "file://", 7)) {
c = process_link(ret, text, c, 7, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 4, "http://", inside_paren);
} else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 4, "ftp://", inside_paren);
} else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
c = process_link(ret, text, c, 5, "", inside_paren);
} else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
t = c;
/* Scan to the end of the mailto: URL, then validate the address part. */
while (1) {
if (badchar(*t) || badentity(t)) {
char *d;
if (t - c == 7) {
break;
}
if (t > text && *(t - 1) == '.')
t--;
/* Only the part before any '?' is checked as an address. */
if ((d = strstr(c + 7, "?")) != NULL && d < t)
url_buf = g_strndup(c + 7, d - c - 7);
else
url_buf = g_strndup(c + 7, t - c - 7);
if (!purple_email_is_valid(url_buf)) {
g_free(url_buf);
break;
}
g_free(url_buf);
url_buf = g_strndup(c, t - c);
tmpurlbuf = purple_unescape_html(url_buf);
g_string_append_printf(ret, "<a href=\"%s\">%s</a>",
tmpurlbuf, url_buf);
g_free(url_buf);
g_free(tmpurlbuf);
c = t;
break;
}
t++;
}
} else if (c != text && (*c == '@')) {
/* flag is 1 when the characters on both sides of the '@' allow it to be
 * part of an e-mail address. */
int flag;
GString *gurl_buf = NULL;
const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
flag = 0;
else {
flag = 1;
gurl_buf = g_string_new("");
}
t = c;
while (flag) {
/* iterate backwards grabbing the local part of an email address */
g = g_utf8_get_char(t);
if (badchar(*t) || (g >= 127) || (*t == '(') ||
((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
!g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
(t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
/* local part will already be part of ret, strip it out */
ret = g_string_truncate(ret, ret->len - (c - t));
ret = g_string_append_unichar(ret, g);
break;
} else {
g_string_prepend_unichar(gurl_buf, g);
t = g_utf8_find_prev_char(text, t);
/* Walked back past the start of the text: everything written so far was
 * the local part, so the output is reset. */
if (t < text) {
ret = g_string_assign(ret, "");
break;
}
}
}
t = g_utf8_find_next_char(c, NULL);
while (flag) {
/* iterate forwards grabbing the domain part of an email address */
g = g_utf8_get_char(t);
if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
char *d;
url_buf = g_string_free(gurl_buf, FALSE);
gurl_buf = NULL;
/* strip off trailing periods */
if (*url_buf) {
for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
*d = '\0';
}
tmpurlbuf = purple_unescape_html(url_buf);
/* An invalid address is written back as plain text. */
if (purple_email_is_valid(tmpurlbuf)) {
g_string_append_printf(ret, "<a href=\"mailto:%s\">%s</a>",
tmpurlbuf, url_buf);
} else {
g_string_append(ret, url_buf);
}
g_free(url_buf);
g_free(tmpurlbuf);
c = t;
break;
} else {
g_string_append_unichar(gurl_buf, g);
t = g_utf8_find_next_char(t, NULL);
}
}
if (gurl_buf) {
g_string_free(gurl_buf, TRUE);
}
}
if(*c == ')' && !inside_html) {
inside_paren--;
ret = g_string_append_c(ret, *c);
c++;
}
if (*c == 0)
break;
/* Default: copy the current character through unchanged. */
ret = g_string_append_c(ret, *c);
c++;
}
return g_string_free(ret, FALSE);
}
/*
 * Returns a copy of in with every entity recognized by
 * purple_markup_unescape_entity() replaced by its text. All other
 * characters are copied unchanged.
 *
 * Returns NULL when in is NULL; otherwise the caller owns the result.
 */
char *purple_unescape_text(const char *in)
{
	GString *out;
	const char *cursor;

	if (in == NULL)
		return NULL;

	out = g_string_new("");

	for (cursor = in; *cursor != '\0'; ) {
		int consumed;
		const char *replacement = purple_markup_unescape_entity(cursor, &consumed);

		if (replacement == NULL) {
			/* Not an entity: keep the character as it is. */
			g_string_append_c(out, *cursor);
			cursor++;
		} else {
			g_string_append(out, replacement);
			cursor += consumed;
		}
	}

	return g_string_free(out, FALSE);
}
/*
 * Returns a copy of html with every entity recognized by
 * purple_markup_unescape_entity() replaced by its text and every literal
 * "<br>" (lower case, exactly as written) replaced by a newline. All other
 * characters are copied unchanged.
 *
 * Returns NULL when html is NULL; otherwise the caller owns the result.
 */
char *purple_unescape_html(const char *html)
{
	GString *out;
	const char *cursor;

	if (html == NULL)
		return NULL;

	out = g_string_new("");

	for (cursor = html; *cursor != '\0'; ) {
		int consumed;
		const char *replacement = purple_markup_unescape_entity(cursor, &consumed);

		if (replacement != NULL) {
			g_string_append(out, replacement);
			cursor += consumed;
			continue;
		}

		if (strncmp(cursor, "<br>", 4) == 0) {
			g_string_append_c(out, '\n');
			cursor += 4;
			continue;
		}

		g_string_append_c(out, *cursor);
		cursor++;
	}

	return g_string_free(out, FALSE);
}
/*
 * Extracts the part of the marked-up string str that lies between visible
 * positions x (inclusive) and y (exclusive).
 *
 * Positions count displayed characters: an entity counts as one, "<img "
 * as strlen("[Image]"), "<br" as one and "<hr>" as strlen("\n---\n");
 * other tags count as zero. Tags still open when the first plain character
 * at position x is copied are written in front of it (only if x > 0), and
 * tags still open at the end are closed.
 *
 * Returns a newly allocated string, an empty string when x == y, or NULL
 * when a '<' has no matching '>' or an '&' has no following ';'.
 */
char *
purple_markup_slice(const char *str, guint x, guint y)
{
	GString *out;
	GQueue *open_tags;
	guint pos = 0;
	gboolean prefix_done = FALSE;
	char *tag;

	g_return_val_if_fail(str != NULL, NULL);
	g_return_val_if_fail(x <= y, NULL);

	if (x == y)
		return g_strdup("");

	out = g_string_new("");
	open_tags = g_queue_new();

	for (; *str && (pos < y); str = g_utf8_next_char(str)) {
		gunichar ch = g_utf8_get_char(str);

		if (ch == '<') {
			const char *close = strchr(str, '>');

			if (close == NULL) {
				/* Unterminated tag: give up. */
				g_string_free(out, TRUE);
				g_queue_free_full(open_tags, g_free);
				return NULL;
			}

			if (!g_ascii_strncasecmp(str, "<img ", 5)) {
				pos += strlen("[Image]");
			} else if (!g_ascii_strncasecmp(str, "<br", 3)) {
				pos += 1;
			} else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
				pos += strlen("\n---\n");
			} else if (!g_ascii_strncasecmp(str, "</", 2)) {
				/* A closing tag drops the most recently opened tag. */
				g_free(g_queue_pop_head(open_tags));
			} else {
				/* Remember the whole opening tag, '<' through '>'. */
				g_queue_push_head(open_tags, g_strndup(str, close - str + 1));
			}

			if (pos >= x)
				g_string_append_len(out, str, close - str + 1);

			str = close;
		} else if (ch == '&') {
			const char *semi = strchr(str, ';');

			if (semi == NULL) {
				/* Unterminated entity: give up. */
				g_string_free(out, TRUE);
				g_queue_free_full(open_tags, g_free);
				return NULL;
			}

			if (pos >= x)
				g_string_append_len(out, str, semi - str + 1);
			pos++;

			str = semi;
		} else {
			if (pos == x && pos > 0 && !prefix_done) {
				GList *node;

				/* Re-open, oldest first, the tags that were opened
				 * before the slice began. */
				for (node = open_tags->tail; node != NULL; node = node->prev)
					g_string_append(out, node->data);
				prefix_done = TRUE;
			}

			if (pos >= x)
				g_string_append_unichar(out, ch);
			pos++;
		}
	}

	/* Close what is still open, innermost first. */
	while ((tag = g_queue_pop_head(open_tags)) != NULL) {
		char *name = purple_markup_get_tag_name(tag);

		g_string_append_printf(out, "</%s>", name);
		g_free(name);
		g_free(tag);
	}
	g_queue_free(open_tags);

	return g_string_free(out, FALSE);
}
/*
 * Returns a newly allocated copy of the name of the tag that starts at tag,
 * i.e. the characters after '<' up to the first '>', space, '/' or end of
 * string. tag must be non-NULL and begin with '<'; otherwise NULL is
 * returned.
 */
char *
purple_markup_get_tag_name(const char *tag)
{
	const char *name;
	const char *stop;

	g_return_val_if_fail(tag != NULL, NULL);
	g_return_val_if_fail(*tag == '<', NULL);

	name = tag + 1;
	stop = name;
	while (*stop != '\0' && *stop != '>' && *stop != ' ' && *stop != '/')
		stop++;

	return g_strndup(name, stop - name);
}