pidgin/pidgin
Clone
Summary
Browse
Changes
Graph
Make user interfaces setup the default history adapter.
2021-10-18, Gary Kramlich
755e1554051c
Make user interfaces setup the default history adapter.
This helps avoid some issues with the unit tests as well as gives us more
flexibility in the future.
Testing Done:
Ran the unit tests without issue. Ran Pidgin 3 with no existing config directory and verified that `history.db` was created properly.
Reviewed at https://reviews.imfreedom.org/r/1033/
/*
* Purple - Internet Messaging Library
* Copyright (C) Pidgin Developers <devel@pidgin.im>
*
* Purple is the legal property of its developers, whose names are too numerous
* to list here. Please refer to the COPYRIGHT file distributed with this
* source distribution.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include "purplemarkup.h"

#include "util.h"
/*
 * This function is stolen from glib's gmarkup.c and modified to not
 * replace ' with &apos;
 */
/*
 * Appends the first `length` bytes of the UTF-8 string `text` to `str`,
 * escaping it for use in markup:
 *   - '&', '<', '>' and '"' become "&amp;", "&lt;", "&gt;" and "&quot;";
 *   - control characters in the ranges 0x01-0x08, 0x0b-0x0c, 0x0e-0x1f,
 *     0x7f-0x84 and 0x86-0x9f become numeric character references;
 *   - everything else (including the apostrophe) is copied unchanged.
 *
 * `length` must be non-negative; the caller resolves -1 to strlen(text).
 */
static void
append_escaped_text(GString *str, const gchar *text, gssize length)
{
	const gchar *p;
	const gchar *end;
	gunichar c;

	p = text;
	end = text + length;

	/* Use '<' rather than '!=' so that a length which cuts a multibyte
	 * character in half cannot make us step over `end` and run away. */
	while (p < end) {
		const gchar *next;

		next = g_utf8_next_char(p);
		if (next > end) {
			/* Truncated trailing character: never copy past `end`. */
			next = end;
		}

		switch (*p) {
		case '&':
			g_string_append(str, "&amp;");
			break;

		case '<':
			g_string_append(str, "&lt;");
			break;

		case '>':
			g_string_append(str, "&gt;");
			break;

		case '"':
			g_string_append(str, "&quot;");
			break;

		default:
			c = g_utf8_get_char(p);
			if ((0x1 <= c && c <= 0x8) ||
			    (0xb <= c && c <= 0xc) ||
			    (0xe <= c && c <= 0x1f) ||
			    (0x7f <= c && c <= 0x84) ||
			    (0x86 <= c && c <= 0x9f))
				g_string_append_printf(str, "&#x%x;", c);
			else
				g_string_append_len(str, p, next - p);
			break;
		}

		p = next;
	}
}
/* This function is stolen from glib's gmarkup.c */
/*
 * Returns a newly allocated copy of `text` with markup-significant
 * characters escaped by append_escaped_text().
 *
 * text:   string to escape; must not be NULL (NULL is returned otherwise).
 * length: number of bytes of `text` to escape, or a negative value to use
 *         strlen(text).
 *
 * The caller owns the returned string.
 */
gchar *
purple_markup_escape_text(const gchar *text, gssize length)
{
	GString *escaped;

	g_return_val_if_fail(text != NULL, NULL);

	if (length < 0) {
		length = strlen(text);
	}

	/* Pre-size the buffer: the result is never shorter than the input. */
	escaped = g_string_sized_new(length);
	append_escaped_text(escaped, text, length);

	/* Hand the character data to the caller, dropping only the wrapper. */
	return g_string_free(escaped, FALSE);
}
/*
 * Decodes the single HTML entity that starts at `text`.
 *
 * text:   candidate entity; must start with '&', otherwise NULL is returned.
 * length: if non-NULL, receives the number of bytes of `text` the entity
 *         occupies (including the '&' and the ';').
 *
 * Recognised are the named entities &amp; &lt; &gt; &nbsp; &copy; &quot;
 * &reg; &apos; (case-insensitively) and numeric references of the form
 * &#NNN; or &#xHHH;.
 *
 * Returns the replacement text, or NULL if `text` is not a recognised
 * entity. The result must not be freed: it is either a string literal or,
 * for numeric references, a static buffer that the next numeric lookup
 * overwrites.
 */
const char *
purple_markup_unescape_entity(const char *text, int *length)
{
	const char *pln;
	int len;

	if (!text || *text != '&')
		return NULL;

#define IS_ENTITY(s)  (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))

	if (IS_ENTITY("&amp;"))
		pln = "&";
	else if (IS_ENTITY("&lt;"))
		pln = "<";
	else if (IS_ENTITY("&gt;"))
		pln = ">";
	else if (IS_ENTITY("&nbsp;"))
		pln = " ";
	else if (IS_ENTITY("&copy;"))
		pln = "\302\251";      /* or use g_unichar_to_utf8(0xa9); */
	else if (IS_ENTITY("&quot;"))
		pln = "\"";
	else if (IS_ENTITY("&reg;"))
		pln = "\302\256";      /* or use g_unichar_to_utf8(0xae); */
	else if (IS_ENTITY("&apos;"))
		pln = "\'";
	else if (text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
		/* 6 bytes is the most g_unichar_to_utf8() writes, plus the NUL. */
		static char buf[7];
		const char *start = text + 2;
		char *end;
		guint64 pound;
		int base = 10;
		int buflen;

		if (*start == 'x') {
			base = 16;
			start++;
		}

		pound = g_ascii_strtoull(start, &end, base);
		/* Reject &#0;, out-of-range values and references without ';'. */
		if (pound == 0 || pound > INT_MAX || *end != ';') {
			return NULL;
		}

		len = (end - text) + 1;

		buflen = g_unichar_to_utf8((gunichar)pound, buf);
		buf[buflen] = '\0';
		pln = buf;
	}
	else
		return NULL;

	if (length)
		*length = len;
	return pln;
}
char
*
purple_markup_get_css_property
(
const
gchar
*
style
,
const
gchar
*
opt
)
{
const
gchar
*
css_str
=
style
;
const
gchar
*
css_value_start
;
const
gchar
*
css_value_end
;
gchar
*
tmp
;
gchar
*
ret
;
g_return_val_if_fail
(
opt
!=
NULL
,
NULL
);
if
(
!
css_str
)
return
NULL
;
/* find the CSS property */
while
(
1
)
{
/* skip whitespace characters */
while
(
*
css_str
&&
g_ascii_isspace
(
*
css_str
))
css_str
++
;
if
(
!
g_ascii_isalpha
(
*
css_str
))
return
NULL
;
if
(
g_ascii_strncasecmp
(
css_str
,
opt
,
strlen
(
opt
)))
{
/* go to next css property positioned after the next ';' */
while
(
*
css_str
&&
*
css_str
!=
'"'
&&
*
css_str
!=
';'
)
css_str
++
;
if
(
*
css_str
!=
';'
)
return
NULL
;
css_str
++
;
}
else
break
;
}
/* find the CSS value position in the string */
css_str
+=
strlen
(
opt
);
while
(
*
css_str
&&
g_ascii_isspace
(
*
css_str
))
css_str
++
;
if
(
*
css_str
!=
':'
)
return
NULL
;
css_str
++
;
while
(
*
css_str
&&
g_ascii_isspace
(
*
css_str
))
css_str
++
;
if
(
*
css_str
==
'\0'
||
*
css_str
==
'"'
||
*
css_str
==
';'
)
return
NULL
;
/* mark the CSS value */
css_value_start
=
css_str
;
while
(
*
css_str
&&
*
css_str
!=
'"'
&&
*
css_str
!=
';'
)
css_str
++
;
css_value_end
=
css_str
-
1
;
/* Removes trailing whitespace */
while
(
css_value_end
>
css_value_start
&&
g_ascii_isspace
(
*
css_value_end
))
css_value_end
--
;
tmp
=
g_strndup
(
css_value_start
,
css_value_end
-
css_value_start
+
1
);
ret
=
purple_unescape_html
(
tmp
);
g_free
(
tmp
);
return
ret
;
}
gboolean
purple_markup_is_rtl
(
const
char
*
html
)
{
GData
*
attributes
;
const
gchar
*
start
,
*
end
;
gboolean
res
=
FALSE
;
if
(
purple_markup_find_tag
(
"span"
,
html
,
&
start
,
&
end
,
&
attributes
))
{
/* tmp is a member of attributes and is free with g_datalist_clear call */
const
char
*
tmp
=
g_datalist_get_data
(
&
attributes
,
"dir"
);
if
(
tmp
&&
!
g_ascii_strcasecmp
(
tmp
,
"RTL"
))
res
=
TRUE
;
if
(
!
res
)
{
tmp
=
g_datalist_get_data
(
&
attributes
,
"style"
);
if
(
tmp
)
{
char
*
tmp2
=
purple_markup_get_css_property
(
tmp
,
"direction"
);
if
(
tmp2
&&
!
g_ascii_strcasecmp
(
tmp2
,
"RTL"
))
res
=
TRUE
;
g_free
(
tmp2
);
}
}
g_datalist_clear
(
&
attributes
);
}
return
res
;
}
/*
 * Finds the first complete opening tag named `needle` in `haystack` and
 * collects its attributes.
 *
 * needle:     tag name without the angle brackets; non-NULL and non-empty.
 *             Matched case-insensitively.
 * haystack:   text to search; must not be NULL.
 * start:      receives a pointer to the tag's '<'.
 * end:        receives a pointer to the tag's closing '>'.
 * attributes: receives a GData list mapping lower-cased attribute names to
 *             newly allocated copies of their quoted values.
 *
 * Returns TRUE if the tag was found; the caller must then release the
 * attributes with g_datalist_clear(). On failure FALSE is returned and all
 * three out parameters are set to NULL.
 */
gboolean
purple_markup_find_tag(const char *needle, const char *haystack,
                       const char **start, const char **end,
                       GData **attributes)
{
	GData *attribs;
	const char *cur = haystack;
	char *name = NULL;
	gboolean found = FALSE;
	gboolean in_tag = FALSE;
	gboolean in_attr = FALSE;
	const char *in_quotes = NULL;
	size_t needlelen;

	g_return_val_if_fail(needle != NULL, FALSE);
	g_return_val_if_fail(*needle != '\0', FALSE);
	g_return_val_if_fail(haystack != NULL, FALSE);
	g_return_val_if_fail(start != NULL, FALSE);
	g_return_val_if_fail(end != NULL, FALSE);
	g_return_val_if_fail(attributes != NULL, FALSE);

	needlelen = strlen(needle);
	g_datalist_init(&attribs);

	while (*cur && !found) {
		if (in_tag) {
			if (in_quotes) {
				const char *close = cur;

				while (*close && *close != *in_quotes)
					close++;

				/* if we got the close quote, store the value and carry
				 * on from after it. if we ran to the end of the string,
				 * point to the NUL and we're outta here */
				if (*close) {
					/* only store a value if we have an attribute name */
					if (name) {
						size_t len = close - cur;
						char *val = g_strndup(cur, len);

						g_datalist_set_data_full(&attribs, name, val, g_free);
						g_free(name);
						name = NULL;
					}

					in_quotes = NULL;
					cur = close + 1;
				} else {
					cur = close;
				}
			} else if (in_attr) {
				const char *close = cur;

				while (*close && *close != '>' && *close != '"' &&
				       *close != '\'' && *close != ' ' && *close != '=')
					close++;

				/* if we got the equals, store the name of the attribute.
				 * if we got the quote, save the attribute and go straight
				 * to quote mode. otherwise the tag closed or we reached
				 * the end of the string, so we can get outta here */
				switch (*close) {
				case '"':
				case '\'':
					in_quotes = close;
					/* fall through */
				case '=':
					{
						size_t len = close - cur;

						/* don't store a blank attribute name */
						if (len) {
							g_free(name);
							name = g_ascii_strdown(cur, len);
						}

						in_attr = FALSE;
						cur = close + 1;
					}
					break;
				case ' ':
				case '>':
					in_attr = FALSE;
					/* fall through */
				default:
					cur = close;
					break;
				}
			} else {
				switch (*cur) {
				case ' ':
					/* swallow extra spaces inside tag */
					while (*cur && *cur == ' ')
						cur++;
					in_attr = TRUE;
					break;
				case '>':
					found = TRUE;
					*end = cur;
					break;
				case '"':
				case '\'':
					in_quotes = cur;
					/* fall through */
				default:
					cur++;
					break;
				}
			}
		} else {
			/* if we hit a < followed by the name of our tag... */
			if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
				*start = cur;
				cur = cur + needlelen + 1;

				/* if we're pointing at a space or a >, we found the right
				 * tag. if we're not, we've found a longer tag whose name
				 * merely starts with ours. */
				if (*cur == ' ' || *cur == '>') {
					in_tag = TRUE;
				} else {
					/* Skip ahead to the first quote or '>' of the
					 * longer tag; the outer loop resumes scanning
					 * for '<' from there. */
					while (*cur && *cur != '"' && *cur != '\'' && *cur != '>')
						cur++;
				}
			} else {
				cur++;
			}
		}
	}

	/* clean up any attribute name from a premature termination */
	g_free(name);

	if (found) {
		*attributes = attribs;
	} else {
		/* The tag was never closed: drop any attributes collected so
		 * far, since the caller gets no handle to free them. */
		g_datalist_clear(&attribs);

		*start = NULL;
		*end = NULL;
		*attributes = NULL;
	}

	return found;
}
/* One entry on the stack of currently open tags kept by
 * purple_markup_html_to_xhtml(). */
struct purple_parse_tag {
	char *src_tag;   /* tag name as it appears in the source HTML; used to
	                  * match the corresponding closing tag */
	char *dest_tag;  /* tag name written to the XHTML output when the tag
	                  * is closed */
	gboolean ignore; /* when TRUE, no closing tag is written to the XHTML
	                  * output for this entry */
};
/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method
recommended in the GCC docs). It contains 'continue's that should
affect the while-loop in purple_markup_html_to_xhtml and doing the
above would break that.
Also, remember to put braces in constructs that require them for
multiple statements when using this macro. */
/*
 * ALLOW_TAG_ALT(x, y): if the input at `c` is the opening tag <x ...>,
 * <x> or <x/>, emit it to `xhtml` as tag y and `continue` the enclosing
 * loop; otherwise fall through to the code after the macro.
 *
 * x and y must be string literals (they are pasted into other literals).
 * The macro reads and writes the locals `c`, `xhtml`, `plain` and `tags`
 * of the function it is expanded in. Quoted attribute values are re-escaped
 * with g_markup_escape_text(); a tag containing a stray '<', or one that is
 * never closed, is not treated as a tag and its '<' is emitted escaped.
 *
 * NOTE(review): the backslash test inside the quoted-value branch looks at
 * *c, which still points at the tag's '<', so that branch cannot fire. It
 * probably meant *o -- confirm before changing, since that alters parsing.
 */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
						const char *o = c + strlen("<" x); \
						const char *p = NULL, *q = NULL, *r = NULL; \
						/* o = iterating over full tag \
						 * p = > (end of tag) \
						 * q = start of quoted bit \
						 * r = < inside tag \
						 */ \
						GString *innards = g_string_new(""); \
						while(o && *o) { \
							if(!q && (*o == '\"' || *o == '\'') ) { \
								q = o; \
							} else if(q) { \
								if(*o == *q) { /* end of quoted bit */ \
									char *unescaped = g_strndup(q+1, o-q-1); \
									char *escaped = g_markup_escape_text(unescaped, -1); \
									g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
									g_free(unescaped); \
									g_free(escaped); \
									q = NULL; \
								} else if(*c == '\\') { \
									o++; \
								} \
							} else if(*o == '<') { \
								r = o; \
							} else if(*o == '>') { \
								p = o; \
								break; \
							} else { \
								innards = g_string_append_c(innards, *o); \
							} \
							o++; \
						} \
						if(p && !r) { /* got an end of tag and no other < earlier */ \
							if(*(p-1) != '/') { \
								struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
								pt->src_tag = x; \
								pt->dest_tag = y; \
								tags = g_list_prepend(tags, pt); \
							} \
							if(xhtml) { \
								xhtml = g_string_append(xhtml, "<" y); \
								xhtml = g_string_append(xhtml, innards->str); \
								xhtml = g_string_append_c(xhtml, '>'); \
							} \
							c = p + 1; \
						} else { /* got end of tag with earlier < *or* didn't get anything */ \
							if(xhtml) \
								xhtml = g_string_append(xhtml, "&lt;"); \
							if(plain) \
								plain = g_string_append_c(plain, '<'); \
							c++; \
						} \
						g_string_free(innards, TRUE); \
						continue; \
					} \
					if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
							(*(c+strlen("<" x)) == '>' || \
							 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
						if(xhtml) \
							xhtml = g_string_append(xhtml, "<" y); \
						c += strlen("<" x); \
						if(*c != '/') { \
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
							pt->src_tag = x; \
							pt->dest_tag = y; \
							tags = g_list_prepend(tags, pt); \
							if(xhtml) \
								xhtml = g_string_append_c(xhtml, '>'); \
						} else { \
							if(xhtml) \
								xhtml = g_string_append(xhtml, "/>");\
						} \
						c = strchr(c, '>') + 1; \
						continue; \
					}
/* Don't forget to check the note above for ALLOW_TAG_ALT. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
/*
 * Converts loosely formed HTML into XHTML and/or plain text.
 *
 * html:      input markup; NULL or empty input produces empty outputs.
 * xhtml_out: if non-NULL, receives a newly allocated XHTML rendering.
 * plain_out: if non-NULL, receives a newly allocated plain-text rendering.
 * At least one of xhtml_out / plain_out must be non-NULL. The caller frees
 * the returned strings with g_free().
 *
 * Handling by tag:
 *   - tags on the ALLOW_TAG list are passed through (i/italic become em);
 *   - b/bold/strong, u/underline, s/strike, sub, sup, font and
 *     <body bgcolor=...> are rewritten as <span style='...'>;
 *   - <br> and <hr> both become <br/> (a newline in the plain text);
 *   - <img> keeps only src and alt, <a> keeps only href; in the plain text
 *     a link's URL is appended as " <url>" when it differs from the link
 *     text;
 *   - any other '<' is emitted as &lt; (a literal '<' in the plain text).
 * Tags still open at the end of the input are closed in the XHTML output.
 */
void
purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
                            char **plain_out)
{
	GString *xhtml = NULL;
	GString *plain = NULL;
	GString *url = NULL;
	GString *cdata = NULL;
	GList *tags = NULL, *tag;
	const char *c = html;
	char quote = '\0';

/* Remember (and step over) an opening quote at ptr, if there is one. */
#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
			quote = *(ptr++); \
		else \
			quote = '\0';

/* True while ptr is inside the attribute value started by CHECK_QUOTE. */
#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))

	g_return_if_fail(xhtml_out != NULL || plain_out != NULL);

	if(xhtml_out)
		xhtml = g_string_new("");
	if(plain_out)
		plain = g_string_new("");

	while(c && *c) {
		if(*c == '<') {
			if(*(c+1) == '/') { /* closing tag */
				tag = tags;
				while(tag) {
					struct purple_parse_tag *pt = tag->data;
					if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
						c += strlen(pt->src_tag) + 3;
						break;
					}
					tag = tag->next;
				}
				if(tag) {
					/* Close every tag opened after the matching one,
					 * then the matching one itself. */
					while(tags) {
						struct purple_parse_tag *pt = tags->data;
						if(xhtml && !pt->ignore)
							g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
						if(plain && purple_strequal(pt->src_tag, "a")) {
							/* if this is a link, we have to add the url to the plaintext, too */
							if (cdata && url &&
									(!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
									                                 g_utf8_collate(url->str + 7, cdata->str) != 0))) {
								/* purple_unescape_html() allocates; free it
								 * once it has been copied into plain. */
								char *unescaped = purple_unescape_html(url->str);
								g_string_append_printf(plain, " <%s>", g_strstrip(unescaped));
								g_free(unescaped);
							}
							if (cdata) {
								g_string_free(cdata, TRUE);
								cdata = NULL;
							}
						}
						if(tags == tag)
							break;
						tags = g_list_delete_link(tags, tags);
						g_free(pt);
					}
					g_free(tag->data);
					tags = g_list_delete_link(tags, tag);
				} else {
					/* a closing tag we weren't expecting...
					 * we'll let it slide, if it's really a tag...if it's
					 * just a </ we'll escape it properly */
					const char *end = c+2;
					while(*end && g_ascii_isalpha(*end))
						end++;
					if(*end == '>') {
						c = end+1;
					} else {
						if(xhtml)
							xhtml = g_string_append(xhtml, "&lt;");
						if(plain)
							plain = g_string_append_c(plain, '<');
						c++;
					}
				}
			} else { /* opening tag */
				ALLOW_TAG("blockquote");
				ALLOW_TAG("cite");
				ALLOW_TAG("div");
				ALLOW_TAG("em");
				ALLOW_TAG("h1");
				ALLOW_TAG("h2");
				ALLOW_TAG("h3");
				ALLOW_TAG("h4");
				ALLOW_TAG("h5");
				ALLOW_TAG("h6");
				/* we only allow html to start the message */
				if(c == html) {
					ALLOW_TAG("html");
				}
				ALLOW_TAG_ALT("i", "em");
				ALLOW_TAG_ALT("italic", "em");
				ALLOW_TAG("li");
				ALLOW_TAG("ol");
				ALLOW_TAG("p");
				ALLOW_TAG("pre");
				ALLOW_TAG("q");
				ALLOW_TAG("span");
				ALLOW_TAG("ul");

				/* we skip <HR> because it's not legal in XHTML-IM.  However,
				 * we still want to send something sensible, so we put a
				 * linebreak in its place. <BR> also needs special handling
				 * because putting a </BR> to close it would just be dumb. */
				if((!g_ascii_strncasecmp(c, "<br", 3)
							|| !g_ascii_strncasecmp(c, "<hr", 3))
						&& (*(c+3) == '>' ||
							!g_ascii_strncasecmp(c+3, "/>", 2) ||
							!g_ascii_strncasecmp(c+3, " />", 3))) {
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<br/>");
					if(plain && *c != '\n')
						plain = g_string_append_c(plain, '\n');
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					/* The tag was matched case-insensitively, so tell
					 * "bold" from "strong" case-insensitively as well;
					 * otherwise <BOLD> is recorded as "strong" and its
					 * closing tag never matches. */
					if (*(c+2) == '>')
						pt->src_tag = "b";
					else if (g_ascii_tolower(*(c+2)) == 'o')
						pt->src_tag = "bold";
					else
						pt->src_tag = "strong";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "u" : "underline";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if (xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "s" : "strike";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sub";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sup";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
					const char *p = c + 4;
					GString *src = NULL, *alt = NULL;
/* Copy one attribute value, escaping bare '&' and apostrophes so the
 * value is safe inside the single-quoted attributes written below. */
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append(to, "&apos;"); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}

					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "src=", 4)) {
							const char *q = p + 4;
							if (src)
								g_string_free(src, TRUE);
							src = g_string_new("");
							ESCAPE(q, src);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
							const char *q = p + 4;
							if (alt)
								g_string_free(alt, TRUE);
							alt = g_string_new("");
							ESCAPE(q, alt);
							p = q;
						} else {
							p++;
						}
					}
#undef ESCAPE
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					/* src and alt are required! */
					if(src && xhtml)
						g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
					if(alt) {
						if(plain) {
							/* free the temporary unescaped copy */
							char *unescaped = purple_unescape_html(alt->str);
							plain = g_string_append(plain, unescaped);
							g_free(unescaped);
						}
						if(!src && xhtml)
							xhtml = g_string_append(xhtml, alt->str);
						g_string_free(alt, TRUE);
					}
					/* src stays NULL for an <img> without a src attribute */
					if(src)
						g_string_free(src, TRUE);
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
					const char *p = c + 2;
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "href=", 5)) {
							const char *q = p + 5;
							if (url)
								g_string_free(url, TRUE);
							url = g_string_new("");
							/* start collecting the link text for the
							 * plain-text comparison at the closing tag */
							if (cdata)
								g_string_free(cdata, TRUE);
							cdata = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								int len;
								if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
									url = g_string_append(url, "&amp;");
								else if (*q == '"')
									url = g_string_append(url, "&quot;");
								else
									url = g_string_append_c(url, *q);
								q++;
							}
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "a";
					pt->dest_tag = "a";
					tags = g_list_prepend(tags, pt);
					if(xhtml)
						g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
					continue;
				}
/* Copy one attribute value for use inside a single-quoted style attribute:
 * bare '&' is escaped and apostrophes are turned into double quotes. */
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append_c(to, '\"'); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}
				if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
					const char *p = c + 5;
					GString *style = g_string_new("");
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "back=", 5)) {
							const char *q = p + 5;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "background: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "color=", 6)) {
							const char *q = p + 6;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "color: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "face=", 5)) {
							const char *q = p + 5;
							GString *face = g_string_new("");
							ESCAPE(q, face);
							g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
							g_string_free(face, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "size=", 5)) {
							const char *q = p + 5;
							int sz;
							const char *size = "medium";
							CHECK_QUOTE(q);
							sz = atoi(q);
							switch (sz)
							{
							case 1:
							  size = "xx-small";
							  break;
							case 2:
							  size = "small";
							  break;
							case 3:
							  size = "medium";
							  break;
							case 4:
							  size = "large";
							  break;
							case 5:
							  size = "x-large";
							  break;
							case 6:
							case 7:
							  size = "xx-large";
							  break;
							default:
							  break;
							}
							g_string_append_printf(style, "font-size: %s; ", size);
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "font";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					if(style->len && xhtml)
						g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
					else
						pt->ignore = TRUE;
					g_string_free(style, TRUE);
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<body ", 6)) {
					const char *p = c + 6;
					gboolean did_something = FALSE;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
							const char *q = p + 8;
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
							GString *color = g_string_new("");
							/* Escape the colour exactly like the <font>
							 * colours so an apostrophe in the value cannot
							 * terminate the style attribute early. */
							ESCAPE(q, color);
							if (xhtml)
								g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
							g_string_free(color, TRUE);
							if ((c = strchr(p, '>')) != NULL)
								c++;
							else
								c = p;
							pt->src_tag = "body";
							pt->dest_tag = "span";
							tags = g_list_prepend(tags, pt);
							did_something = TRUE;
							break;
						}
						p++;
					}
					if (did_something) continue;
				}
#undef ESCAPE
				/* this has to come after the special case for bgcolor */
				ALLOW_TAG("body");
				if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
					const char *p = strstr(c + strlen("<!--"), "-->");
					if(p) {
						if(xhtml)
							xhtml = g_string_append(xhtml, "<!--");
						c += strlen("<!--");
						continue;
					}
				}

				/* not a tag we know: emit the '<' as text */
				if(xhtml)
					xhtml = g_string_append(xhtml, "&lt;");
				if(plain)
					plain = g_string_append_c(plain, '<');
				c++;
			}
		} else if(*c == '&') {
			char buf[7];
			const char *pln;
			int len;

			if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
				/* not an entity we know: treat the '&' as a plain char */
				len = 1;
				g_snprintf(buf, sizeof(buf), "%c", *c);
				pln = buf;
			}
			if(xhtml)
				xhtml = g_string_append_len(xhtml, c, len);
			if(plain)
				plain = g_string_append(plain, pln);
			if(cdata)
				cdata = g_string_append_len(cdata, c, len);
			c += len;
		} else {
			if(xhtml)
				xhtml = g_string_append_c(xhtml, *c);
			if(plain)
				plain = g_string_append_c(plain, *c);
			if(cdata)
				cdata = g_string_append_c(cdata, *c);
			c++;
		}
	}
	if(xhtml) {
		/* close whatever is still open, innermost first */
		for (tag = tags; tag ; tag = tag->next) {
			struct purple_parse_tag *pt = tag->data;
			if(!pt->ignore)
				g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
		}
	}
	/* free the list together with the tag structs it still owns */
	g_list_free_full(tags, g_free);
	if(xhtml_out)
		*xhtml_out = g_string_free(xhtml, FALSE);
	if(plain_out)
		*plain_out = g_string_free(plain, FALSE);
	if(url)
		g_string_free(url, TRUE);
	if (cdata)
		g_string_free(cdata, TRUE);
#undef CHECK_QUOTE
#undef VALID_CHAR
}
/* The following are probably reasonable changes:
* - \n should be converted to a normal space
* - in addition to <br>, <p> and <div> etc. should also be converted into \n
* - We want to turn </td>#whitespace<td> sequences into a single tab
* - We want to turn </tr>#whitespace<tr> sequences into a single \n
* - <script>...</script> and <style>...</style> should be completely removed
*/
/*
 * Returns a newly allocated copy of `str` with HTML tags removed, or NULL
 * if `str` is NULL. The caller frees the result with g_free().
 *
 * The copy is rewritten in place: `i` is the read position, `j` the write
 * position. Beyond dropping tags:
 *   - a <td following a </td> is replaced by a tab;
 *   - <p>, <tr, <hr, <li and <div (unless nothing has been written yet),
 *     <br and </table> are replaced by a newline;
 *   - everything between <script / <style and the matching closing tag is
 *     dropped;
 *   - at </a> the link's href is inserted as " (href)" unless it equals the
 *     link text, or equals the link text preceded by 7 extra characters;
 *   - entities recognised by purple_markup_unescape_entity() are decoded;
 *   - whitespace characters are written as a single space each.
 */
char *
purple_markup_strip_html(const char *str)
{
	int i, j, k, entlen;
	/* FALSE only after a </td>, until the next non-space text or tag. */
	gboolean visible = TRUE;
	/* TRUE when the previous tag seen was </td>. */
	gboolean closing_td_p = FALSE;
	gchar *str2;
	/* While non-NULL, we are inside <script>/<style> waiting for this tag. */
	const gchar *cdata_close_tag = NULL, *ent;
	/* href of the most recent <a>, and where its link text starts in str2. */
	gchar *href = NULL;
	int href_st = 0;

	if (!str)
		return NULL;

	str2 = g_strdup(str);

	for (i = 0, j = 0; str2[i]; i++)
	{
		if (str2[i] == '<')
		{
			if (cdata_close_tag)
			{
				/* Note: Don't even assume any other tag is a tag in CDATA */
				if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
						strlen(cdata_close_tag)) == 0)
				{
					i += strlen(cdata_close_tag) - 1;
					cdata_close_tag = NULL;
				}
				continue;
			}
			else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
			{
				str2[j++] = '\t';
				visible = TRUE;
			}
			else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
			{
				closing_td_p = TRUE;
				visible = FALSE;
			}
			else
			{
				closing_td_p = FALSE;
				visible = TRUE;
			}

			k = i + 1;

			/* A '<' followed by whitespace is not a tag; it falls through
			 * to the bottom of the loop and is copied as text. */
			if (g_ascii_isspace(str2[k]))
				visible = TRUE;
			else if (str2[k])
			{
				/* Scan until we end the tag either implicitly (closed start
				 * tag) or explicitly, using a sloppy method (i.e., < or >
				 * inside quoted attributes will screw us up)
				 */
				while (str2[k] && str2[k] != '<' && str2[k] != '>')
				{
					k++;
				}

				/* If we've got an <a> tag with an href, save the address
				 * to print later. */
				if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
				    g_ascii_isspace(str2[i + 2]))
				{
					int st; /* start of href, inclusive [ */
					int end; /* end of href, exclusive ) */
					char delim = ' ';
					/* Find start of href */
					for (st = i + 3; st < k; st++)
					{
						if (g_ascii_strncasecmp(str2 + st, "href=", 5) == 0)
						{
							st += 5;
							if (str2[st] == '"' || str2[st] == '\'')
							{
								delim = str2[st];
								st++;
							}
							break;
						}
					}
					/* find end of address */
					for (end = st; end < k && str2[end] != delim; end++)
					{
						/* All the work is done in the loop construct above. */
					}

					/* If there's an address, save it.  If there was
					 * already one saved, kill it. */
					if (st < k)
					{
						char *tmp;
						g_free(href);
						tmp = g_strndup(str2 + st, end - st);
						href = purple_unescape_html(tmp);
						g_free(tmp);
						/* the link text starts at the current write position */
						href_st = j;
					}
				}

				/* Replace </a> with an ascii representation of the
				 * address the link was pointing to. */
				else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
				{
					size_t hrlen = strlen(href);

					/* Only insert the href if it's different from the CDATA. */
					if ((hrlen != (gsize)(j - href_st) ||
					     strncmp(str2 + href_st, href, hrlen)) &&
					    (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
					     strncmp(str2 + href_st, href + 7, hrlen - 7)))
					{
						str2[j++] = ' ';
						str2[j++] = '(';
						memmove(str2 + j, href, hrlen);
						j += hrlen;
						str2[j++] = ')';
						g_free(href);
						href = NULL;
					}
				}

				/* Check for tags which should be mapped to newline (but ignore some of
				 * the tags at the beginning of the text) */
				else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
				 || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
				 || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
				{
					str2[j++] = '\n';
				}
				/* Check for tags which begin CDATA and need to be closed */
				else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
				{
					cdata_close_tag = "</script>";
				}
				else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
				{
					cdata_close_tag = "</style>";
				}
				/* Update the index and continue checking after the tag.
				 * When the scan stopped on '<' or the end of the string,
				 * step back one so the loop's i++ lands on that character. */
				i = (str2[k] == '<' || str2[k] == '\0') ? k - 1 : k;
				continue;
			}
		}
		else if (cdata_close_tag)
		{
			/* inside <script>/<style>: drop everything */
			continue;
		}
		else if (!g_ascii_isspace(str2[i]))
		{
			visible = TRUE;
		}

		if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
		{
			while (*ent)
				str2[j++] = *ent++;
			/* skip the rest of the entity; the loop's i++ does the last step */
			i += entlen - 1;
			continue;
		}

		if (visible)
			str2[j++] = g_ascii_isspace(str2[i]) ? ' ' : str2[i];
	}

	g_free(href);

	str2[j] = '\0';

	return str2;
}
/*
 * Returns TRUE if `c` cannot be part of a URL as far as the linkifier is
 * concerned: space, comma, NUL, newline, carriage return, '<', '>' or '"'.
 */
static gboolean
badchar(char c)
{
	if (c == ' ' || c == ',' || c == '\0' ||
	    c == '\n' || c == '\r' ||
	    c == '<' || c == '>' || c == '"') {
		return TRUE;
	}

	return FALSE;
}
/*
 * Returns TRUE if the text at `c` starts with one of the entities &lt;,
 * &gt; or &quot; (compared case-insensitively), i.e. the escaped form of a
 * character that badchar() rejects.
 */
static gboolean
badentity(const char *c)
{
	if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
		!g_ascii_strncasecmp(c, "&gt;", 4) ||
		!g_ascii_strncasecmp(c, "&quot;", 6)) {
		return TRUE;
	}
	return FALSE;
}
/*
 * Tries to turn the URL starting at `c` into an <A HREF> element appended
 * to `ret`.
 *
 * ret:          output buffer the anchor is appended to.
 * start:        beginning of the whole text `c` points into; used as the
 *               lower bound when looking back one character.
 * c:            first character of the candidate URL.
 * matchlen:     length of the scheme/prefix already matched at `c`.
 * urlprefix:    text prepended to the URL inside HREF (may be "").
 * inside_paren: when > 0, a trailing ')' is left out of the link.
 *
 * The URL extends to the first character rejected by badchar() or
 * badentity(), except that a comma not followed by a space is kept. One
 * trailing '.' and (inside parentheses) one trailing ')' are dropped.
 *
 * Returns a pointer just past the linkified text, or `c` itself when
 * nothing follows the prefix and no link was written.
 */
static const char *
process_link(GString *ret,
		const char *start, const char *c,
		int matchlen,
		const char *urlprefix,
		int inside_paren)
{
	char *url_buf, *tmpurlbuf;
	const char *t;

	for (t = c;; t++) {
		if (!badchar(*t) && !badentity(t))
			continue;

		/* Nothing but the prefix itself: not a link. */
		if (t - c == matchlen)
			break;

		if (*t == ',' && *(t + 1) != ' ') {
			continue;
		}

		if (t > start && *(t - 1) == '.')
			t--;
		if (t > start && *(t - 1) == ')' && inside_paren > 0)
			t--;

		/* Trimming may have eaten everything after the prefix (e.g.
		 * "http://."); a bare prefix is not worth linking either. */
		if (t - c <= matchlen)
			break;

		url_buf = g_strndup(c, t - c);
		tmpurlbuf = purple_unescape_html(url_buf);
		g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
				urlprefix,
				tmpurlbuf, url_buf);
		g_free(tmpurlbuf);
		g_free(url_buf);
		return t;
	}

	return c;
}
char
*
purple_markup_linkify
(
const
char
*
text
)
{
const
char
*
c
,
*
t
,
*
q
=
NULL
;
char
*
tmpurlbuf
,
*
url_buf
;
gunichar
g
;
gboolean
inside_html
=
FALSE
;
int
inside_paren
=
0
;
GString
*
ret
;
if
(
text
==
NULL
)
return
NULL
;
ret
=
g_string_new
(
""
);
c
=
text
;
while
(
*
c
)
{
if
(
*
c
==
'('
&&
!
inside_html
)
{
inside_paren
++
;
ret
=
g_string_append_c
(
ret
,
*
c
);
c
++
;
}
if
(
inside_html
)
{
if
(
*
c
==
'>'
)
{
inside_html
=
FALSE
;
}
else
if
(
!
q
&&
(
*
c
==
'\"'
||
*
c
==
'\''
))
{
q
=
c
;
}
else
if
(
q
)
{
if
(
*
c
==
*
q
)
q
=
NULL
;
}
}
else
if
(
*
c
==
'<'
)
{
inside_html
=
TRUE
;
if
(
!
g_ascii_strncasecmp
(
c
,
"<A"
,
2
))
{
while
(
1
)
{
if
(
!
g_ascii_strncasecmp
(
c
,
"/A>"
,
3
))
{
inside_html
=
FALSE
;
break
;
}
ret
=
g_string_append_c
(
ret
,
*
c
);
c
++
;
if
(
!
(
*
c
))
break
;
}
}
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"http://"
,
7
))
{
c
=
process_link
(
ret
,
text
,
c
,
7
,
""
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"https://"
,
8
))
{
c
=
process_link
(
ret
,
text
,
c
,
8
,
""
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"ftp://"
,
6
))
{
c
=
process_link
(
ret
,
text
,
c
,
6
,
""
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"sftp://"
,
7
))
{
c
=
process_link
(
ret
,
text
,
c
,
7
,
""
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"file://"
,
7
))
{
c
=
process_link
(
ret
,
text
,
c
,
7
,
""
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"www."
,
4
)
&&
c
[
4
]
!=
'.'
&&
(
c
==
text
||
badchar
(
c
[
-1
])
||
badentity
(
c
-1
)))
{
c
=
process_link
(
ret
,
text
,
c
,
4
,
"http://"
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"ftp."
,
4
)
&&
c
[
4
]
!=
'.'
&&
(
c
==
text
||
badchar
(
c
[
-1
])
||
badentity
(
c
-1
)))
{
c
=
process_link
(
ret
,
text
,
c
,
4
,
"ftp://"
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"xmpp:"
,
5
)
&&
(
c
==
text
||
badchar
(
c
[
-1
])
||
badentity
(
c
-1
)))
{
c
=
process_link
(
ret
,
text
,
c
,
5
,
""
,
inside_paren
);
}
else
if
(
!
g_ascii_strncasecmp
(
c
,
"mailto:"
,
7
))
{
t
=
c
;
while
(
1
)
{
if
(
badchar
(
*
t
)
||
badentity
(
t
))
{
char
*
d
;
if
(
t
-
c
==
7
)
{
break
;
}
if
(
t
>
text
&&
*
(
t
-
1
)
==
'.'
)
t
--
;
if
((
d
=
strstr
(
c
+
7
,
"?"
))
!=
NULL
&&
d
<
t
)
url_buf
=
g_strndup
(
c
+
7
,
d
-
c
-
7
);
else
url_buf
=
g_strndup
(
c
+
7
,
t
-
c
-
7
);
if
(
!
purple_email_is_valid
(
url_buf
))
{
g_free
(
url_buf
);
break
;
}
g_free
(
url_buf
);
url_buf
=
g_strndup
(
c
,
t
-
c
);
tmpurlbuf
=
purple_unescape_html
(
url_buf
);
g_string_append_printf
(
ret
,
"<A HREF=
\"
%s
\"
>%s</A>"
,
tmpurlbuf
,
url_buf
);
g_free
(
url_buf
);
g_free
(
tmpurlbuf
);
c
=
t
;
break
;
}
t
++
;
}
}
else
if
(
c
!=
text
&&
(
*
c
==
'@'
))
{
int
flag
;
GString
*
gurl_buf
=
NULL
;
const
char
illegal_chars
[]
=
"!@#$%^&*()[]{}/|
\\
<>
\"
:;
\r\n
\0
"
;
if
(
strchr
(
illegal_chars
,
*
(
c
-
1
))
||
strchr
(
illegal_chars
,
*
(
c
+
1
)))
flag
=
0
;
else
{
flag
=
1
;
gurl_buf
=
g_string_new
(
""
);
}
t
=
c
;
while
(
flag
)
{
/* iterate backwards grabbing the local part of an email address */
g
=
g_utf8_get_char
(
t
);
if
(
badchar
(
*
t
)
||
(
g
>=
127
)
||
(
*
t
==
'('
)
||
((
*
t
==
';'
)
&&
((
t
>
(
text
+
2
)
&&
(
!
g_ascii_strncasecmp
(
t
-
3
,
"<"
,
4
)
||
!
g_ascii_strncasecmp
(
t
-
3
,
">"
,
4
)))
||
(
t
>
(
text
+
4
)
&&
(
!
g_ascii_strncasecmp
(
t
-
5
,
"""
,
6
))))))
{
/* local part will already be part of ret, strip it out */
ret
=
g_string_truncate
(
ret
,
ret
->
len
-
(
c
-
t
));
ret
=
g_string_append_unichar
(
ret
,
g
);
break
;
}
else
{
g_string_prepend_unichar
(
gurl_buf
,
g
);
t
=
g_utf8_find_prev_char
(
text
,
t
);
if
(
t
<
text
)
{
ret
=
g_string_assign
(
ret
,
""
);
break
;
}
}
}
t
=
g_utf8_find_next_char
(
c
,
NULL
);
while
(
flag
)
{
/* iterate forwards grabbing the domain part of an email address */
g
=
g_utf8_get_char
(
t
);
if
(
badchar
(
*
t
)
||
(
g
>=
127
)
||
(
*
t
==
')'
)
||
badentity
(
t
))
{
char
*
d
;
url_buf
=
g_string_free
(
gurl_buf
,
FALSE
);
gurl_buf
=
NULL
;
/* strip off trailing periods */
if
(
*
url_buf
)
{
for
(
d
=
url_buf
+
strlen
(
url_buf
)
-
1
;
*
d
==
'.'
;
d
--
,
t
--
)
*
d
=
'\0'
;
}
tmpurlbuf
=
purple_unescape_html
(
url_buf
);
if
(
purple_email_is_valid
(
tmpurlbuf
))
{
g_string_append_printf
(
ret
,
"<A HREF=
\"
mailto:%s
\"
>%s</A>"
,
tmpurlbuf
,
url_buf
);
}
else
{
g_string_append
(
ret
,
url_buf
);
}
g_free
(
url_buf
);
g_free
(
tmpurlbuf
);
c
=
t
;
break
;
}
else
{
g_string_append_unichar
(
gurl_buf
,
g
);
t
=
g_utf8_find_next_char
(
t
,
NULL
);
}
}
if
(
gurl_buf
)
{
g_string_free
(
gurl_buf
,
TRUE
);
}
}
if
(
*
c
==
')'
&&
!
inside_html
)
{
inside_paren
--
;
ret
=
g_string_append_c
(
ret
,
*
c
);
c
++
;
}
if
(
*
c
==
0
)
break
;
ret
=
g_string_append_c
(
ret
,
*
c
);
c
++
;
}
return
g_string_free
(
ret
,
FALSE
);
}
/*
 * Builds a new string from @in in which every sequence recognised by
 * purple_markup_unescape_entity() is replaced by the text that function
 * returns for it; every other byte is copied through untouched.
 *
 * Returns NULL when @in is NULL.  Otherwise the result is the buffer
 * detached from a GString via g_string_free(..., FALSE), so the caller
 * owns it.
 */
char *
purple_unescape_text(const char *in)
{
	GString *out;
	const char *cursor;

	if (in == NULL)
		return NULL;

	out = g_string_new("");

	for (cursor = in; *cursor != '\0'; ) {
		/* Only meaningful when a replacement was found. */
		int consumed;
		const char *replacement;

		replacement = purple_markup_unescape_entity(cursor, &consumed);
		if (replacement == NULL) {
			/* Not an entity: copy the single byte verbatim. */
			g_string_append_c(out, *cursor);
			cursor++;
			continue;
		}

		/* Emit the decoded text and skip the whole entity. */
		g_string_append(out, replacement);
		cursor += consumed;
	}

	return g_string_free(out, FALSE);
}
/*
 * Like purple_unescape_text(), but additionally turns the exact,
 * case-sensitive four-byte sequence "<br>" into a newline character.
 *
 * At each position the entity check is tried first, then the "<br>" check;
 * anything else is copied through one byte at a time.
 *
 * Returns NULL when @html is NULL.  Otherwise the result is the buffer
 * detached from a GString via g_string_free(..., FALSE), so the caller
 * owns it.
 */
char *
purple_unescape_html(const char *html)
{
	GString *out;
	const char *cursor;

	if (html == NULL)
		return NULL;

	out = g_string_new("");
	cursor = html;

	while (*cursor != '\0') {
		/* Only meaningful when a replacement was found. */
		int consumed;
		const char *replacement;

		replacement = purple_markup_unescape_entity(cursor, &consumed);
		if (replacement != NULL) {
			g_string_append(out, replacement);
			cursor += consumed;
			continue;
		}

		if (strncmp(cursor, "<br>", 4) == 0) {
			/* A line-break tag becomes a plain newline. */
			g_string_append_c(out, '\n');
			cursor += 4;
			continue;
		}

		/* Ordinary byte: copy as is. */
		g_string_append_c(out, *cursor);
		cursor++;
	}

	return g_string_free(out, FALSE);
}
/*
 * Copies the part of the marked-up string @str that lies between the
 * counted positions @x (inclusive) and @y (exclusive).
 *
 * A running position counter is advanced as follows:
 *   - each ordinary UTF-8 character and each "&...;" entity counts as 1;
 *   - a tag starting with "<img " counts as strlen("[Image]");
 *   - a tag starting with "<br" counts as 1;
 *   - a "<hr>" tag counts as strlen("\n---\n");
 *   - closing tags ("</...") and every other tag count as 0.
 *
 * Tags that are neither img/br/hr nor closing tags are pushed on a stack;
 * a closing tag pops the top of that stack.  Anything met while the counter
 * is >= @x is copied to the output.  The first time an ordinary character is
 * reached with the counter equal to @x (and @x > 0), the tags currently on
 * the stack are written out, oldest first.  When scanning stops, a
 * "</name>" is appended for every tag still on the stack.
 *
 * Returns NULL if @str is NULL or @x > @y (via g_return_val_if_fail), or if
 * a '<' has no following '>' or a '&' has no following ';'.  Returns an
 * empty string when @x == @y.  The caller owns the returned buffer.
 *
 * NOTE(review): a tag met exactly when the counter equals @x, before the
 * stack has been replayed, is both copied to the output and pushed, so it
 * looks like it can be emitted twice - confirm whether that is intended.
 */
char *
purple_markup_slice(const char *str, guint x, guint y)
{
	GString *out;
	GQueue *open_tags;
	guint pos = 0;
	gboolean replayed = FALSE;
	char *entry;

	g_return_val_if_fail(str != NULL, NULL);
	g_return_val_if_fail(x <= y, NULL);

	if (x == y)
		return g_strdup("");

	out = g_string_new("");
	open_tags = g_queue_new();

	for (; *str != '\0' && pos < y; str = g_utf8_next_char(str)) {
		gunichar ch = g_utf8_get_char(str);

		if (ch == '<') {
			char *tag_end = strchr(str, '>');

			if (tag_end == NULL)
				goto malformed;

			if (g_ascii_strncasecmp(str, "<img ", 5) == 0) {
				pos += strlen("[Image]");
			} else if (g_ascii_strncasecmp(str, "<br", 3) == 0) {
				pos += 1;
			} else if (g_ascii_strncasecmp(str, "<hr>", 4) == 0) {
				pos += strlen("\n---\n");
			} else if (g_ascii_strncasecmp(str, "</", 2) == 0) {
				/* Closing tag: drop the most recently opened one. */
				char *popped = g_queue_pop_head(open_tags);

				g_free(popped);
			} else {
				/* Opening tag: remember its full text, '>' included. */
				char *copy = g_strndup(str, tag_end - str + 1);

				g_queue_push_head(open_tags, copy);
			}

			if (pos >= x)
				g_string_append_len(out, str, tag_end - str + 1);

			/* The loop step then moves past the '>'. */
			str = tag_end;
		} else if (ch == '&') {
			char *entity_end = strchr(str, ';');

			if (entity_end == NULL)
				goto malformed;

			if (pos >= x)
				g_string_append_len(out, str, entity_end - str + 1);

			pos++;
			/* The loop step then moves past the ';'. */
			str = entity_end;
		} else {
			if (!replayed && pos > 0 && pos == x) {
				GList *node;

				/* Tail is the oldest entry, so this walks in opening order. */
				for (node = open_tags->tail; node != NULL; node = node->prev) {
					entry = node->data;
					g_string_append(out, entry);
				}
				replayed = TRUE;
			}

			if (pos >= x)
				g_string_append_unichar(out, ch);

			pos++;
		}
	}

	/* Close whatever is still open, innermost first. */
	while ((entry = g_queue_pop_head(open_tags)) != NULL) {
		char *name = purple_markup_get_tag_name(entry);

		g_string_append_printf(out, "</%s>", name);
		g_free(name);
		g_free(entry);
	}

	g_queue_free(open_tags);

	return g_string_free(out, FALSE);

malformed:
	/* Unterminated tag or entity: discard everything built so far. */
	g_string_free(out, TRUE);
	while ((entry = g_queue_pop_head(open_tags)) != NULL)
		g_free(entry);
	g_queue_free(open_tags);

	return NULL;
}
/*
 * Returns a newly allocated copy of the element name found in @tag.
 *
 * @tag must be non-NULL and start with '<'; otherwise NULL is returned via
 * g_return_val_if_fail.  The name is everything after the '<' up to, but
 * not including, the first '>', ' ' or '/', or the end of the string if
 * none of those occurs.  The caller owns the returned buffer.
 */
char *
purple_markup_get_tag_name(const char *tag)
{
	const char *name;

	g_return_val_if_fail(tag != NULL, NULL);
	g_return_val_if_fail(*tag == '<', NULL);

	name = tag + 1;

	/* strcspn() gives the length of the run free of any terminator. */
	return g_strndup(name, strcspn(name, "> /"));
}