pidgin/pidgin
Clone
Summary
Browse
Changes
Graph
Add purple_saved_presence_matches to enable searching in UIs
4 months ago, Gary Kramlich
8627951788d1
Add purple_saved_presence_matches to enable searching in UIs
Testing Done:
Consulted with Turtles and ran the unit tests under valgrind.
Reviewed at https://reviews.imfreedom.org/r/2942/
/*
* Purple - Internet Messaging Library
* Copyright (C) Pidgin Developers <devel@pidgin.im>
*
* Purple is the legal property of its developers, whose names are too numerous
* to list here. Please refer to the COPYRIGHT file distributed with this
* source distribution.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
#include "purplemarkup.h"

#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"
/*
 * Decodes the single HTML entity that starts at text.
 *
 * text:   candidate entity; must point at the '&' that begins it.
 * length: optional out-parameter; when non-NULL and an entity is recognised,
 *         receives the number of bytes of text the entity occupies
 *         (including the '&' and the trailing ';').
 *
 * Returns the UTF-8 replacement text, or NULL when text is NULL, does not
 * start with '&', or is not a recognised entity.  The result must not be
 * freed.  For numeric entities (&#NNN; / &#xHHH;) the result lives in a
 * function-static buffer that the next numeric lookup overwrites, so the
 * function is not reentrant and the caller has to copy the value before
 * calling again.
 */
const char *
purple_markup_unescape_entity(const char *text, int *length)
{
	const char *pln;
	int len;

	if (!text || *text != '&')
		return NULL;

/* Case-insensitive match of the literal entity s at text; records its size
 * in len as a side effect, so len is valid for whichever test succeeds. */
#define IS_ENTITY(s)  (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))

	if (IS_ENTITY("&amp;"))
		pln = "&";
	else if (IS_ENTITY("&lt;"))
		pln = "<";
	else if (IS_ENTITY("&gt;"))
		pln = ">";
	else if (IS_ENTITY("&nbsp;"))
		pln = " ";
	else if (IS_ENTITY("&copy;"))
		pln = "\302\251";      /* or use g_unichar_to_utf8(0xa9); */
	else if (IS_ENTITY("&quot;"))
		pln = "\"";
	else if (IS_ENTITY("&reg;"))
		pln = "\302\256";      /* or use g_unichar_to_utf8(0xae); */
	else if (IS_ENTITY("&apos;"))
		pln = "\'";
	else if (text[1] == '#' &&
	         (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
		/* Numeric character reference, decimal or (with 'x') hexadecimal. */
		static char buf[7];
		const char *start = text + 2;
		char *end;
		guint64 pound;
		int base = 10;
		int buflen;

		if (*start == 'x') {
			base = 16;
			start++;
		}

		pound = g_ascii_strtoull(start, &end, base);
		/* Reject code point 0, values that do not fit an int, and
		 * references that are not terminated by ';'. */
		if (pound == 0 || pound > INT_MAX || *end != ';') {
			return NULL;
		}

		len = (end - text) + 1;

		buflen = g_unichar_to_utf8((gunichar)pound, buf);
		buf[buflen] = '\0';
		pln = buf;
	}
	else
		return NULL;

	if (length)
		*length = len;
	return pln;
}
/*
 * One entry of the open-tag stack kept while converting HTML to XHTML.
 * Both tag names are only ever assigned string literals in this file, hence
 * the const qualification; they are never freed.
 */
struct purple_parse_tag {
	const char *src_tag;   /* tag name as it appears in the input HTML */
	const char *dest_tag;  /* tag name written to the XHTML output */
	gboolean ignore;       /* TRUE when no opening tag was emitted, so no
	                        * closing tag must be emitted either */
};
/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method
         recommended in the GCC docs). It contains 'continue's that should
         affect the while-loop in purple_markup_html_to_xhtml and doing the
         above would break that.
         Also, remember to put braces in constructs that require them for
         multiple statements when using this macro. */
/*
 * ALLOW_TAG_ALT(x, y): if the input at `c` opens the tag named by the string
 * literal x, consume it, emit it to the XHTML output as tag y and `continue`
 * the enclosing loop.  Two forms are recognised: "<x " followed by
 * attributes (copied through with quoted values re-escaped), and the bare
 * "<x>" / "<x/>".  Unless the tag is self-closing, an entry is pushed on
 * `tags` so the matching close tag can be written later.
 *
 * The macro expects these names in the expanding scope: `c` (const char *
 * cursor), `tags` (GList * stack), `xhtml` and `plain` (GString *, either
 * may be NULL).
 */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
						const char *o = c + strlen("<" x); \
						const char *p = NULL, *q = NULL, *r = NULL; \
						/* o = iterating over full tag */ \
						/* p = > (end of tag) */ \
						/* q = start of quoted bit */ \
						/* r = < inside tag */ \
						GString *innards = g_string_new(""); \
						while(o && *o) { \
							if(!q && (*o == '\"' || *o == '\'') ) { \
								q = o; \
							} else if(q) { \
								if(*o == *q) { /* end of quoted bit */ \
									char *unescaped = g_strndup(q+1, o-q-1); \
									char *escaped = g_markup_escape_text(unescaped, -1); \
									g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
									g_free(unescaped); \
									g_free(escaped); \
									q = NULL; \
								} else if(*c == '\\') { \
									/* NOTE(review): tests *c, which is '<' here, */ \
									/* so this branch looks unreachable - confirm */ \
									/* whether *o was intended. */ \
									o++; \
								} \
							} else if(*o == '<') { \
								r = o; \
							} else if(*o == '>') { \
								p = o; \
								break; \
							} else { \
								innards = g_string_append_c(innards, *o); \
							} \
							o++; \
						} \
						if(p && !r) { /* got an end of tag and no other < earlier */ \
							if(*(p-1) != '/') { \
								struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
								pt->src_tag = x; \
								pt->dest_tag = y; \
								tags = g_list_prepend(tags, pt); \
							} \
							if(xhtml) { \
								xhtml = g_string_append(xhtml, "<" y); \
								xhtml = g_string_append(xhtml, innards->str); \
								xhtml = g_string_append_c(xhtml, '>'); \
							} \
							c = p + 1; \
						} else { /* got end of tag with earlier < *or* didn't get anything */ \
							if(xhtml) \
								xhtml = g_string_append(xhtml, "&lt;"); \
							if(plain) \
								plain = g_string_append_c(plain, '<'); \
							c++; \
						} \
						g_string_free(innards, TRUE); \
						continue; \
					} \
					if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
							(*(c+strlen("<" x)) == '>' || \
							 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
						if(xhtml) \
							xhtml = g_string_append(xhtml, "<" y); \
						c += strlen("<" x); \
						if(*c != '/') { \
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
							pt->src_tag = x; \
							pt->dest_tag = y; \
							tags = g_list_prepend(tags, pt); \
							if(xhtml) \
								xhtml = g_string_append_c(xhtml, '>'); \
						} else { \
							if(xhtml) \
								xhtml = g_string_append(xhtml, "/>");\
						} \
						c = strchr(c, '>') + 1; \
						continue; \
					}
/* Don't forget to check the note above for ALLOW_TAG_ALT. */
/* ALLOW_TAG(x): allow tag x and emit it under the same name. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
/*
 * Converts loosely formatted HTML into XHTML and/or plain text.
 *
 * html:      input markup; NULL yields empty output strings.
 * xhtml_out: when non-NULL, receives a newly allocated XHTML rendering.
 * plain_out: when non-NULL, receives a newly allocated plain-text rendering.
 *
 * At least one of the two output pointers must be non-NULL.  Each returned
 * string is owned by the caller and has to be released with g_free().
 *
 * Only the tags handled below are passed through (some are rewritten as
 * styled <span>s); a '<' that does not start a recognised tag is written to
 * the XHTML output as "&lt;".  Tags still open at the end of the input are
 * closed in the XHTML output.
 */
void
purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
                            char **plain_out)
{
	GString *xhtml = NULL;
	GString *plain = NULL;
	GString *url = NULL;    /* href of the most recent <a> tag */
	GString *cdata = NULL;  /* text seen since that <a href=...> tag */
	GList *tags = NULL, *tag;
	const char *c = html;
	char quote = '\0';

/* Records in `quote` the quote character at ptr (and steps past it), or
 * '\0' when the attribute value is unquoted. */
#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
			quote = *(ptr++); \
		else \
			quote = '\0';

/* True while ptr is still inside the attribute value started by
 * CHECK_QUOTE: up to the closing quote, or up to ' ' / '>' when unquoted. */
#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))

	g_return_if_fail(xhtml_out != NULL || plain_out != NULL);

	if(xhtml_out)
		xhtml = g_string_new("");
	if(plain_out)
		plain = g_string_new("");

	while(c && *c) {
		if(*c == '<') {
			if(*(c+1) == '/') { /* closing tag */
				/* Look for the innermost open tag this one closes. */
				tag = tags;
				while(tag) {
					struct purple_parse_tag *pt = tag->data;
					if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) &&
					   *(c+strlen(pt->src_tag)+2) == '>') {
						c += strlen(pt->src_tag) + 3;
						break;
					}
					tag = tag->next;
				}
				if(tag) {
					/* Close every tag opened after the matched one, then
					 * the matched one itself. */
					while(tags) {
						struct purple_parse_tag *pt = tags->data;
						if(xhtml && !pt->ignore)
							g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
						if(plain && purple_strequal(pt->src_tag, "a")) {
							/* if this is a link, we have to add the url to the plaintext, too */
							if(cdata && url &&
							   (!g_string_equal(cdata, url) &&
							    (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
							     g_utf8_collate(url->str + 7, cdata->str) != 0))) {
								char *unescaped = purple_unescape_html(url->str);
								g_string_append_printf(plain, " <%s>", g_strstrip(unescaped));
								g_free(unescaped);
							}
							if(cdata) {
								g_string_free(cdata, TRUE);
								cdata = NULL;
							}
						}
						if(tags == tag)
							break;
						tags = g_list_delete_link(tags, tags);
						g_free(pt);
					}
					g_free(tag->data);
					tags = g_list_delete_link(tags, tag);
				} else {
					/* a closing tag we weren't expecting...
					 * we'll let it slide, if it's really a tag...if it's
					 * just a </ we'll escape it properly */
					const char *end = c+2;
					while(*end && g_ascii_isalpha(*end))
						end++;
					if(*end == '>') {
						c = end+1;
					} else {
						if(xhtml)
							xhtml = g_string_append(xhtml, "&lt;");
						if(plain)
							plain = g_string_append_c(plain, '<');
						c++;
					}
				}
			} else { /* opening tag */
				ALLOW_TAG("blockquote");
				ALLOW_TAG("cite");
				ALLOW_TAG("div");
				ALLOW_TAG("em");
				ALLOW_TAG("h1");
				ALLOW_TAG("h2");
				ALLOW_TAG("h3");
				ALLOW_TAG("h4");
				ALLOW_TAG("h5");
				ALLOW_TAG("h6");
				/* we only allow html to start the message */
				if(c == html) {
					ALLOW_TAG("html");
				}
				ALLOW_TAG_ALT("i", "em");
				ALLOW_TAG_ALT("italic", "em");
				ALLOW_TAG("li");
				ALLOW_TAG("ol");
				ALLOW_TAG("p");
				ALLOW_TAG("pre");
				ALLOW_TAG("q");
				ALLOW_TAG("span");
				ALLOW_TAG("ul");

				/* we skip <HR> because it's not legal in XHTML-IM.  However,
				 * we still want to send something sensible, so we put a
				 * linebreak in its place. <BR> also needs special handling
				 * because putting a </BR> to close it would just be dumb. */
				if((!g_ascii_strncasecmp(c, "<br", 3)
				    || !g_ascii_strncasecmp(c, "<hr", 3))
				   && (*(c+3) == '>' ||
				       !g_ascii_strncasecmp(c+3, "/>", 2) ||
				       !g_ascii_strncasecmp(c+3, " />", 3))) {
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<br/>");
					if(plain && *c != '\n')
						plain = g_string_append_c(plain, '\n');
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<b>", 3) ||
				   !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) ||
				   !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					/* The third character tells the three spellings apart. */
					if(*(c+2) == '>')
						pt->src_tag = "b";
					else if(*(c+2) == 'o')
						pt->src_tag = "bold";
					else
						pt->src_tag = "strong";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<u>", 3) ||
				   !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "u" : "underline";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<s>", 3) ||
				   !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "s" : "strike";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sub";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sup";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<img", 4) &&
				   (*(c+4) == '>' || *(c+4) == ' ')) {
					const char *p = c + 4;
					GString *src = NULL, *alt = NULL;
/* Copies one attribute value into `to`, escaping a bare '&' and any
 * apostrophe because the value is re-emitted inside single quotes. */
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append(to, "&apos;"); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}

					while(*p && *p != '>') {
						if(!g_ascii_strncasecmp(p, "src=", 4)) {
							const char *q = p + 4;
							if(src)
								g_string_free(src, TRUE);
							src = g_string_new("");
							ESCAPE(q, src);
							p = q;
						} else if(!g_ascii_strncasecmp(p, "alt=", 4)) {
							const char *q = p + 4;
							if(alt)
								g_string_free(alt, TRUE);
							alt = g_string_new("");
							ESCAPE(q, alt);
							p = q;
						} else {
							p++;
						}
					}
#undef ESCAPE
					if((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					/* src and alt are required! */
					if(src && xhtml)
						g_string_append_printf(xhtml, "<img src='%s' alt='%s' />",
						                       g_strstrip(src->str),
						                       alt ? alt->str : "");
					if(alt) {
						if(plain) {
							char *unescaped = purple_unescape_html(alt->str);
							plain = g_string_append(plain, unescaped);
							g_free(unescaped);
						}
						if(!src && xhtml) {
							xhtml = g_string_append(xhtml, alt->str);
						}
						g_string_free(alt, TRUE);
					}
					/* src stays NULL for an <img> without src=; passing
					 * NULL to g_string_free() would trip its precondition
					 * check. */
					if(src)
						g_string_free(src, TRUE);
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<a", 2) &&
				   (*(c+2) == '>' || *(c+2) == ' ')) {
					const char *p = c + 2;
					struct purple_parse_tag *pt;
					while(*p && *p != '>') {
						if(!g_ascii_strncasecmp(p, "href=", 5)) {
							const char *q = p + 5;
							if(url)
								g_string_free(url, TRUE);
							url = g_string_new("");
							if(cdata)
								g_string_free(cdata, TRUE);
							cdata = g_string_new("");
							CHECK_QUOTE(q);
							while(VALID_CHAR(q)) {
								int len;
								/* The URL is re-emitted inside double
								 * quotes, so escape '&' and '"'. */
								if((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
									url = g_string_append(url, "&amp;");
								else if(*q == '"')
									url = g_string_append(url, "&quot;");
								else
									url = g_string_append_c(url, *q);
								q++;
							}
							p = q;
						} else {
							p++;
						}
					}
					if((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "a";
					pt->dest_tag = "a";
					tags = g_list_prepend(tags, pt);
					if(xhtml)
						g_string_append_printf(xhtml, "<a href=\"%s\">",
						                       url ? g_strstrip(url->str) : "");
					continue;
				}
/* Copies one attribute value into `to`, escaping a bare '&' and turning an
 * apostrophe into '"' because the value ends up inside style='...'. */
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append_c(to, '\"'); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}
				if(!g_ascii_strncasecmp(c, "<font", 5) &&
				   (*(c+5) == '>' || *(c+5) == ' ')) {
					const char *p = c + 5;
					GString *style = g_string_new("");
					struct purple_parse_tag *pt;
					while(*p && *p != '>') {
						if(!g_ascii_strncasecmp(p, "back=", 5)) {
							const char *q = p + 5;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "background: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if(!g_ascii_strncasecmp(p, "color=", 6)) {
							const char *q = p + 6;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "color: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if(!g_ascii_strncasecmp(p, "face=", 5)) {
							const char *q = p + 5;
							GString *face = g_string_new("");
							ESCAPE(q, face);
							g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
							g_string_free(face, TRUE);
							p = q;
						} else if(!g_ascii_strncasecmp(p, "size=", 5)) {
							const char *q = p + 5;
							int sz;
							const char *size = "medium";
							CHECK_QUOTE(q);
							sz = atoi(q);
							/* Map the 1-7 HTML font sizes to CSS keywords;
							 * anything else keeps "medium". */
							switch(sz) {
							case 1:
								size = "xx-small";
								break;
							case 2:
								size = "small";
								break;
							case 3:
								size = "medium";
								break;
							case 4:
								size = "large";
								break;
							case 5:
								size = "x-large";
								break;
							case 6:
							case 7:
								size = "xx-large";
								break;
							default:
								break;
							}
							g_string_append_printf(style, "font-size: %s; ", size);
							p = q;
						} else {
							p++;
						}
					}
					if((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "font";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					if(style->len && xhtml)
						g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
					else
						pt->ignore = TRUE;
					g_string_free(style, TRUE);
					continue;
				}
#undef ESCAPE
				if(!g_ascii_strncasecmp(c, "<body ", 6)) {
					const char *p = c + 6;
					gboolean did_something = FALSE;
					while(*p && *p != '>') {
						if(!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
							const char *q = p + 8;
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
							GString *color = g_string_new("");
							CHECK_QUOTE(q);
							while(VALID_CHAR(q)) {
								color = g_string_append_c(color, *q);
								q++;
							}
							if(xhtml)
								g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
							g_string_free(color, TRUE);
							if((c = strchr(p, '>')) != NULL)
								c++;
							else
								c = p;
							pt->src_tag = "body";
							pt->dest_tag = "span";
							tags = g_list_prepend(tags, pt);
							did_something = TRUE;
							break;
						}
						p++;
					}
					if(did_something)
						continue;
				}
				/* this has to come after the special case for bgcolor */
				ALLOW_TAG("body");
				if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
					char *p = strstr(c + strlen("<!--"), "-->");
					if(p) {
						/* Only the opener is emitted here; the comment body
						 * and its terminator go through the ordinary
						 * character path on later iterations. */
						if(xhtml)
							xhtml = g_string_append(xhtml, "<!--");
						c += strlen("<!--");
						continue;
					}
				}

				/* Not a tag we know: emit the '<' as text. */
				if(xhtml)
					xhtml = g_string_append(xhtml, "&lt;");
				if(plain)
					plain = g_string_append_c(plain, '<');
				c++;
			}
		} else if(*c == '&') {
			char buf[7];
			const char *pln;
			int len;

			if((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
				/* Not an entity: treat the '&' as a single literal char. */
				len = 1;
				g_snprintf(buf, sizeof(buf), "%c", *c);
				pln = buf;
			}
			if(xhtml)
				xhtml = g_string_append_len(xhtml, c, len);
			if(plain)
				plain = g_string_append(plain, pln);
			if(cdata)
				cdata = g_string_append_len(cdata, c, len);
			c += len;
		} else {
			if(xhtml)
				xhtml = g_string_append_c(xhtml, *c);
			if(plain)
				plain = g_string_append_c(plain, *c);
			if(cdata)
				cdata = g_string_append_c(cdata, *c);
			c++;
		}
	}

	/* Close whatever is still open, innermost first. */
	if(xhtml) {
		for(tag = tags; tag; tag = tag->next) {
			struct purple_parse_tag *pt = tag->data;
			if(!pt->ignore)
				g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
		}
	}
	g_clear_list(&tags, g_free);

	if(xhtml_out)
		*xhtml_out = g_string_free(xhtml, FALSE);
	if(plain_out)
		*plain_out = g_string_free(plain, FALSE);
	if(url)
		g_string_free(url, TRUE);
	if(cdata)
		g_string_free(cdata, TRUE);
#undef CHECK_QUOTE
#undef VALID_CHAR
}
/* Case-insensitively tests whether the text at s starts with prefix. */
static gboolean
strip_html_starts_with(const char *s, const char *prefix)
{
	return g_ascii_strncasecmp(s, prefix, strlen(prefix)) == 0;
}

/* The following are probably reasonable changes:
 * - \n should be converted to a normal space
 * - in addition to <br>, <p> and <div> etc. should also be converted into \n
 * - We want to turn </td>#whitespace<td> sequences into a single tab
 * - We want to turn </tr>#whitespace<tr> sequences into a single \n
 * - <script>...</script> and <style>...</style> should be completely removed
 */

/*
 * Returns a newly allocated copy of str with the HTML tags removed and the
 * recognised entities decoded, or NULL when str is NULL.  The copy is
 * rewritten in place: `rd` is the read position, `wr` the write position.
 * The caller frees the result with g_free().
 */
char *
purple_markup_strip_html(const char *str)
{
	gchar *buf;
	gchar *href = NULL;                   /* address of the last <a href> */
	const gchar *cdata_close_tag = NULL;  /* set while inside script/style */
	const gchar *ent;
	gboolean visible = TRUE;
	gboolean closing_td_p = FALSE;        /* previous tag was </td> */
	int rd, scan, entlen;
	int wr = 0;
	int href_st = 0;                      /* write position where the link text began */

	if (str == NULL)
		return NULL;

	buf = g_strdup(str);

	for (rd = 0; buf[rd]; rd++) {
		if (buf[rd] == '<') {
			if (cdata_close_tag) {
				/* Note: Don't even assume any other tag is a tag in CDATA */
				if (strip_html_starts_with(buf + rd, cdata_close_tag)) {
					rd += strlen(cdata_close_tag) - 1;
					cdata_close_tag = NULL;
				}
				continue;
			} else if (strip_html_starts_with(buf + rd, "<td") && closing_td_p) {
				buf[wr++] = '\t';
				visible = TRUE;
			} else if (strip_html_starts_with(buf + rd, "</td>")) {
				closing_td_p = TRUE;
				visible = FALSE;
			} else {
				closing_td_p = FALSE;
				visible = TRUE;
			}

			scan = rd + 1;

			if (g_ascii_isspace(buf[scan])) {
				visible = TRUE;
			} else if (buf[scan]) {
				/* Scan until we end the tag either implicitly (closed start
				 * tag) or explicitly, using a sloppy method (i.e., < or >
				 * inside quoted attributes will screw us up)
				 */
				while (buf[scan] && buf[scan] != '<' && buf[scan] != '>')
					scan++;

				if (strip_html_starts_with(buf + rd, "<a") &&
				    g_ascii_isspace(buf[rd + 2])) {
					/* An <a> tag: remember its href so it can be printed
					 * once the link is closed. */
					int st;   /* start of href, inclusive [ */
					int end;  /* end of href, exclusive ) */
					char delim = ' ';

					/* Find start of href */
					for (st = rd + 3; st < scan; st++) {
						if (!strip_html_starts_with(buf + st, "href="))
							continue;
						st += 5;
						if (buf[st] == '"' || buf[st] == '\'') {
							delim = buf[st];
							st++;
						}
						break;
					}

					/* find end of address */
					end = st;
					while (end < scan && buf[end] != delim)
						end++;

					/* If there's an address, save it.  If there was
					 * already one saved, kill it. */
					if (st < scan) {
						char *raw = g_strndup(buf + st, end - st);

						g_free(href);
						href = purple_unescape_html(raw);
						g_free(raw);
						href_st = wr;
					}
				} else if (href != NULL &&
				           strip_html_starts_with(buf + rd, "</a>")) {
					/* Replace </a> with an ascii representation of the
					 * address the link was pointing to. */
					size_t hrlen = strlen(href);

					/* Only insert the href if it's different from the CDATA. */
					if ((hrlen != (gsize)(wr - href_st) ||
					     strncmp(buf + href_st, href, hrlen)) &&
					    (hrlen != (gsize)(wr - href_st + 7) || /* 7 == strlen("http://") */
					     strncmp(buf + href_st, href + 7, hrlen - 7))) {
						buf[wr++] = ' ';
						buf[wr++] = '(';
						memmove(buf + wr, href, hrlen);
						wr += hrlen;
						buf[wr++] = ')';
						g_free(href);
						href = NULL;
					}
				} else if ((wr && (strip_html_starts_with(buf + rd, "<p>")
				                   || strip_html_starts_with(buf + rd, "<tr")
				                   || strip_html_starts_with(buf + rd, "<hr")
				                   || strip_html_starts_with(buf + rd, "<li")
				                   || strip_html_starts_with(buf + rd, "<div")))
				           || strip_html_starts_with(buf + rd, "<br")
				           || strip_html_starts_with(buf + rd, "</table>")) {
					/* Tags mapped to a newline (most of them only when
					 * something has been written already). */
					buf[wr++] = '\n';
				} else if (strip_html_starts_with(buf + rd, "<script")) {
					/* Tags which begin CDATA and need to be closed */
					cdata_close_tag = "</script>";
				} else if (strip_html_starts_with(buf + rd, "<style")) {
					cdata_close_tag = "</style>";
				}

				/* Update the index and continue checking after the tag */
				if (buf[scan] == '<' || buf[scan] == '\0')
					rd = scan - 1;
				else
					rd = scan;
				continue;
			}
		} else if (cdata_close_tag) {
			continue;
		} else if (!g_ascii_isspace(buf[rd])) {
			visible = TRUE;
		}

		if (buf[rd] == '&' &&
		    (ent = purple_markup_unescape_entity(buf + rd, &entlen)) != NULL) {
			while (*ent)
				buf[wr++] = *ent++;
			rd += entlen - 1;
			continue;
		}

		if (visible) {
			if (g_ascii_isspace(buf[rd]))
				buf[wr++] = ' ';
			else
				buf[wr++] = buf[rd];
		}
	}

	g_free(href);

	buf[wr] = '\0';

	return buf;
}
/* Returns TRUE when c is one of the characters that end a URL or address
 * scan: space, comma, NUL, newline, carriage return, '<', '>' or '"'. */
static gboolean
badchar(char c)
{
	if (c == ' ' || c == ',' || c == '\0' || c == '\n' ||
	    c == '\r' || c == '<' || c == '>' || c == '"') {
		return TRUE;
	}

	return FALSE;
}
/* Returns TRUE when the text at c begins with one of the escaped forms of a
 * URL-terminating character: "&lt;", "&gt;" or "&quot;" (any letter case). */
static gboolean
badentity(const char *c)
{
	if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
	    !g_ascii_strncasecmp(c, "&gt;", 4) ||
	    !g_ascii_strncasecmp(c, "&quot;", 6)) {
		return TRUE;
	}
	return FALSE;
}
/*
 * Tries to turn the URL that starts at c into an <a> element appended to
 * ret.
 *
 * start:        beginning of the whole text (lower bound for look-behind).
 * c:            position of the recognised scheme or host prefix.
 * matchlen:     length of that prefix; a URL consisting of nothing but the
 *               prefix is not linked.
 * urlprefix:    text put in front of the URL inside href.
 * inside_paren: when positive, a ')' right before the end is left out.
 *
 * Returns the position of the first character after the linked text, or c
 * unchanged when nothing was appended.
 */
static const char *
process_link(GString *ret, const char *start, const char *c, int matchlen,
             const char *urlprefix, int inside_paren)
{
	const char *stop = c;
	char *shown, *target;

	/* Advance to the character that ends the URL. */
	while (TRUE) {
		if (badchar(*stop) || badentity(stop)) {
			/* Only the prefix itself was found: not a link. */
			if (stop - c == matchlen)
				return c;

			/* A comma ends the URL only when a space follows it. */
			if (!(*stop == ',' && *(stop + 1) != ' '))
				break;
		}
		stop++;
	}

	/* Leave a trailing period, and a closing parenthesis that belongs to
	 * the surrounding text, out of the link. */
	if (stop > start && *(stop - 1) == '.')
		stop--;
	if (stop > start && *(stop - 1) == ')' && inside_paren > 0)
		stop--;

	shown = g_strndup(c, stop - c);
	target = purple_unescape_html(shown);
	g_string_append_printf(ret, "<a href=\"%s%s\">%s</a>",
	                       urlprefix, target, shown);
	g_free(target);
	g_free(shown);

	return stop;
}
/*
 * Returns a newly allocated copy of text in which URLs and e-mail addresses
 * are wrapped in <a href="..."> elements, or NULL when text is NULL.
 * Content inside existing HTML tags and inside existing <a>...</a> elements
 * is copied through without being linked.  The caller frees the result with
 * g_free().
 */
char *
purple_markup_linkify(const char *text)
{
	const char *c, *t, *q = NULL;
	char *tmpurlbuf, *url_buf;
	gunichar g;
	gboolean inside_html = FALSE;
	int inside_paren = 0;
	GString *ret;

	if (text == NULL)
		return NULL;

	ret = g_string_new("");

	c = text;
	while (*c) {

		if(*c == '(' && !inside_html) {
			inside_paren++;
			ret = g_string_append_c(ret, *c);
			c++;
		}

		if(inside_html) {
			/* Track quotes so the closing quote resets q; '>' always
			 * ends the tag. */
			if(*c == '>') {
				inside_html = FALSE;
			} else if(!q && (*c == '\"' || *c == '\'')) {
				q = c;
			} else if(q) {
				if(*c == *q)
					q = NULL;
			}
		} else if(*c == '<') {
			inside_html = TRUE;
			if (!g_ascii_strncasecmp(c, "<a", 2)) {
				/* Copy an existing link verbatim up to its "/a>". */
				while (1) {
					if (!g_ascii_strncasecmp(c, "/a>", 3)) {
						inside_html = FALSE;
						break;
					}
					ret = g_string_append_c(ret, *c);
					c++;
					if (!(*c))
						break;
				}
			}
		} else if (!g_ascii_strncasecmp(c, "http://", 7)) {
			c = process_link(ret, text, c, 7, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "https://", 8)) {
			c = process_link(ret, text, c, 8, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
			c = process_link(ret, text, c, 6, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
			c = process_link(ret, text, c, 7, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "file://", 7)) {
			c = process_link(ret, text, c, 7, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' &&
		           (c == text || badchar(c[-1]) || badentity(c-1))) {
			c = process_link(ret, text, c, 4, "http://", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' &&
		           (c == text || badchar(c[-1]) || badentity(c-1))) {
			c = process_link(ret, text, c, 4, "ftp://", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "xmpp:", 5) &&
		           (c == text || badchar(c[-1]) || badentity(c-1))) {
			c = process_link(ret, text, c, 5, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
			t = c;
			while (1) {
				if (badchar(*t) || badentity(t)) {
					char *d;
					/* Nothing after "mailto:" - not a link. */
					if (t - c == 7) {
						break;
					}
					if (t > text && *(t - 1) == '.')
						t--;
					/* Validate only the address, without any "?query". */
					if ((d = strstr(c + 7, "?")) != NULL && d < t)
						url_buf = g_strndup(c + 7, d - c - 7);
					else
						url_buf = g_strndup(c + 7, t - c - 7);
					if (!purple_email_is_valid(url_buf)) {
						g_free(url_buf);
						break;
					}
					g_free(url_buf);
					url_buf = g_strndup(c, t - c);
					tmpurlbuf = purple_unescape_html(url_buf);
					g_string_append_printf(ret, "<a href=\"%s\">%s</a>",
					                       tmpurlbuf, url_buf);
					g_free(url_buf);
					g_free(tmpurlbuf);
					c = t;
					break;
				}
				t++;
			}
		} else if (c != text && (*c == '@')) {
			int flag;
			GString *gurl_buf = NULL;
			const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n\0";

			/* An '@' with an illegal neighbour cannot be part of an
			 * address; skip the whole scan in that case. */
			if (strchr(illegal_chars, *(c - 1)) || strchr(illegal_chars, *(c + 1)))
				flag = 0;
			else {
				flag = 1;
				gurl_buf = g_string_new("");
			}

			t = c;
			while (flag) {
				/* iterate backwards grabbing the local part of an email address */
				g = g_utf8_get_char(t);
				if (badchar(*t) || (g >= 127) || (*t == '(') ||
				    ((*t == ';') &&
				     ((t > (text + 2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
				                          !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
				      (t > (text + 4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
					/* local part will already be part of ret, strip it out */
					ret = g_string_truncate(ret, ret->len - (c - t));
					ret = g_string_append_unichar(ret, g);
					break;
				} else {
					g_string_prepend_unichar(gurl_buf, g);
					t = g_utf8_find_prev_char(text, t);
					/* g_utf8_find_prev_char() returns NULL once the start
					 * of text has been passed: everything emitted so far
					 * was the local part. */
					if (t == NULL) {
						ret = g_string_assign(ret, "");
						break;
					}
				}
			}

			t = g_utf8_find_next_char(c, NULL);

			while (flag) {
				/* iterate forwards grabbing the domain part of an email address */
				g = g_utf8_get_char(t);
				if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
					char *d;

					url_buf = g_string_free(gurl_buf, FALSE);
					gurl_buf = NULL;

					/* strip off trailing periods */
					if (*url_buf) {
						for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
							*d = '\0';
					}

					tmpurlbuf = purple_unescape_html(url_buf);
					if (purple_email_is_valid(tmpurlbuf)) {
						g_string_append_printf(ret, "<a href=\"mailto:%s\">%s</a>",
						                       tmpurlbuf, url_buf);
					} else {
						g_string_append(ret, url_buf);
					}
					g_free(url_buf);
					g_free(tmpurlbuf);
					c = t;

					break;
				} else {
					g_string_append_unichar(gurl_buf, g);
					t = g_utf8_find_next_char(t, NULL);
				}
			}

			if (gurl_buf) {
				g_string_free(gurl_buf, TRUE);
			}
		}

		if(*c == ')' && !inside_html) {
			inside_paren--;
			ret = g_string_append_c(ret, *c);
			c++;
		}

		if (*c == 0)
			break;

		ret = g_string_append_c(ret, *c);
		c++;

	}
	return g_string_free(ret, FALSE);
}
/*
 * Returns a newly allocated copy of in with every entity recognised by
 * purple_markup_unescape_entity() replaced by its text, or NULL when in is
 * NULL.  The caller frees the result with g_free().
 */
char *
purple_unescape_text(const char *in)
{
	GString *out;
	const char *cursor;

	if (in == NULL)
		return NULL;

	out = g_string_new("");

	cursor = in;
	while (*cursor != '\0') {
		int consumed;
		const char *replacement =
			purple_markup_unescape_entity(cursor, &consumed);

		if (replacement == NULL) {
			/* Not an entity: copy the byte as it is. */
			g_string_append_c(out, *cursor);
			cursor++;
			continue;
		}

		g_string_append(out, replacement);
		cursor += consumed;
	}

	return g_string_free(out, FALSE);
}
/*
 * Returns a newly allocated copy of html with every entity recognised by
 * purple_markup_unescape_entity() replaced by its text and every literal
 * "<br>" replaced by a newline, or NULL when html is NULL.  The caller frees
 * the result with g_free().
 */
char *
purple_unescape_html(const char *html)
{
	GString *out;
	const char *cursor;

	if (html == NULL)
		return NULL;

	out = g_string_new("");

	cursor = html;
	while (*cursor != '\0') {
		int consumed;
		const char *replacement =
			purple_markup_unescape_entity(cursor, &consumed);

		if (replacement != NULL) {
			g_string_append(out, replacement);
			cursor += consumed;
			continue;
		}

		if (strncmp(cursor, "<br>", 4) == 0) {
			g_string_append_c(out, '\n');
			cursor += 4;
			continue;
		}

		g_string_append_c(out, *cursor);
		cursor++;
	}

	return g_string_free(out, FALSE);
}
/*
 * Returns a newly allocated slice of the markup in str that covers the
 * displayed positions from x (inclusive) to y (exclusive).
 *
 * Positions count displayed characters: an entity counts as one, "<img "
 * as strlen("[Image]"), "<br" as one, "<hr>" as strlen("\n---\n"), other
 * tags as zero.  Tags that are open where the slice starts are re-opened at
 * its beginning, and tags still open at its end are closed.
 *
 * Returns NULL when str is NULL, when x > y, or when a '<' has no '>' or an
 * '&' has no ';' after it; returns "" when x == y.  The caller frees the
 * result with g_free().
 */
char *
purple_markup_slice(const char *str, guint x, guint y)
{
	GString *out;
	GQueue *open_tags;
	guint pos = 0;
	gboolean reopened = FALSE;
	gunichar ch;
	char *tag;

	g_return_val_if_fail(str != NULL, NULL);
	g_return_val_if_fail(x <= y, NULL);

	if (x == y)
		return g_strdup("");

	out = g_string_new("");
	open_tags = g_queue_new();

	for (; *str && (pos < y); str = g_utf8_next_char(str)) {
		ch = g_utf8_get_char(str);

		if (ch == '<') {
			char *end = strchr(str, '>');

			if (end == NULL)
				goto malformed;

			if (!g_ascii_strncasecmp(str, "<img ", 5)) {
				pos += strlen("[Image]");
			} else if (!g_ascii_strncasecmp(str, "<br", 3)) {
				pos += 1;
			} else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
				pos += strlen("\n---\n");
			} else if (!g_ascii_strncasecmp(str, "</", 2)) {
				/* closing tag: pop the stack, position unchanged */
				g_free(g_queue_pop_head(open_tags));
			} else {
				/* opening tag: push it, position unchanged */
				g_queue_push_head(open_tags, g_strndup(str, end - str + 1));
			}

			if (pos >= x)
				g_string_append_len(out, str, end - str + 1);

			/* The loop increment then steps past the '>'. */
			str = end;
		} else if (ch == '&') {
			char *end = strchr(str, ';');

			if (end == NULL)
				goto malformed;

			if (pos >= x)
				g_string_append_len(out, str, end - str + 1);
			pos++;

			/* The loop increment then steps past the ';'. */
			str = end;
		} else {
			if (pos == x && pos > 0 && !reopened) {
				/* First character of the slice: re-open, oldest first,
				 * every tag that is currently open. */
				GList *node;

				for (node = open_tags->tail; node; node = node->prev) {
					tag = node->data;
					g_string_append(out, tag);
				}
				reopened = TRUE;
			}

			if (pos >= x)
				g_string_append_unichar(out, ch);
			pos++;
		}
	}

	/* Close, innermost first, whatever is still open. */
	while ((tag = g_queue_pop_head(open_tags))) {
		char *name = purple_markup_get_tag_name(tag);

		g_string_append_printf(out, "</%s>", name);
		g_free(name);
		g_free(tag);
	}

	g_queue_free(open_tags);
	return g_string_free(out, FALSE);

malformed:
	/* Unterminated tag or entity: discard everything and report failure. */
	g_string_free(out, TRUE);
	while ((tag = g_queue_pop_head(open_tags)))
		g_free(tag);
	g_queue_free(open_tags);
	return NULL;
}
/*
 * Extracts the name from a tag such as "<span style=...>".
 *
 * tag must be non-NULL and start with '<'; otherwise NULL is returned.  The
 * name is everything after the '<' up to the first '>', ' ' or '/' (or the
 * end of the string).  Returns a newly allocated string that the caller
 * frees with g_free().
 */
char *
purple_markup_get_tag_name(const char *tag)
{
	const char *name;
	const char *stop;

	g_return_val_if_fail(tag != NULL, NULL);
	g_return_val_if_fail(*tag == '<', NULL);

	name = tag + 1;
	stop = name;
	while (*stop != '\0' && *stop != '>' && *stop != ' ' && *stop != '/')
		stop++;

	return g_strndup(name, stop - name);
}