summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alan Coopersmith <alan.coopersmith@oracle.com> 2010-06-30 18:38:57 -0700
committer: Alan Coopersmith <alan.coopersmith@oracle.com> 2010-07-07 10:47:38 -0700
commit: 3fa31068bcae6a5bee7fbd41788e13d6d56da8c0 (patch)
tree: ef44114833b2f6637e4245e1a1cfd4275809c5f2
parent: 6a4f77d4ac1737dd49f3462d98e0f7e41e50ab18 (diff)
Make iconv() usage optional
Allows building without iconv, though character set conversion will not be supported in that case.

Handles UTF8_STRING validation and output for UTF-8 locales without iconv (using is_valid_utf8() function copied from X.Org's app/xprop/xprop.c).

Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
Reviewed-by: James Cloos <cloos@jhcloos.com>
-rw-r--r-- configure.ac | 6
-rw-r--r-- xwininfo.c | 114
2 files changed, 114 insertions, 6 deletions
diff --git a/configure.ac b/configure.ac
index cd7d2a9..4344cf1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -33,6 +33,8 @@ XORG_MACROS_VERSION(1.3)
AM_CONFIG_HEADER(config.h)
+AC_USE_SYSTEM_EXTENSIONS
+
AC_PROG_CC
AC_PROG_INSTALL
@@ -40,6 +42,10 @@ XORG_DEFAULT_OPTIONS
AC_CHECK_FUNCS([strlcat])
+# Check for iconv in libc, then libiconv
+AC_SEARCH_LIBS([iconv], [iconv], [AC_DEFINE([HAVE_ICONV], 1,
+ [Define to 1 if you have the iconv() function])])
+
# Allow using xcb-icccm, but don't make it the default while the API is
# still being changed.
AC_MSG_CHECKING([whether to use xcb-icccm library])
diff --git a/xwininfo.c b/xwininfo.c
index aba5890..cd81834 100644
--- a/xwininfo.c
+++ b/xwininfo.c
@@ -77,7 +77,9 @@ of the copyright holder.
#include <string.h>
#include <locale.h>
#include <langinfo.h>
-#include <iconv.h>
+#ifdef HAVE_ICONV
+# include <iconv.h>
+#endif
#include <ctype.h>
#include <errno.h>
@@ -240,8 +242,10 @@ static void wininfo_wipe (struct wininfo *);
static const char *window_id_format = "0x%lx";
-static const char *user_encoding;
+#ifdef HAVE_ICONV
static iconv_t iconv_from_utf8;
+#endif
+static const char *user_encoding;
static void print_utf8 (const char *, char *, size_t, const char *);
static void print_friendly_name (const char *, const char *, const char *);
@@ -431,6 +435,8 @@ main (int argc, char **argv)
if (!setlocale (LC_ALL, ""))
fprintf (stderr, "%s: can not set locale properly\n", program_name);
user_encoding = nl_langinfo (CODESET);
+ if (user_encoding == NULL)
+ user_encoding = "unknown encoding";
memset (w, 0, sizeof(struct wininfo));
@@ -656,9 +662,11 @@ main (int argc, char **argv)
wininfo_wipe (w);
xcb_disconnect (dpy);
+#ifdef HAVE_ICONV
if (iconv_from_utf8 && (iconv_from_utf8 != (iconv_t) -1)) {
iconv_close (iconv_from_utf8);
}
+#endif
exit (0);
}
@@ -1778,6 +1786,83 @@ get_net_wm_name (xcb_connection_t *dpy, xcb_window_t win)
}
}
+/* [Copied from code added by Yang Zhao to xprop/xprop.c]
+ *
+ * Validate a string as UTF-8 encoded according to RFC 3629
+ *
+ * Simply, a unicode code point (up to 21-bits long) is encoded as follows:
+ *
+ * Char. number range | UTF-8 octet sequence
+ * (hexadecimal) | (binary)
+ * --------------------+---------------------------------------------
+ * 0000 0000-0000 007F | 0xxxxxxx
+ * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+ * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * Validation is done left-to-right, and an error condition, if any, refers to
+ * only the left-most problem in the string.
+ *
+ * Return values:
+ * UTF8_VALID: Valid UTF-8 encoded string
+ * UTF8_OVERLONG: Using more bytes than needed for a code point
+ * UTF8_SHORT_TAIL: Not enough bytes in a multi-byte sequence
+ * UTF8_LONG_TAIL: Too many bytes in a multi-byte sequence
+ * UTF8_FORBIDDEN_VALUE: Forbidden prefix or code point outside 0x10FFFF
+ */
+#define UTF8_VALID 0
+#define UTF8_FORBIDDEN_VALUE 1
+#define UTF8_OVERLONG 2
+#define UTF8_SHORT_TAIL 3
+#define UTF8_LONG_TAIL 4
+static int
+is_valid_utf8 (const char *string, int len)
+{
+ unsigned long codepoint;
+ int rem, i;
+ unsigned char c;
+
+ rem = 0;
+ for (i = 0; i < len; i++) {
+ c = (unsigned char) string[i];
+
+ /* Order of type check:
+ * - Single byte code point
+ * - Non-starting byte of multi-byte sequence
+ * - Start of 2-byte sequence
+ * - Start of 3-byte sequence
+ * - Start of 4-byte sequence
+ */
+ if (!(c & 0x80)) {
+ if (rem > 0) return UTF8_SHORT_TAIL;
+ rem = 0;
+ codepoint = c;
+ } else if ((c & 0xC0) == 0x80) {
+ if (rem == 0) return UTF8_LONG_TAIL;
+ rem--;
+ codepoint |= (c & 0x3F) << (rem * 6);
+ if (codepoint == 0) return UTF8_OVERLONG;
+ } else if ((c & 0xE0) == 0xC0) {
+ if (rem > 0) return UTF8_SHORT_TAIL;
+ rem = 1;
+ codepoint = (c & 0x1F) << 6;
+ if (codepoint == 0) return UTF8_OVERLONG;
+ } else if ((c & 0xF0) == 0xE0) {
+ if (rem > 0) return UTF8_SHORT_TAIL;
+ rem = 2;
+ codepoint = (c & 0x0F) << 12;
+ } else if ((c & 0xF8) == 0xF0) {
+ if (rem > 0) return UTF8_SHORT_TAIL;
+ rem = 3;
+ codepoint = (c & 0x07) << 18;
+ if (codepoint > 0x10FFFF) return UTF8_FORBIDDEN_VALUE;
+ } else
+ return UTF8_FORBIDDEN_VALUE;
+ }
+
+ return UTF8_VALID;
+}
+
/*
* Converts a UTF-8 encoded string to the current locale encoding,
* if possible, and prints it, with prefix before and suffix after.
@@ -1786,21 +1871,35 @@ get_net_wm_name (xcb_connection_t *dpy, xcb_window_t win)
static void
print_utf8 (const char *prefix, char *u8str, size_t length, const char *suffix)
{
- char convbuf[BUFSIZ];
- char *inp = u8str;
size_t inlen = length;
- int convres;
if (inlen < 0) {
- inlen = strlen (inp);
+ inlen = strlen (u8str);
}
+ if (is_valid_utf8 (u8str, inlen) != UTF8_VALID) {
+ printf (" (invalid UTF8_STRING)");
+ return;
+ }
+
+ if (strcmp (user_encoding, "UTF-8") == 0) {
+ /* Don't need to convert */
+ printf ("%s", prefix);
+ fwrite (u8str, 1, inlen, stdout);
+ printf ("%s", suffix);
+ return;
+ }
+
+#ifdef HAVE_ICONV
if (!iconv_from_utf8) {
iconv_from_utf8 = iconv_open (user_encoding, "UTF-8");
}
if (iconv_from_utf8 != (iconv_t) -1) {
Bool done = True;
+ char *inp = u8str;
+ char convbuf[BUFSIZ];
+ int convres;
printf ("%s", prefix);
do {
@@ -1826,6 +1925,9 @@ print_utf8 (const char *prefix, char *u8str, size_t length, const char *suffix)
printf (" (can't load iconv conversion for UTF8_STRING to %s)",
user_encoding);
}
+#else
+ printf (" (can't convert UTF8_STRING to %s)", user_encoding);
+#endif
}
/*