summary refs log tree commit diff
path: root/src/mm-charsets.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mm-charsets.h')
-rw-r--r-- src/mm-charsets.h 123
1 files changed, 79 insertions, 44 deletions
diff --git a/src/mm-charsets.h b/src/mm-charsets.h
index a85f3dc7..3071f6be 100644
--- a/src/mm-charsets.h
+++ b/src/mm-charsets.h
@@ -18,63 +18,98 @@
#include <glib.h>
+/*****************************************************************************************/
+
typedef enum {
- MM_MODEM_CHARSET_UNKNOWN = 0x00000000,
- MM_MODEM_CHARSET_GSM = 0x00000001,
- MM_MODEM_CHARSET_IRA = 0x00000002,
- MM_MODEM_CHARSET_8859_1 = 0x00000004,
- MM_MODEM_CHARSET_UTF8 = 0x00000008,
- MM_MODEM_CHARSET_UCS2 = 0x00000010,
- MM_MODEM_CHARSET_PCCP437 = 0x00000020,
- MM_MODEM_CHARSET_PCDN = 0x00000040,
- MM_MODEM_CHARSET_HEX = 0x00000080
+ MM_MODEM_CHARSET_UNKNOWN = 0, /* no charset bit set */
+ MM_MODEM_CHARSET_GSM = 1 << 0, /* single-bit values; GSM..PCDN keep their previous numeric values */
+ MM_MODEM_CHARSET_IRA = 1 << 1,
+ MM_MODEM_CHARSET_8859_1 = 1 << 2,
+ MM_MODEM_CHARSET_UTF8 = 1 << 3,
+ MM_MODEM_CHARSET_UCS2 = 1 << 4,
+ MM_MODEM_CHARSET_PCCP437 = 1 << 5,
+ MM_MODEM_CHARSET_PCDN = 1 << 6,
+ MM_MODEM_CHARSET_UTF16 = 1 << 7, /* same numeric value (0x80) as the removed MM_MODEM_CHARSET_HEX */
} MMModemCharset;
-const char *mm_modem_charset_to_string (MMModemCharset charset);
+const gchar *mm_modem_charset_to_string (MMModemCharset charset);
+MMModemCharset mm_modem_charset_from_string (const gchar *string);
-MMModemCharset mm_modem_charset_from_string (const char *string);
+/*****************************************************************************************/
-/* Append the given string to the given byte array but re-encode it
- * into the given charset first. The original string is assumed to be
- * UTF-8 encoded.
- */
-gboolean mm_modem_charset_byte_array_append (GByteArray *array,
- const char *utf8,
- gboolean quoted,
- MMModemCharset charset);
+/* Checks whether the given UTF-8 string can be converted to the given charset without errors */
+gboolean mm_charset_can_convert_to (const gchar *utf8,
+ MMModemCharset charset);
-/* Take a string in hex representation ("00430052" or "A4BE11" for example)
- * and convert it from the given character set to UTF-8.
- */
-char *mm_modem_charset_hex_to_utf8 (const char *src, MMModemCharset charset);
+guint8 *mm_charset_gsm_unpack (const guint8 *gsm,
+ guint32 num_septets,
+ guint8 start_offset, /* in bits */
+ guint32 *out_unpacked_len);
-/* Take a string in UTF-8 and convert it to the given charset in hex
- * representation.
- */
-char *mm_modem_charset_utf8_to_hex (const char *src, MMModemCharset charset);
+guint8 *mm_charset_gsm_pack (const guint8 *src,
+ guint32 src_len,
+ guint8 start_offset, /* in bits */
+ guint32 *out_packed_len);
-guint8 *mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len);
+/*****************************************************************************************/
-guint8 *mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len);
+/*
+ * Convert the given UTF-8 encoded string into the given charset.
+ *
+ * The output is given as a bytearray, because the target charset may allow
+ * embedded NUL bytes (e.g. UTF-16).
+ *
+ * The output encoded string is not guaranteed to be NUL-terminated, instead
+ * the bytearray length itself gives the correct string length.
+ */
+GByteArray *mm_modem_charset_bytearray_from_utf8 (const gchar *utf8,
+ MMModemCharset charset,
+ gboolean translit,
+ GError **error);
-/* Returns the size in bytes required to hold the UTF-8 string in the given charset */
-guint mm_charset_get_encoded_len (const char *utf8,
- MMModemCharset charset,
- guint *out_unsupported);
+/*
+ * Convert the given UTF-8 encoded string into the given charset.
+ *
+ * The output is given as a C string, and those charsets that allow
+ * embedded NUL bytes (e.g. UTF-16) will be hex-encoded.
+ *
+ * The output encoded string is guaranteed to be NUL-terminated, and so no
+ * explicit output length is returned.
+ */
+gchar *mm_modem_charset_str_from_utf8 (const gchar *utf8,
+ MMModemCharset charset,
+ gboolean translit,
+ GError **error);
-guint8 *gsm_unpack (const guint8 *gsm,
- guint32 num_septets,
- guint8 start_offset, /* in bits */
- guint32 *out_unpacked_len);
+/*
+ * Convert the input byte array, which is encoded in the given charset,
+ * into a UTF-8 encoded string.
+ *
+ * The output string is guaranteed to be valid UTF-8 and NUL-terminated.
+ */
+gchar *mm_modem_charset_bytearray_to_utf8 (GByteArray *bytearray,
+ MMModemCharset charset,
+ gboolean translit,
+ GError **error);
-guint8 *gsm_pack (const guint8 *src,
- guint32 src_len,
- guint8 start_offset, /* in bits */
- guint32 *out_packed_len);
+/*
+ * Convert the input string, which is encoded in the given charset, into a
+ * UTF-8 encoded string. Input in charsets that allow embedded NUL bytes
+ * (e.g. UTF-16) needs to be hex-encoded.
+ *
+ * If the input string is NUL-terminated, len may be given as -1; otherwise
+ * len needs to specify the number of valid bytes in the input string.
+ *
+ * The output string is guaranteed to be valid UTF-8 and NUL-terminated.
+ */
+gchar *mm_modem_charset_str_to_utf8 (const gchar *str,
+ gssize len,
+ MMModemCharset charset,
+ gboolean translit,
+ GError **error);
-gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset);
+/*****************************************************************************************/
-gchar *mm_utf8_take_and_convert_to_charset (gchar *str,
- MMModemCharset charset);
+void mm_modem_charsets_init (void);
#endif /* MM_CHARSETS_H */