summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Behdad Esfahbod <behdad@behdad.org> 2018-02-13 21:41:51 -0800
committer Behdad Esfahbod <behdad@behdad.org> 2018-02-13 21:41:51 -0800
commit 15ba4fbe01433c8627f9e6a60106ca77d3e1ad4c (patch)
tree f34b15e8f63c2446fc513f7d1bcdbfbc3abacbd8
parent effddd03bd6fb0aac14b46a16b281f3749e44780 (diff)
[khmer] Add dump-khmer-data
-rw-r--r-- src/Makefile.am 4
-rw-r--r-- src/Makefile.sources 1
-rw-r--r-- src/dump-khmer-data.cc 43
-rw-r--r-- src/hb-ot-shape-complex-khmer-private.hh 124
-rw-r--r-- src/hb-ot-shape-complex-khmer.cc 98
5 files changed, 173 insertions, 97 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index b3be138d..f90b2ac2 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -353,11 +353,15 @@ endif
check_PROGRAMS += \
dump-indic-data \
+ dump-khmer-data \
dump-myanmar-data \
$(NULL)
dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc
dump_indic_data_CPPFLAGS = $(HBCFLAGS)
dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS)
+dump_khmer_data_SOURCES = dump-khmer-data.cc hb-ot-shape-complex-indic-table.cc
+dump_khmer_data_CPPFLAGS = $(HBCFLAGS)
+dump_khmer_data_LDADD = libharfbuzz.la $(HBLIBS)
dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc
dump_myanmar_data_CPPFLAGS = $(HBCFLAGS)
dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS)
diff --git a/src/Makefile.sources b/src/Makefile.sources
index 376d543a..ec60ec0a 100644
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@@ -109,6 +109,7 @@ HB_OT_sources = \
hb-ot-shape-complex-indic.cc \
hb-ot-shape-complex-indic-private.hh \
hb-ot-shape-complex-indic-table.cc \
+ hb-ot-shape-complex-khmer-private.hh \
hb-ot-shape-complex-khmer.cc \
hb-ot-shape-complex-myanmar-private.hh \
hb-ot-shape-complex-myanmar.cc \
diff --git a/src/dump-khmer-data.cc b/src/dump-khmer-data.cc
new file mode 100644
index 00000000..7dd09b2b
--- /dev/null
+++ b/src/dump-khmer-data.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#include "hb-ot-shape-complex-khmer-private.hh"
+
+int
+main (void) /* Prints one "U+XXXX category position" line per code point whose Khmer category is not OTHER or whose position is not NOT_APPLICABLE. */
+{
+ for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++) /* walks every value from 0 through 0x10FFFF inclusive */
+ {
+ hb_glyph_info_t info; /* only codepoint is assigned below before set_khmer_properties() is called */
+ info.codepoint = u;
+ set_khmer_properties (info);
+ if (info.khmer_category() != INDIC_SYLLABIC_CATEGORY_OTHER || /* nothing is printed when both comparisons are equal */
+ info.khmer_position() != INDIC_MATRA_CATEGORY_NOT_APPLICABLE)
+ printf("U+%04X %u %u\n", u,
+ info.khmer_category(),
+ info.khmer_position());
+ }
+}
diff --git a/src/hb-ot-shape-complex-khmer-private.hh b/src/hb-ot-shape-complex-khmer-private.hh
new file mode 100644
index 00000000..f90ef967
--- /dev/null
+++ b/src/hb-ot-shape-complex-khmer-private.hh
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2018 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH
+#define HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH
+
+#include "hb-private.hh"
+
+#include "hb-ot-shape-complex-indic-private.hh"
+
+
+/* buffer var allocations */
+#define khmer_category() indic_category() /* khmer_category_t */
+#define khmer_position() indic_position() /* khmer_position_t */
+
+
+typedef indic_category_t khmer_category_t; /* alias: Khmer code uses the Indic category type directly */
+typedef indic_position_t khmer_position_t; /* alias: Khmer code uses the Indic position type directly */
+
+
+static inline khmer_position_t
+matra_position_khmer (khmer_position_t side) /* Maps POS_PRE_C to POS_PRE_M and POS_POST_C/POS_ABOVE_C/POS_BELOW_C to POS_AFTER_POST; any other value is returned unchanged. */
+{
+ switch ((int) side) /* NOTE(review): the int cast presumably avoids unhandled-enumerator warnings -- confirm */
+ {
+ case POS_PRE_C:
+ return POS_PRE_M;
+
+ case POS_POST_C:
+ case POS_ABOVE_C:
+ case POS_BELOW_C:
+ return POS_AFTER_POST;
+
+ default:
+ return side;
+ }; /* NOTE(review): the ';' after this brace is an empty statement and could be dropped */
+}
+
+static inline bool
+is_consonant_or_vowel (const hb_glyph_info_t &info) /* Returns is_one_of() for info against the mask CONSONANT_FLAGS combined with the OT_V flag. */
+{
+ return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V));
+}
+
+static inline bool
+is_coeng (const hb_glyph_info_t &info) /* Returns is_one_of() for info against the single OT_Coeng flag. */
+{
+ return is_one_of (info, FLAG (OT_Coeng));
+}
+
+static inline void
+set_khmer_properties (hb_glyph_info_t &info) /* Classifies info.codepoint via hb_indic_get_categories(), applies the Khmer overrides below, and stores the result in info.khmer_category() / info.khmer_position(). */
+{
+ hb_codepoint_t u = info.codepoint;
+ unsigned int type = hb_indic_get_categories (u); /* packed value: category and position are both unpacked from it below */
+ khmer_category_t cat = (khmer_category_t) (type & 0x7Fu); /* category comes from the low 7 bits */
+ khmer_position_t pos = (khmer_position_t) (type >> 8); /* position comes from bit 8 upward */
+
+
+ /*
+ * Re-assign category
+ */
+
+ if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
+ else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
+ u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
+ {
+ /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
+ * https://github.com/roozbehp/unicode-data/issues/5 */
+ cat = OT_M;
+ pos = POS_ABOVE_C; /* the OT_M branch further down then passes this through matra_position_khmer() */
+ }
+ else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER; /* U+2010 HYPHEN, U+2011 NON-BREAKING HYPHEN */
+ else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; /* U+25CC DOTTED CIRCLE */
+
+
+ /*
+ * Re-assign position.
+ */
+
+ if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) /* any category in CONSONANT_FLAGS gets the base position */
+ {
+ pos = POS_BASE_C;
+ if (u == 0x179Au) /* U+179A KHMER LETTER RO is additionally re-tagged as OT_Ra */
+ cat = OT_Ra;
+ }
+ else if (cat == OT_M)
+ {
+ pos = matra_position_khmer (pos); /* remap the consonant-relative side to a matra position */
+ }
+ else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol)))) /* OT_SM, OT_A and OT_Symbol all share POS_SMVD */
+ {
+ pos = POS_SMVD;
+ }
+
+ info.khmer_category() = cat; /* written through the macros, i.e. into indic_category() / indic_position() */
+ info.khmer_position() = pos;
+}
+
+
+#endif /* HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH */
diff --git a/src/hb-ot-shape-complex-khmer.cc b/src/hb-ot-shape-complex-khmer.cc
index 0e2ca88c..304879d8 100644
--- a/src/hb-ot-shape-complex-khmer.cc
+++ b/src/hb-ot-shape-complex-khmer.cc
@@ -24,105 +24,9 @@
* Google Author(s): Behdad Esfahbod
*/
-#include "hb-ot-shape-complex-indic-private.hh"
+#include "hb-ot-shape-complex-khmer-private.hh"
#include "hb-ot-layout-private.hh"
-/* buffer var allocations */
-#define khmer_category() indic_category() /* khmer_category_t */
-#define khmer_position() indic_position() /* khmer_position_t */
-
-
-/*
- * Khmer shaper.
- */
-
-typedef indic_category_t khmer_category_t;
-typedef indic_position_t khmer_position_t;
-
-
-static inline khmer_position_t
-matra_position_khmer (khmer_position_t side)
-{
- switch ((int) side)
- {
- case POS_PRE_C:
- return POS_PRE_M;
-
- case POS_POST_C:
- case POS_ABOVE_C:
- case POS_BELOW_C:
- return POS_AFTER_POST;
-
- default:
- return side;
- };
-}
-
-static inline bool
-is_consonant_or_vowel (const hb_glyph_info_t &info)
-{
- return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V));
-}
-
-static inline bool
-is_coeng (const hb_glyph_info_t &info)
-{
- return is_one_of (info, FLAG (OT_Coeng));
-}
-
-static inline void
-set_khmer_properties (hb_glyph_info_t &info)
-{
- hb_codepoint_t u = info.codepoint;
- unsigned int type = hb_indic_get_categories (u);
- khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
- khmer_position_t pos = (khmer_position_t) (type >> 8);
-
-
- /*
- * Re-assign category
- */
-
- if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
- else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
- u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
- {
- /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
- * https://github.com/roozbehp/unicode-data/issues/5 */
- cat = OT_M;
- pos = POS_ABOVE_C;
- }
- else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER;
- else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
-
-
- /*
- * Re-assign position.
- */
-
- if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
- {
- pos = POS_BASE_C;
- if (u == 0x179Au)
- cat = OT_Ra;
- }
- else if (cat == OT_M)
- {
- pos = matra_position_khmer (pos);
- }
- else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol))))
- {
- pos = POS_SMVD;
- }
-
- info.khmer_category() = cat;
- info.khmer_position() = pos;
-}
-
-/*
- * Things above this line should ideally be moved to the Indic table itself.
- */
-
/*
* Khmer shaper.