author     Behdad Esfahbod <behdad@behdad.org>  2013-04-29 21:16:15 -0400
committer  Behdad Esfahbod <behdad@behdad.org>  2013-05-02 15:39:16 -0400
commit     cdbf24e87f0d8eaf16a733a293b71bc043fb79c7 (patch)
tree       2e6fe51880f641c0a9e62c5f00fe9ab33f0e9b50
parent     68db8c49d8e4a49a260b8260e24a604c5fdc0458 (diff)
[OTLayout] Accelerate lookups by batching (branch: accelerate-lookups)
If we need to apply very many lookups, we can speed that up by applying them in batches. For each batch we keep the union of the coverage of the participating lookups; we can then skip glyph ranges that do NOT participate in any lookup in the batch. The batch partition is chosen optimally, using a probability model on the glyphs and a dynamic program to minimize the expected cost.

The net effect is a 30% speedup on Amiri. The downside is more memory consumption, as each batch keeps an hb_set_t of its coverage. I'm not yet convinced that the tradeoff is worth pursuing; I'm trying to find ways to optimize this further, with less memory overhead.

This work also ignores the number of subtables per lookup. That may prove to be very important for the performance numbers from here on.
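A minimal standalone sketch of the dynamic program described above, with std::set standing in for hb_set_t and all names illustrative rather than HarfBuzz API:

#include <set>
#include <vector>

typedef std::set<unsigned int> coverage_t; /* stand-in for hb_set_t */

/* Cost of applying one lookup over the whole buffer
 * (mirrors single_lookup_cost() below). */
static float single_cost (void) { return 1.f; }

/* Expected cost of a batch: a fixed overhead for maintaining the union
 * coverage set, plus the probability that a glyph participates times the
 * number of lookups applied (mirrors lookup_batch_cost(), assuming
 * uniform glyph probability). */
static float
batch_cost (const coverage_t &cov, unsigned int n_lookups,
            unsigned int num_glyphs)
{
  return .1f + (cov.size () / (float) num_glyphs) * n_lookups;
}

/* Optimal partition of a run of lookups into contiguous batches.
 * best_move[i] is the index of the last lookup of the previous batch,
 * or -1 if lookups 0..i form a single batch; tracing best_move back
 * from n-1 recovers the batches, as solve() does. */
static void
partition (const std::vector<coverage_t> &lookups, unsigned int num_glyphs,
           std::vector<int> &best_move, std::vector<float> &best_cost)
{
  unsigned int n = lookups.size ();
  best_move.assign (n, -1);
  best_cost.assign (n, 0.f);
  if (!n) return;
  best_cost[0] = single_cost ();
  for (unsigned int i = 1; i < n; i++)
  {
    coverage_t cov = lookups[i];
    float this_cost = single_cost (); /* batch containing only lookup i */
    best_cost[i] = 1e20f;
    for (int j = (int) i - 1; j >= 0; j--)
    {
      /* End the previous batch at j; {j+1..i} is the current batch. */
      if (best_cost[i] > best_cost[j] + this_cost)
      {
        best_cost[i] = best_cost[j] + this_cost;
        best_move[i] = j;
      }
      /* Grow the current batch to {j..i} for the next iteration. */
      cov.insert (lookups[j].begin (), lookups[j].end ());
      this_cost = batch_cost (cov, i - j + 1, num_glyphs);
      if (this_cost > best_cost[i])
        break; /* coverage only grows, so no longer batch can win */
    }
    if (best_cost[i] > this_cost)
    {
      best_cost[i] = this_cost;
      best_move[i] = -1; /* lookups 0..i as one batch */
    }
  }
}

With these parameters, four lookups whose union coverage spans 10% of the glyph set cost .1 + .1 * 4 = .5 as one batch, versus 4.0 applied one by one; that is the gap the partition exploits.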
-rw-r--r--  src/hb-ot-layout-gsub-table.hh                8
-rw-r--r--  src/hb-ot-map-private.hh                     14
-rw-r--r--  src/hb-ot-map.cc                            297
-rw-r--r--  src/hb-ot-shape-complex-arabic-fallback.hh    1
4 files changed, 293 insertions, 27 deletions
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index 3982cd05..dd7d2850 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -1252,6 +1252,7 @@ struct SubstLookup : Lookup
unsigned int end,
const hb_set_digest_t *digest) const
{
+ bool inplace = end != (unsigned int) -1;
bool ret = false;
end = MIN (end, c->buffer->len);
@@ -1264,9 +1265,9 @@ struct SubstLookup : Lookup
if (likely (!is_reverse ()))
{
/* in/out forward substitution */
- c->buffer->clear_output ();
-
c->buffer->idx = start;
+ if (inplace)
+ c->buffer->out_len = start;
while (c->buffer->idx < end)
{
@@ -1277,9 +1278,6 @@ struct SubstLookup : Lookup
else
c->buffer->next_glyph ();
}
-
- if (ret)
- c->buffer->swap_buffers ();
}
else
{
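For context on the in-place path introduced above, a toy model of the in/out indices (not the real hb_buffer_t API): when apply_string() is given a real sub-range, output is written back into the same info array starting at `start`, which is only safe while every edit is one-to-one. This is why optimize() further down gives non-inplace lookups a batch of their own.

#include <vector>

/* Toy buffer: `idx` reads input, `out_len` writes output into the same
 * array.  Safe only while every edit is 1:1, so out_len never overruns
 * idx and glyphs outside [start,end) are untouched. */
struct toy_buffer_t
{
  std::vector<unsigned int> info; /* glyph ids */
  unsigned int idx;               /* next glyph to read */
  unsigned int out_len;           /* next slot to write */

  void next_glyph (void)                /* copy through unchanged */
  { info[out_len++] = info[idx++]; }

  void replace_glyph (unsigned int gid) /* 1:1 substitution */
  { info[out_len++] = gid; idx++; }
};

/* In-place application over [start,end), as the patched loop above does
 * when end != (unsigned int) -1.  try_substitute() stands in for the
 * lookup's apply; on success it has already written its replacement. */
static void
apply_range_inplace (toy_buffer_t &b, unsigned int start, unsigned int end,
                     bool (*try_substitute) (toy_buffer_t &))
{
  b.idx = start;
  b.out_len = start;
  while (b.idx < end)
    if (!try_substitute (b))
      b.next_glyph ();
}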
diff --git a/src/hb-ot-map-private.hh b/src/hb-ot-map-private.hh
index 5ed54a6c..39421f75 100644
--- a/src/hb-ot-map-private.hh
+++ b/src/hb-ot-map-private.hh
@@ -39,6 +39,7 @@ static const hb_tag_t table_tags[2] = {HB_OT_TAG_GSUB, HB_OT_TAG_GPOS};
struct hb_ot_map_t
{
friend struct hb_ot_map_builder_t;
+ friend struct optimize_lookups_context_t;
public:
@@ -68,10 +69,15 @@ struct hb_ot_map_t
typedef void (*pause_func_t) (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
struct stage_map_t {
- unsigned int last_lookup; /* Cumulative */
+ unsigned int last_lookup; /* Actually, last_lookup+1 */
pause_func_t pause_func;
};
+ struct batch_map_t {
+ unsigned int last_lookup; /* Actually, last_lookup+1 */
+ hb_set_t *coverage;
+ };
+
hb_ot_map_t (void) { memset (this, 0, sizeof (*this)); }
@@ -117,6 +123,8 @@ struct hb_ot_map_t
*lookup_count = end - start;
}
+ HB_INTERNAL void optimize (hb_face_t *face);
+
HB_INTERNAL void collect_lookups (unsigned int table_index, hb_set_t *lookups) const;
HB_INTERNAL inline void apply (unsigned int table_index,
const struct hb_ot_shape_plan_t *plan,
@@ -131,6 +139,9 @@ struct hb_ot_map_t
{
lookups[table_index].finish ();
stages[table_index].finish ();
+ for (unsigned int batch_index = 0; batch_index < batches[table_index].len; batch_index++)
+ hb_set_destroy (batches[table_index][batch_index].coverage);
+ batches[table_index].finish ();
}
}
@@ -151,6 +162,7 @@ struct hb_ot_map_t
hb_prealloced_array_t<feature_map_t, 8> features;
hb_prealloced_array_t<lookup_map_t, 32> lookups[2]; /* GSUB/GPOS */
hb_prealloced_array_t<stage_map_t, 4> stages[2]; /* GSUB/GPOS */
+ hb_prealloced_array_t<batch_map_t, 4> batches[2]; /* GSUB/GPOS */
};
enum hb_ot_map_feature_flags_t {
diff --git a/src/hb-ot-map.cc b/src/hb-ot-map.cc
index df96a096..8734e067 100644
--- a/src/hb-ot-map.cc
+++ b/src/hb-ot-map.cc
@@ -27,6 +27,14 @@
*/
#include "hb-ot-map-private.hh"
+#include "hb-ot-layout-private.hh"
+
+
+
+#ifndef HB_DEBUG_MAP
+#define HB_DEBUG_MAP (HB_DEBUG+0)
+#endif
+
#include "hb-ot-layout-private.hh"
@@ -102,38 +110,98 @@ void hb_ot_map_builder_t::add_feature (hb_tag_t tag, unsigned int value,
info->stage[1] = current_stage[1];
}
+
+static inline bool
+may_skip (const hb_glyph_info_t &info, hb_set_t *coverage)
+{
+ return !(info.glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_MARK) &&
+ !coverage->has (info.codepoint);
+}
+
inline void hb_ot_map_t::apply (unsigned int table_index,
const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer) const
{
unsigned int i = 0;
+ unsigned int b = 0;
+
+ for (unsigned int stage_index = 0; stage_index < stages[table_index].len; stage_index++)
+ {
+ const stage_map_t stage = stages[table_index][stage_index];
- for (unsigned int stage_index = 0; stage_index < stages[table_index].len; stage_index++) {
- const stage_map_t *stage = &stages[table_index][stage_index];
- for (; i < stage->last_lookup; i++)
- switch (table_index)
+ for (; b < batches[table_index].len && batches[table_index][b].last_lookup <= stage.last_lookup; b++)
+ {
+ const batch_map_t batch = batches[table_index][b];
+
+ if (!batch.coverage)
{
- case 0:
- hb_ot_layout_substitute_lookup (font, buffer, lookups[table_index][i].index,
- 0, (unsigned int) -1,
- lookups[table_index][i].mask,
- lookups[table_index][i].auto_zwj);
- break;
-
- case 1:
- hb_ot_layout_position_lookup (font, buffer, lookups[table_index][i].index,
- 0, (unsigned int) -1,
- lookups[table_index][i].mask,
- lookups[table_index][i].auto_zwj);
- break;
+ switch (table_index)
+ {
+ case 0:
+ for (; i < batch.last_lookup; i++)
+ {
+ buffer->clear_output ();
+ hb_ot_layout_substitute_lookup (font, buffer, lookups[table_index][i].index,
+ 0, (unsigned int) -1,
+ lookups[table_index][i].mask,
+ lookups[table_index][i].auto_zwj);
+ if (buffer->have_output)
+ buffer->swap_buffers ();
+ }
+ break;
+ case 1:
+ for (; i < batch.last_lookup; i++)
+ hb_ot_layout_position_lookup (font, buffer, lookups[table_index][i].index,
+ 0, (unsigned int) -1,
+ lookups[table_index][i].mask,
+ lookups[table_index][i].auto_zwj);
+ break;
+ }
}
+ else
+ {
+ unsigned int start = 0, end = 0;
+ if (table_index == 0)
+ buffer->clear_output ();
+ for (;;)
+ {
+ while (start < buffer->len && may_skip (buffer->info[start], batch.coverage))
+ start++;
+ if (start >= buffer->len)
+ break;
+ end = start + 1;
+ while (end < buffer->len && !may_skip (buffer->info[end], batch.coverage))
+ end++;
+
+ switch (table_index)
+ {
+ case 0:
+ for (unsigned int j = i; j < batch.last_lookup; j++)
+ hb_ot_layout_substitute_lookup (font, buffer, lookups[table_index][j].index,
+ start, end,
+ lookups[table_index][j].mask,
+ lookups[table_index][j].auto_zwj);
+ break;
+ case 1:
+ for (unsigned int j = i; j < batch.last_lookup; j++)
+ hb_ot_layout_position_lookup (font, buffer, lookups[table_index][j].index,
+ start, end,
+ lookups[table_index][j].mask,
+ lookups[table_index][j].auto_zwj);
+ break;
+ }
- if (stage->pause_func)
- {
- buffer->clear_output ();
- stage->pause_func (plan, font, buffer);
+ start = end + 1;
+ }
+
+ assert (!buffer->has_separate_output ());
+ i = batch.last_lookup;
+ }
}
+
+ if (stage.pause_func)
+ stage.pause_func (plan, font, buffer);
}
}
@@ -317,4 +385,191 @@ hb_ot_map_builder_t::compile (hb_ot_map_t &m)
}
}
}
+
+ m.optimize (face);
+}
+
+
+struct optimize_lookups_context_t
+{
+ inline optimize_lookups_context_t (hb_ot_map_t *map_,
+ hb_face_t *face_,
+ unsigned int table_index_) :
+ map (map_),
+ face (face_),
+ table_index (table_index_),
+ start_lookup (0),
+ num_lookups (0),
+ num_glyphs (hb_face_get_glyph_count (face_)) {}
+
+ inline void add_lookups (unsigned int start_lookup_,
+ unsigned int num_lookups_)
+ {
+ set_lookups (start_lookup_, num_lookups_);
+ solve ();
+ }
+
+ private:
+
+ inline void set_lookups (unsigned int start_lookup_,
+ unsigned int num_lookups_)
+ {
+ start_lookup = start_lookup_;
+ num_lookups = num_lookups_;
+ }
+
+ inline void
+ solve (void)
+ {
+ if (!num_lookups)
+ return;
+
+ DEBUG_MSG_FUNC (MAP, map, "%d lookups", num_lookups);
+
+ hb_set_t *cov = hb_set_create ();
+
+ int best_move[num_lookups];
+ float best_cost[num_lookups];
+ best_move[0] = -1;
+ best_cost[0] = single_lookup_cost (0);
+ for (unsigned int i = 1; i < num_lookups; i++)
+ {
+ cov->clear ();
+ add_coverage (i, cov);
+ float this_cost = single_lookup_cost (i);
+ best_cost[i] = 1e20;
+ for (unsigned int j = i - 1; (int) j >= 0; j--)
+ {
+ if (best_cost[i] > best_cost[j] + this_cost)
+ {
+ best_cost[i] = best_cost[j] + this_cost;
+ best_move[i] = j;
+ }
+ add_coverage (j, cov);
+ this_cost = lookup_batch_cost (cov, i - j + 1);
+ if (this_cost > best_cost[i])
+ break; /* No chance */
+ }
+ if (best_cost[i] > this_cost)
+ {
+ best_cost[i] = this_cost;
+ best_move[i] = -1;
+ }
+ }
+
+ DEBUG_MSG_FUNC (MAP, map, "optimal solution costs %f", best_cost[num_lookups - 1]);
+ for (int i = num_lookups - 1; i >= 0; i = best_move[i])
+ {
+ unsigned int batch_num_lookups = i - best_move[i];
+ if (DEBUG_LEVEL_ENABLED (MAP, 1))
+ DEBUG_MSG_FUNC (MAP, map, "batch has %d lookups; costs %f",
+ batch_num_lookups,
+ best_cost[i] - (best_move[i] == -1 ? 0 : best_cost[best_move[i]]));
+
+ hb_ot_map_t::batch_map_t *batch = map->batches[table_index].push ();
+ if (batch)
+ {
+ batch->last_lookup = MAX (lookup_offset (i), lookup_offset (best_move[i] + 1)) + 1;
+ batch->coverage = batch_num_lookups == 1 ? NULL : hb_set_create ();
+
+ for (int j = i; j > best_move[i]; j--)
+ {
+ if (batch->coverage)
+ add_coverage (j, batch->coverage);
+ if (DEBUG_LEVEL_ENABLED (MAP, 2))
+ {
+ cov->clear ();
+ add_coverage (j, cov);
+ DEBUG_MSG_FUNC (MAP, map, "lookup %d (lookup-index %d) popcount %d",
+ lookup_offset (j),
+ lookup_index (j),
+ cov->get_population ());
+ }
+ }
+ }
+ }
+
+ hb_set_destroy (cov);
+ }
+
+ inline unsigned int lookup_offset (unsigned int i)
+ {
+ assert (i < num_lookups);
+ return start_lookup + num_lookups - 1 - i;
+ }
+
+ inline unsigned int lookup_index (unsigned int i)
+ {
+ return map->lookups[table_index][lookup_offset (i)].index;
+ }
+
+ inline void add_coverage (unsigned int i, hb_set_t *coverage)
+ {
+ hb_ot_layout_lookup_get_coverage (face,
+ table_tags[table_index],
+ lookup_index (i),
+ coverage);
+ }
+
+ /* Parameters */
+
+ inline float single_lookup_cost (unsigned int lookup_index HB_UNUSED)
+ {
+ return 1.0;
+ }
+ inline float
+ lookup_batch_cost (hb_set_t *cov, unsigned int n_lookups)
+ {
+ return .1 + probability (cov) * n_lookups;
+ }
+ inline float
+ probability (hb_set_t *cov)
+ {
+ /* Biggest hack: assume uniform glyph probability. */
+ return cov->get_population () / (float) num_glyphs;
+ }
+
+ private:
+
+ hb_ot_map_t *map;
+ hb_face_t *face;
+ unsigned int table_index;
+ unsigned int start_lookup;
+ unsigned int num_lookups;
+
+ unsigned int num_glyphs;
+};
+
+void
+hb_ot_map_t::optimize (hb_face_t *face)
+{
+ DEBUG_MSG_FUNC (MAP, this, "face %p", face);
+ for (unsigned int table_index = 0; table_index < 2; table_index++)
+ {
+ DEBUG_MSG_FUNC (MAP, this, "table %c%c%c%c", HB_UNTAG (table_tags[table_index]));
+
+ unsigned int i = 0;
+
+ for (unsigned int stage_index = 0; stage_index < stages[table_index].len; stage_index++)
+ {
+ stage_map_t *stage = &stages[table_index][stage_index];
+ unsigned int num_lookups = stage->last_lookup - i;
+ DEBUG_MSG_FUNC (MAP, this, "stage %d: %d lookups", stage_index, num_lookups);
+
+ optimize_lookups_context_t c (this, face, table_index);
+ unsigned int start = i;
+ for (; i < stage->last_lookup; i++)
+ if (!hb_ot_layout_lookup_is_inplace (face, table_tags[table_index],
+ lookups[table_index][i].index))
+ {
+ DEBUG_MSG_FUNC (MAP, this, "lookup %d (lookup-index %d) NOT inplace",
+ i, lookups[table_index][i].index);
+
+ c.add_lookups (start, i - start);
+ c.add_lookups (i, 1);
+ start = i + 1;
+ }
+ c.add_lookups (start, i - start);
+ }
+ }
}
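A minimal sketch of how apply() above walks a batch's union coverage, with std::set standing in for hb_set_t and apply_lookup_range() a hypothetical stand-in for the substitute/position calls:

#include <set>
#include <vector>

struct glyph_t { unsigned int codepoint; bool is_mark; };

/* Mirrors may_skip(): marks are conservatively never skipped; other
 * glyphs are skipped when no lookup in the batch covers them. */
static bool
may_skip (const glyph_t &g, const std::set<unsigned int> &coverage)
{
  return !g.is_mark && !coverage.count (g.codepoint);
}

static void
apply_batch (std::vector<glyph_t> &buffer,
             const std::set<unsigned int> &coverage,
             unsigned int first_lookup, unsigned int last_lookup,
             void (*apply_lookup_range) (std::vector<glyph_t> &,
                                         unsigned int /* lookup */,
                                         unsigned int /* start */,
                                         unsigned int /* end */))
{
  unsigned int start = 0;
  for (;;)
  {
    /* Find the next glyph any lookup in the batch might touch. */
    while (start < buffer.size () && may_skip (buffer[start], coverage))
      start++;
    if (start >= buffer.size ())
      break;
    /* Extend to the end of the non-skippable run. */
    unsigned int end = start + 1;
    while (end < buffer.size () && !may_skip (buffer[end], coverage))
      end++;
    /* Apply every lookup in the batch to just this range. */
    for (unsigned int j = first_lookup; j < last_lookup; j++)
      apply_lookup_range (buffer, j, start, end);
    start = end + 1; /* buffer[end] is known skippable */
  }
}

The range indices stay valid across the inner loop because every multi-lookup batch contains only inplace lookups; optimize() gives each non-inplace lookup a single-lookup batch with NULL coverage, which takes the whole-buffer clear_output/swap_buffers path instead.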
diff --git a/src/hb-ot-shape-complex-arabic-fallback.hh b/src/hb-ot-shape-complex-arabic-fallback.hh
index 8469ad86..66ef1f6a 100644
--- a/src/hb-ot-shape-complex-arabic-fallback.hh
+++ b/src/hb-ot-shape-complex-arabic-fallback.hh
@@ -248,6 +248,7 @@ arabic_fallback_plan_shape (arabic_fallback_plan_t *fallback_plan,
if (fallback_plan->lookup_array[i])
{
OT::hb_apply_context_t c (0, font, buffer, fallback_plan->mask_array[i], true/*auto_zwj*/);
+ /* XXX Currently broken because of clear_output / swap_buffers issues. */
fallback_plan->lookup_array[i]->apply_string (&c, 0, (unsigned int) -1, &fallback_plan->digest_array[i]);
}
}