author     Michael Vrhel <michael.vrhel@artifex.com>  2011-03-04 06:45:29 +0000
committer  Michael Vrhel <michael.vrhel@artifex.com>  2011-03-04 06:45:29 +0000
commit     fdc21fee6c1679b641d9f296fafac9c1a4fff19d (patch)
tree       67a0d3620f075ce3f0baca742ad394464e20050c /gs
parent     bf07d980ff2349ca540f9e87fd99e10729815b08 (diff)
Reorganization of threshold code to move all the thresholding operations into a new file.
git-svn-id: http://svn.ghostscript.com/ghostscript/trunk@12238 a1074d23-0009-0410-80fe-cf8c14f379e6
Diffstat (limited to 'gs')
-rw-r--r--  gs/base/gsiparam.h      13
-rw-r--r--  gs/base/gxht_thresh.c  328
-rw-r--r--  gs/base/gxht_thresh.h   35
-rw-r--r--  gs/base/gximage.h       12
-rw-r--r--  gs/base/gximono.c      327
6 files changed, 388 insertions, 337 deletions
diff --git a/gs/base/gsiparam.h b/gs/base/gsiparam.h
index f31f2477f..eb6eed3bb 100644
--- a/gs/base/gsiparam.h
+++ b/gs/base/gsiparam.h
@@ -288,6 +288,19 @@ void gs_image_t_init_mask_adjust(gs_image_t * pim, bool write_1s,
#define gs_image_t_init_mask(pim, write_1s)\
gs_image_t_init_mask_adjust(pim, write_1s, true)
+/* Used for bookkeeping ht buffer information in landscape mode */
+typedef struct ht_landscape_info_s {
+ int count;
+ int widths[16];
+ int xstart;
+ int curr_pos;
+ int index;
+ int num_contones;
+ bool offset_set;
+ bool flipy;
+ int y_pos;
+} ht_landscape_info_t;
+
/****** REMAINDER OF FILE UNDER CONSTRUCTION. PROCEED AT YOUR OWN RISK. ******/
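For reference, the ht_landscape_info_t structure moved here tracks how many contone columns have been accumulated into the 16 pixel wide landscape strip and the replication width of each column. Below is a minimal, self-contained sketch of the width bookkeeping that gx_ht_threshold_landscape (added in gxht_thresh.c further down) performs on this struct; the column widths used here are hypothetical sample values:

    #include <stdbool.h>
    #include <stdio.h>

    /* Local mirror of the struct added to gsiparam.h above. */
    typedef struct ht_landscape_info_s {
        int count;
        int widths[16];
        int xstart;
        int curr_pos;
        int index;
        int num_contones;
        bool offset_set;
        bool flipy;
        int y_pos;
    } ht_landscape_info_t;

    int main(void)
    {
        ht_landscape_info_t ht = { 0 };
        int local_widths[16];
        int total = 0, j;

        /* Hypothetical left-to-right pass (index > 0): three contone
           columns buffered, each to be replicated 6 pixels wide. */
        ht.index = 3;
        ht.num_contones = 3;
        ht.widths[0] = ht.widths[1] = ht.widths[2] = 6;

        /* The same truncation gx_ht_threshold_landscape applies: the
           expanded widths must fit the 16 wide strip, so the last
           column is clipped (6 + 6 + 6 = 18 becomes 6, 6, 4). */
        for (j = 0; j < ht.num_contones; j++)
            total += (local_widths[j] = ht.widths[j]);
        if (total > 16)
            local_widths[ht.num_contones - 1] -= total - 16;

        for (j = 0; j < ht.num_contones; j++)
            printf("column %d expands to %d pixels\n", j, local_widths[j]);
        return 0;
    }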
diff --git a/gs/base/gxht_thresh.c b/gs/base/gxht_thresh.c
new file mode 100644
index 000000000..0deb156fe
--- /dev/null
+++ b/gs/base/gxht_thresh.c
@@ -0,0 +1,328 @@
+/* Copyright (C) 2011-2012 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied, modified
+ or distributed except as expressly authorized under the terms of that
+ license. Refer to licensing information at http://www.artifex.com/
+ or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
+ San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/*$Id: gxht_thresh.c $ */
+/* Halftone thresholding code */
+
+#include "memory_.h"
+#include "gx.h"
+#include "gsiparam.h"
+#include "gxht_thresh.h"
+#include "math_.h"
+
+#ifndef __WIN32__
+#define __align16 __attribute__((aligned(16)))
+#else
+#define __align16 __declspec(align(16))
+#endif
+
+#ifdef HAVE_SSE2
+
+#include <emmintrin.h>
+
+static const byte bitreverse[] =
+{ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0,
+ 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+ 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
+ 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+ 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC,
+ 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+ 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
+ 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+ 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
+ 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+ 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1,
+ 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+ 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
+ 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+ 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
+ 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+ 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
+ 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+ 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
+ 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+ 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
+ 0x3F, 0xBF, 0x7F, 0xFF};
+#endif
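The bitreverse table above exists because _mm_movemask_epi8 packs the sign bit of byte 0 into bit 0 of its result, so the leftmost pixel lands in the least significant bit, while the 1 bit per pixel halftone buffer needs the leftmost pixel in the most significant bit. Entry for entry, the table is the bit-order reversal of its index; a small self-check sketch:

    #include <assert.h>

    /* Reverse the bit order within one byte; this reproduces the
       bitreverse[] table above entry for entry. */
    static unsigned char
    reverse8(unsigned char b)
    {
        unsigned char r = 0;
        int i;

        for (i = 0; i < 8; i++)
            r |= (unsigned char)(((b >> i) & 1) << (7 - i));
        return r;
    }

    int main(void)
    {
        /* Spot checks against the table: bitreverse[0x01] is 0x80,
           bitreverse[0x80] is 0x01, bitreverse[0x0F] is 0xF0. */
        assert(reverse8(0x01) == 0x80);
        assert(reverse8(0x80) == 0x01);
        assert(reverse8(0x0F) == 0xF0);
        return 0;
    }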
+
+#if RAW_HT_DUMP
+/* This is slow thresholding, byte output for debug only */
+void
+gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride,
+ byte *halftone, int dithered_stride, int width,
+ int num_rows)
+{
+ int k, j;
+ byte *contone_ptr;
+ byte *thresh_ptr;
+ byte *halftone_ptr;
+
+ /* For the moment just do a very slow compare until we get
+ this working */
+ for (j = 0; j < num_rows; j++) {
+ contone_ptr = contone;
+ thresh_ptr = threshold_strip + contone_stride * j;
+ halftone_ptr = halftone + dithered_stride * j;
+ for (k = 0; k < width; k++) {
+ if (contone_ptr[k] < thresh_ptr[k]) {
+ halftone_ptr[k] = 0;
+ } else {
+ halftone_ptr[k] = 255;
+ }
+ }
+ }
+}
+#endif
+
+#ifndef HAVE_SSE2
+
+/* A simple case for use in the landscape mode. Could probably be coded up
+ faster */
+static void
+threshold_16_bit(byte *contone_ptr_in, byte *thresh_ptr_in, byte *ht_data)
+{
+ int k, j;
+ byte *contone_ptr = contone_ptr_in;
+ byte *thresh_ptr = thresh_ptr_in;
+ byte bit_init;
+
+ for (j = 0; j < 2; j++) {
+ bit_init = 0x80;
+ for (k = 0; k < 8; k++) {
+ if (contone_ptr[k] < thresh_ptr[k]) {
+ ht_data[j] |= bit_init;
+ } else {
+ ht_data[j] &= ~bit_init;
+ }
+ bit_init >>= 1;
+ }
+ contone_ptr += 8;
+ thresh_ptr += 8;
+ }
+}
+#else
+/* Note this function has strict data alignment needs */
+static void
+threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
+{
+ __m128i input1;
+ __m128i input2;
+ register int result_int;
+ const unsigned int mask1 = 0x80808080;
+ __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
+
+ /* Load */
+ input1 = _mm_load_si128((const __m128i *)contone_ptr);
+ input2 = _mm_load_si128((const __m128i *) thresh_ptr);
+ /* Unsigned subtraction saturates at zero, so we have to convert
+ to signed and use the signed operation */
+ input1 = _mm_xor_si128(input1, sign_fix);
+ input2 = _mm_xor_si128(input2, sign_fix);
+ /* Subtract the two */
+ input2 = _mm_subs_epi8(input1, input2);
+ /* Grab the sign mask */
+ result_int = _mm_movemask_epi8(input2);
+ /* Bit-reverse each byte of the 16 bit mask */
+ ht_data[0] = bitreverse[(result_int & 0xff)];
+ ht_data[1] = bitreverse[((result_int >> 8) & 0xff)];
+}
+
+/* Not so fussy on its alignment */
+static void
+threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
+{
+ __m128i input1;
+ __m128i input2;
+ int result_int;
+ byte *sse_data;
+ const unsigned int mask1 = 0x80808080;
+ __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
+
+ sse_data = (byte*) &(result_int);
+ /* Load */
+ input1 = _mm_loadu_si128((const __m128i *)contone_ptr);
+ input2 = _mm_loadu_si128((const __m128i *) thresh_ptr);
+ /* Unsigned subtraction saturates at zero, so we have to convert
+ to signed and use the signed operation */
+ input1 = _mm_xor_si128(input1, sign_fix);
+ input2 = _mm_xor_si128(input2, sign_fix);
+ /* Subtract the two */
+ input2 = _mm_subs_epi8(input1, input2);
+ /* Grab the sign mask */
+ result_int = _mm_movemask_epi8(input2);
+ /* Bit-reverse each byte of the 16 bit mask */
+ ht_data[0] = bitreverse[sse_data[0]];
+ ht_data[1] = bitreverse[sse_data[1]];
+}
+#endif
+
+/* SSE2 and non-SSE2 implementations of thresholding a row */
+void
+gx_ht_threshold_row_bit(byte *contone, byte *threshold_strip, int contone_stride,
+ byte *halftone, int dithered_stride, int width,
+ int num_rows, int offset_bits)
+{
+#ifndef HAVE_SSE2
+ int k, j;
+ byte *contone_ptr;
+ byte *thresh_ptr;
+ byte *halftone_ptr;
+ byte bit_init;
+ int ht_index;
+
+ /* For the moment just do a very slow compare until we get this
+ working. This could use some serious optimization */
+ for (j = 0; j < num_rows; j++) {
+ contone_ptr = contone;
+ thresh_ptr = threshold_strip + contone_stride * j;
+ halftone_ptr = halftone + dithered_stride * j;
+ /* First get the left remainder portion. Put into MSBs of first byte */
+ bit_init = 0x80;
+ ht_index = -1;
+ for (k = 0; k < offset_bits; k++) {
+ if ( (k % 8) == 0) {
+ ht_index++;
+ }
+ if (contone_ptr[k] < thresh_ptr[k]) {
+ halftone_ptr[ht_index] |= bit_init;
+ } else {
+ halftone_ptr[ht_index] &= ~bit_init;
+ }
+ if (bit_init == 1) {
+ bit_init = 0x80;
+ } else {
+ bit_init >>= 1;
+ }
+ }
+ bit_init = 0x80;
+ ht_index = -1;
+ if (offset_bits > 0) {
+ halftone_ptr += 2; /* Point to the next 16 bits of data */
+ }
+ /* Now get the rest, which will be 16 bit aligned. */
+ for (k = offset_bits; k < width; k++) {
+ if (((k - offset_bits) % 8) == 0) {
+ ht_index++;
+ }
+ if (contone_ptr[k] < thresh_ptr[k]) {
+ halftone_ptr[ht_index] |= bit_init;
+ } else {
+ halftone_ptr[ht_index] &= ~bit_init;
+ }
+ if (bit_init == 1) {
+ bit_init = 0x80;
+ } else {
+ bit_init >>= 1;
+ }
+ }
+ }
+#else
+ byte *contone_ptr;
+ byte *thresh_ptr;
+ byte *halftone_ptr;
+ int num_tiles = (int) ceil((float) (width - offset_bits)/16.0);
+ int k, j;
+
+ for (j = 0; j < num_rows; j++) {
+ /* contone and thresh_ptr are 128 bit aligned. We do need to do this in
+ two steps to ensure that we pack the bits in an aligned fashion
+ into halftone_ptr. */
+ contone_ptr = contone;
+ thresh_ptr = threshold_strip + contone_stride * j;
+ halftone_ptr = halftone + dithered_stride * j;
+ if (offset_bits > 0) {
+ /* Since we allowed for 16 bits in our left remainder
+ we can go directly into the destination. threshold_16_SSE
+ requires 128 bit alignment. contone_ptr and thresh_ptr
+ are set up so that after we move in by offset_bits elements
+ we are 128 bit aligned. */
+ threshold_16_SSE_unaligned(contone_ptr, thresh_ptr,
+ halftone_ptr);
+ halftone_ptr += 2;
+ thresh_ptr += offset_bits;
+ contone_ptr += offset_bits;
+ }
+ /* Our input data should now be 128 bit aligned. Iterate over sets
+ of 16, going directly into our HT buffer. The source and
+ halftone_ptr buffers should be padded to allow up to 15 bytes of overrun */
+ for (k = 0; k < num_tiles; k++) {
+ threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
+ thresh_ptr += 16;
+ contone_ptr += 16;
+ halftone_ptr += 2;
+ }
+ }
+#endif
+}
+
+
+/* This thresholds a buffer that is 16 wide by data_length tall */
+void
+gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
+ ht_landscape_info_t ht_landscape, byte *halftone,
+ int data_length)
+{
+ __align16 byte contone[16];
+ int position_start, position, curr_position;
+ int *widths = &(ht_landscape.widths[0]);
+ int local_widths[16];
+ int num_contone = ht_landscape.num_contones;
+ int k, j, w, contone_out_posit;
+ byte *contone_ptr, *thresh_ptr, *halftone_ptr;
+
+ /* Work through chunks of 16. */
+ /* Data may have come in left to right or right to left. */
+ if (ht_landscape.index > 0) {
+ position = position_start = 0;
+ } else {
+ position = position_start = ht_landscape.curr_pos + 1;
+ }
+ thresh_ptr = thresh_align;
+ halftone_ptr = halftone;
+ /* Copy the widths to a local array, and truncate the last one (which may
+ * be the first one!) if required. */
+ k = 0;
+ for (j = 0; j < num_contone; j++)
+ k += (local_widths[j] = widths[position_start+j]);
+ if (k > 16) {
+ if (ht_landscape.index > 0) {
+ local_widths[num_contone-1] -= k-16;
+ } else {
+ local_widths[0] -= k-16;
+ }
+ }
+ for (k = data_length; k > 0; k--) { /* Loop on rows */
+ contone_ptr = &(contone_align[position]); /* Point us to our row start */
+ curr_position = 0; /* We use this in keeping track of widths */
+ contone_out_posit = 0; /* Our index out */
+ for (j = num_contone; j > 0; j--) {
+ byte c = *contone_ptr;
+ for (w = local_widths[curr_position]; w > 0; w--) {
+ contone[contone_out_posit] = c;
+ contone_out_posit++;
+ }
+ curr_position++; /* Move us to the next position in our width array */
+ contone_ptr++; /* Move us to a new location in our contone buffer */
+ }
+ /* Now we have our left justified and expanded contone data for a single
+ set of 16. Go ahead and threshold these */
+#ifdef HAVE_SSE2
+ threshold_16_SSE(&(contone[0]), thresh_ptr, halftone_ptr);
+#else
+ threshold_16_bit(&(contone[0]), thresh_ptr, halftone_ptr);
+#endif
+ thresh_ptr += 16;
+ position += 16;
+ halftone_ptr += 2;
+ }
+}
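The XOR with 0x80 "sign fix" in threshold_16_SSE and threshold_16_SSE_unaligned deserves a note: SSE2 has no unsigned byte subtract whose sign survives (unsigned subtraction saturates at zero), but XORing each byte with 0x80 maps unsigned 0..255 onto signed -128..127 while preserving order, so the sign of a signed saturating subtract answers the unsigned "contone < threshold" question. A scalar model of the per-byte logic, with an exhaustive self-check:

    #include <assert.h>

    /* Per-byte model of threshold_16_SSE: decide contone < thresh
       using only signed arithmetic, the way SSE2 must. */
    static int
    unsigned_less_via_signed(unsigned char contone, unsigned char thresh)
    {
        int c = (signed char)(contone ^ 0x80);  /* sign fix */
        int t = (signed char)(thresh ^ 0x80);
        int diff = c - t;

        /* Saturate like _mm_subs_epi8 does. */
        if (diff < -128) diff = -128;
        if (diff > 127) diff = 127;
        return diff < 0;  /* the sign bit _mm_movemask_epi8 collects */
    }

    int main(void)
    {
        unsigned int c, t;

        /* The trick matches the plain unsigned compare for all inputs. */
        for (c = 0; c < 256; c++)
            for (t = 0; t < 256; t++)
                assert(unsigned_less_via_signed((unsigned char)c,
                                                (unsigned char)t) == (c < t));
        return 0;
    }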
diff --git a/gs/base/gxht_thresh.h b/gs/base/gxht_thresh.h
new file mode 100644
index 000000000..b7b6b6d40
--- /dev/null
+++ b/gs/base/gxht_thresh.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2001-2006 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied, modified
+ or distributed except as expressly authorized under the terms of that
+ license. Refer to licensing information at http://www.artifex.com/
+ or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
+ San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/* $Id: gxht_thresh.h $ */
+/* Threshold based halftoning prototypes */
+
+#ifndef gxht_thresh_INCLUDED
+# define gxht_thresh_INCLUDED
+
+#define RAW_HT_DUMP 0
+
+#if RAW_HT_DUMP
+void gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip,
+ int contone_stride, byte *halftone,
+ int dithered_stride, int width, int num_rows);
+#endif
+void gx_ht_threshold_row_bit(byte *contone, byte *threshold_strip,
+ int contone_stride, byte *halftone,
+ int dithered_stride, int width, int num_rows,
+ int offset_bits);
+void gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
+ ht_landscape_info_t ht_landscape, byte *halftone,
+ int data_length);
+#endif /* gxht_thresh_INCLUDED */
+
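A minimal caller for the new interface, assuming a build without HAVE_SSE2 so the scalar path imposes no 128 bit alignment requirement (with SSE2, the contone and threshold rows past offset_bits must be 16 byte aligned, as the comments in gxht_thresh.c explain). Linked against gxht_thresh.c, this thresholds one 16 pixel row into two bytes of output:

    #include <string.h>

    typedef unsigned char byte;  /* stand-in for the Ghostscript typedef */

    /* Prototype from gxht_thresh.h above. */
    void gx_ht_threshold_row_bit(byte *contone, byte *threshold_strip,
                                 int contone_stride, byte *halftone,
                                 int dithered_stride, int width,
                                 int num_rows, int offset_bits);

    int main(void)
    {
        /* A 16 pixel ramp thresholded against a flat 128 screen.
           offset_bits = 0, so everything lands in the main loop. */
        byte contone[16], thresh[16], halftone[2];
        int k;

        for (k = 0; k < 16; k++) {
            contone[k] = (byte)(k * 16);  /* 0, 16, ..., 240 */
            thresh[k] = 128;
        }
        memset(halftone, 0, sizeof(halftone));
        gx_ht_threshold_row_bit(contone, thresh, 16, halftone, 2, 16, 1, 0);
        /* Pixels 0..7 fall below 128: halftone[0] == 0xFF, halftone[1] == 0x00 */
        return 0;
    }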
diff --git a/gs/base/gximage.h b/gs/base/gximage.h
index bc7a2fa92..f086db3e9 100644
--- a/gs/base/gximage.h
+++ b/gs/base/gximage.h
@@ -88,18 +88,6 @@ struct sample_map_s {
bool inverted;
};
-/* Used for bookkeeping ht buffer information in lanscape mode */
-typedef struct ht_landscape_info_s {
- int count;
- int widths[16];
- int xstart;
- int curr_pos;
- int index;
- int num_contones;
- bool offset_set;
- bool flipy;
- int y_pos;
-} ht_landscape_info_t;
#ifndef sample_map_DEFINED
#define sample_map_DEFINED
diff --git a/gs/base/gximono.c b/gs/base/gximono.c
index 8cf9ced57..fbd46f471 100644
--- a/gs/base/gximono.c
+++ b/gs/base/gximono.c
@@ -38,47 +38,11 @@
#include "gsicc_littlecms.h"
#include "gxcie.h"
#include "gscie.h"
+#include "gxht_thresh.h"
-#define RAW_HT_DUMP 0
#define USE_FAST_CODE 1
#define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x))))
-/* This should be moved someplace else later */
-#ifndef __WIN32__
-#define __align16 __attribute__((align(16)))
-#else
-#define __align16 __declspec(align(16))
-#endif
-
-#ifdef HAVE_SSE2
-
-#include <emmintrin.h>
-
-static const byte bitreverse[] =
-{ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0,
- 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
- 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
- 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
- 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC,
- 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
- 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
- 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
- 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
- 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
- 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1,
- 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
- 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
- 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
- 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
- 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
- 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
- 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
- 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
- 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
- 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
- 0x3F, 0xBF, 0x7F, 0xFF};
-#endif
-
/* ------ Strategy procedure ------ */
/* Check the prototype. */
@@ -942,283 +906,6 @@ fill_threshhold_buffer(byte *dest_strip, byte *src_strip, int src_width,
memcpy(ptr_out_temp, src_strip, right_width);
}
-#if RAW_HT_DUMP
-/* This is slow thresholding, byte output for debug only */
-static void
-threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride,
- byte *halftone, int dithered_stride, int width,
- int num_rows)
-{
- int k, j;
- byte *contone_ptr;
- byte *thresh_ptr;
- byte *halftone_ptr;
-
- /* For the moment just do a very slow compare until we get
- get this working */
- for (j = 0; j < num_rows; j++) {
- contone_ptr = contone;
- thresh_ptr = threshold_strip + contone_stride * j;
- halftone_ptr = halftone + dithered_stride * j;
- for (k = 0; k < width; k++) {
- if (contone_ptr[k] < thresh_ptr[k]) {
- halftone_ptr[k] = 0;
- } else {
- halftone_ptr[k] = 255;
- }
- }
- }
-}
-#endif
-
-#ifndef HAVE_SSE2
-/* This is slow thresholding bit output */
-static void
-threshold_row_bit(byte *contone, byte *threshold_strip, int contone_stride,
- byte *halftone, int dithered_stride, int width,
- int num_rows, int offset_bits)
-{
- int k, j;
- byte *contone_ptr;
- byte *thresh_ptr;
- byte *halftone_ptr;
- byte bit_init;
- int ht_index;
-
- /* For the moment just do a very slow compare until we get
- get this working. This could use some serious optimization */
- for (j = 0; j < num_rows; j++) {
- contone_ptr = contone;
- thresh_ptr = threshold_strip + contone_stride * j;
- halftone_ptr = halftone + dithered_stride * j;
- /* First get the left remainder portion. Put into MSBs of first byte */
- bit_init = 0x80;
- ht_index = -1;
- for (k = 0; k < offset_bits; k++) {
- if ( (k % 8) == 0) {
- ht_index++;
- }
- if (contone_ptr[k] < thresh_ptr[k]) {
- halftone_ptr[ht_index] |= bit_init;
- } else {
- halftone_ptr[ht_index] &= ~bit_init;
- }
- if (bit_init == 1) {
- bit_init = 0x80;
- } else {
- bit_init >>= 1;
- }
- }
- bit_init = 0x80;
- ht_index = -1;
- if (offset_bits > 0) {
- halftone_ptr += 2; /* Point to the next 16 bits of data */
- }
- /* Now get the rest, which will be 16 bit aligned. */
- for (k = offset_bits; k < width; k++) {
- if (((k - offset_bits) % 8) == 0) {
- ht_index++;
- }
- if (contone_ptr[k] < thresh_ptr[k]) {
- halftone_ptr[ht_index] |= bit_init;
- } else {
- halftone_ptr[ht_index] &= ~bit_init;
- }
- if (bit_init == 1) {
- bit_init = 0x80;
- } else {
- bit_init >>= 1;
- }
- }
- }
-}
-
-/* A simple case for use in the landscape mode. Could probably be coded up
- faster */
-static void
-threshold_16_bit(byte *contone_ptr_in, byte *thresh_ptr_in, byte *ht_data)
-{
- int k, j;
- byte *contone_ptr = contone_ptr_in;
- byte *thresh_ptr = thresh_ptr_in;
- byte bit_init;
-
- for (j = 0; j < 2; j++) {
- bit_init = 0x80;
- for (k = 0; k < 8; k++) {
- if (contone_ptr[k] < thresh_ptr[k]) {
- ht_data[j] |= bit_init;
- } else {
- ht_data[j] &= ~bit_init;
- }
- bit_init >>= 1;
- }
- contone_ptr += 8;
- thresh_ptr += 8;
- }
-}
-#else
-/* Note this function has strict data alignment needs */
-static void
-threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
-{
- __m128i input1;
- __m128i input2;
- register int result_int;
- const unsigned int mask1 = 0x80808080;
- __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
-
- /* Load */
- input1 = _mm_load_si128((const __m128i *)contone_ptr);
- input2 = _mm_load_si128((const __m128i *) thresh_ptr);
- /* Unsigned subtraction does Unsigned saturation so we
- have to use the signed operation */
- input1 = _mm_xor_si128(input1, sign_fix);
- input2 = _mm_xor_si128(input2, sign_fix);
- /* Subtract the two */
- input2 = _mm_subs_epi8(input1, input2);
- /* Grab the sign mask */
- result_int = _mm_movemask_epi8(input2);
- /* bit wise reversal on 16 bit word */
- ht_data[0] = bitreverse[(result_int & 0xff)];
- ht_data[1] = bitreverse[((result_int >> 8) & 0xff)];
-}
-
-/* Not so fussy on its alignment */
-static void
-threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
-{
- __m128i input1;
- __m128i input2;
- int result_int;
- byte *sse_data;
- const unsigned int mask1 = 0x80808080;
- __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
-
- sse_data = (byte*) &(result_int);
- /* Load */
- input1 = _mm_loadu_si128((const __m128i *)contone_ptr);
- input2 = _mm_loadu_si128((const __m128i *) thresh_ptr);
- /* Unsigned subtraction does Unsigned saturation so we
- have to use the signed operation */
- input1 = _mm_xor_si128(input1, sign_fix);
- input2 = _mm_xor_si128(input2, sign_fix);
- /* Subtract the two */
- input2 = _mm_subs_epi8(input1, input2);
- /* Grab the sign mask */
- result_int = _mm_movemask_epi8(input2);
- /* bit wise reversal on 16 bit word */
- ht_data[0] = bitreverse[sse_data[0]];
- ht_data[1] = bitreverse[sse_data[1]];
-}
-
-/* This uses SSE2 simd operations to perform the thresholding operation.
- Intrinsics are used since in-line assm is not supported in Visual
- Studio on 64 bit machines, plus instrinsics are easily ported between
- Visual Studio and gcc. requires <emmintrin.h> */
-static void
-threshold_row_SSE(byte *contone, byte *threshold_strip, int contone_stride,
- byte *halftone, int dithered_stride, int width,
- int num_rows, int offset_bits)
-{
- byte *contone_ptr;
- byte *thresh_ptr;
- byte *halftone_ptr;
- int num_tiles = (int) ceil((float) (width - offset_bits)/16.0);
- int k, j;
-
- for (j = 0; j < num_rows; j++) {
- /* contone and thresh_ptr are 128 bit aligned. We do need to do this in
- two steps to ensure that we pack the bits in an aligned fashion
- into halftone_ptr. */
- contone_ptr = contone;
- thresh_ptr = threshold_strip + contone_stride * j;
- halftone_ptr = halftone + dithered_stride * j;
- if (offset_bits > 0) {
- /* Since we allowed for 16 bits in our left remainder
- we can go directly in to the destination. threshold_16_SSE
- requires 128 bit alignment. contone_ptr and thresh_ptr
- are set up so that after we move in by offset_bits elements
- then we are 128 bit aligned. */
- threshold_16_SSE_unaligned(contone_ptr, thresh_ptr,
- halftone_ptr);
- halftone_ptr += 2;
- thresh_ptr += offset_bits;
- contone_ptr += offset_bits;
- }
- /* Now we should have 128 bit aligned with our input data. Iterate
- over sets of 16 going directly into our HT buffer. Sources and
- halftone_ptr buffers should be padded to allow 15 bit overrun */
- for (k = 0; k < num_tiles; k++) {
- threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
- thresh_ptr += 16;
- contone_ptr += 16;
- halftone_ptr += 2;
- }
- }
-}
-#endif
-
-/* This thresholds a buffer that is 16 wide by data_length tall */
-static void
-threshold_landscape(byte *contone_align, byte *thresh_align,
- ht_landscape_info_t ht_landscape, byte *halftone,
- int data_length)
-{
- __align16 byte contone[16];
- int position_start, position, curr_position;
- int *widths = &(ht_landscape.widths[0]);
- int local_widths[16];
- int num_contone = ht_landscape.num_contones;
- int k, j, w, contone_out_posit;
- byte *contone_ptr, *thresh_ptr, *halftone_ptr;
-
- /* Work through chunks of 16. */
- /* Data may have come in left to right or right to left. */
- if (ht_landscape.index > 0) {
- position = position_start = 0;
- } else {
- position = position_start = ht_landscape.curr_pos + 1;
- }
- thresh_ptr = thresh_align;
- halftone_ptr = halftone;
- /* Copy the widths to a local array, and truncate the last one (which may
- * be the first one!) if required. */
- k = 0;
- for (j = 0; j < num_contone; j++)
- k += (local_widths[j] = widths[position_start+j]);
- if (k > 16) {
- if (ht_landscape.index > 0) {
- local_widths[num_contone-1] -= k-16;
- } else {
- local_widths[0] -= k-16;
- }
- }
- for (k = data_length; k > 0; k--) { /* Loop on rows */
- contone_ptr = &(contone_align[position]); /* Point us to our row start */
- curr_position = 0; /* We use this in keeping track of widths */
- contone_out_posit = 0; /* Our index out */
- for (j = num_contone; j > 0; j--) {
- byte c = *contone_ptr;
- for (w = local_widths[curr_position]; w > 0; w--) {
- contone[contone_out_posit] = c;
- contone_out_posit++;
- }
- curr_position++; /* Move us to the next position in our width array */
- contone_ptr++; /* Move us to a new location in our contone buffer */
- }
- /* Now we have our left justified and expanded contone data for a single
- set of 16. Go ahead and threshold these */
-#ifdef HAVE_SSE2
- threshold_16_SSE(&(contone[0]), thresh_ptr, halftone_ptr);
-#else
- threshold_16_bit(&(contone[0]), thresh_ptr, halftone_ptr);
-#endif
- thresh_ptr += 16;
- position += 16;
- halftone_ptr += 2;
- }
-}
/* If we are in here, we had data left over. Move it to the proper position
and get ht_landscape_info_t set properly */
@@ -1599,7 +1286,7 @@ flush:
}
/* Apply the threshold operation */
#if RAW_HT_DUMP
- threshold_row_byte(contone_align, thresh_align, contone_stride,
+ gx_ht_threshold_row_byte(contone_align, thresh_align, contone_stride,
halftone, dithered_stride, dest_width, vdi);
sprintf(file_name,"HT_Portrait_%d_%dx%dx%d.raw", penum->id, dest_width,
dest_height, spp_out);
@@ -1607,15 +1294,9 @@ flush:
fwrite(halftone,1,dest_width * vdi,fid);
fclose(fid);
#else
-#ifdef HAVE_SSE2
- threshold_row_SSE(contone_align, thresh_align, contone_stride,
+ gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride,
halftone, dithered_stride, dest_width, vdi,
offset_bits);
-#else
- threshold_row_bit(contone_align, thresh_align, contone_stride,
- halftone, dithered_stride, dest_width, vdi,
- offset_bits);
-#endif
/* Now do the copy mono operation */
/* First the left remainder bits */
if (offset_bits > 0) {
@@ -1712,7 +1393,7 @@ flush:
memcpy(ptr_out, thresh_align, 16 * tile_remainder);
}
/* Apply the threshold operation */
- threshold_landscape(contone_align, thresh_align,
+ gx_ht_threshold_landscape(contone_align, thresh_align,
penum->ht_landscape, halftone, data_length);
/* Perform the copy mono */
penum->ht_landscape.offset_set = false;
diff --git a/gs/base/lib.mak b/gs/base/lib.mak
index c0ad2e749..2a9b4663d 100644
--- a/gs/base/lib.mak
+++ b/gs/base/lib.mak
@@ -458,6 +458,7 @@ gxcspace_h=$(GLSRC)gxcspace.h\
$(gscspace_h) $(gsccolor_h) $(gscsel_h) $(gxfrac_h) $(gxcindex_h)
gxht_h=$(GLSRC)gxht.h $(gsht1_h) $(gsrefct_h) $(gxhttype_h) $(gxtmap_h) $(gscspace_h)
gxcie_h=$(GLSRC)gxcie.h $(gscie_h)
+gxht_thresh_h=$(GLSRC)gxht_thresh.h
gxpcolor_h=$(GLSRC)gxpcolor.h\
$(gspcolor_h) $(gxcspace_h) $(gxdevice_h) $(gxdevmem_h) $(gxpcache_h) $(gxblend_h)\
$(gxcpath_h) $(gxdcolor_h) $(gxiclass_h)
@@ -650,6 +651,10 @@ $(GLOBJ)gxhtbit.$(OBJ) : $(GLSRC)gxhtbit.c $(GXERR) $(memory__h)\
$(gxbitmap_h) $(gxdht_h) $(gxdhtres_h) $(gxhttile_h) $(gxtmap_h)
$(GLCC) $(GLO_)gxhtbit.$(OBJ) $(C_) $(GLSRC)gxhtbit.c
+$(GLOBJ)gxht_thresh.$(OBJ) : $(GLSRC)gxht_thresh.c $(GXERR) $(memory__h)\
+ $(gx_h) $(gsiparam_h) $(gxht_thresh_h) $(math__h)
+ $(GLCC) $(GLO_)gxht_thresh.$(OBJ) $(C_) $(GLSRC)gxht_thresh.c
+
$(GLOBJ)gxwts.$(OBJ) : $(GLSRC)gxwts.c $(GXERR) $(gxwts_h)\
$(stdpre_h) $(memory__h) $(gxstate_h) $(gxht_h) $(math__h) $(gxdevcli_h)\
$(gxdht_h)
@@ -685,7 +690,7 @@ $(GLOBJ)gximono.$(OBJ) : $(GLSRC)gximono.c $(GXERR) $(memory__h) $(gpcheck_h)\
$(gxarith_h) $(gxcmap_h) $(gxcpath_h) $(gxdcolor_h) $(gxdevice_h)\
$(gxdevmem_h) $(gxfixed_h) $(gximage_h) $(gxistate_h) $(gxmatrix_h)\
$(gzht_h) $(vdtrace_h) $(gsicc_h) $(gsicc_cache_h) $(gsicc_littlecms_h)\
- $(gxcie_h) $(gscie_h)
+ $(gxcie_h) $(gscie_h) $(gxht_thresh_h)
$(GLCC) $(GLO_)gximono.$(OBJ) $(C_) $(GLSRC)gximono.c
$(GLOBJ)gximask.$(OBJ) : $(GLSRC)gximask.c $(GXERR) $(memory__h) $(gserrors_h)\
@@ -1131,7 +1136,8 @@ LIB1x=$(GLOBJ)gxacpath.$(OBJ) $(GLOBJ)gxbcache.$(OBJ) $(GLOBJ)gxccache.$(OBJ)
LIB2x=$(GLOBJ)gxccman.$(OBJ) $(GLOBJ)gxchar.$(OBJ) $(GLOBJ)gxcht.$(OBJ)
LIB3x=$(GLOBJ)gxclip.$(OBJ) $(GLOBJ)gxcmap.$(OBJ) $(GLOBJ)gxcpath.$(OBJ)
LIB4x=$(GLOBJ)gxdcconv.$(OBJ) $(GLOBJ)gxdcolor.$(OBJ) $(GLOBJ)gxhldevc.$(OBJ)
-LIB5x=$(GLOBJ)gxfill.$(OBJ) $(GLOBJ)gxfdrop.$(OBJ) $(GLOBJ)gxht.$(OBJ) $(GLOBJ)gxhtbit.$(OBJ)
+LIB5x=$(GLOBJ)gxfill.$(OBJ) $(GLOBJ)gxfdrop.$(OBJ) $(GLOBJ)gxht.$(OBJ) $(GLOBJ)gxhtbit.$(OBJ)\
+ $(GLOBJ)gxht_thresh.$(OBJ)
LIB6x=$(GLOBJ)gxwts.$(OBJ) $(GLOBJ)gxidata.$(OBJ) $(GLOBJ)gxifast.$(OBJ) $(GLOBJ)gximage.$(OBJ)
LIB7x=$(GLOBJ)gximage1.$(OBJ) $(GLOBJ)gximono.$(OBJ) $(GLOBJ)gxipixel.$(OBJ) $(GLOBJ)gximask.$(OBJ)
LIB8x=$(GLOBJ)gxi12bit.$(OBJ) $(GLOBJ)gxi16bit.$(OBJ) $(GLOBJ)gxiscale.$(OBJ) $(GLOBJ)gxpaint.$(OBJ) $(GLOBJ)gxpath.$(OBJ) $(GLOBJ)gxpath2.$(OBJ)