diff options
| author | Michael Vrhel <michael.vrhel@artifex.com> | 2011-03-04 06:45:29 +0000 |
|---|---|---|
| committer | Michael Vrhel <michael.vrhel@artifex.com> | 2011-03-04 06:45:29 +0000 |
| commit | fdc21fee6c1679b641d9f296fafac9c1a4fff19d (patch) | |
| tree | 67a0d3620f075ce3f0baca742ad394464e20050c /gs | |
| parent | bf07d980ff2349ca540f9e87fd99e10729815b08 (diff) | |
Reorganization of threshold code to move all the thresholding operations into a new file.
git-svn-id: http://svn.ghostscript.com/ghostscript/trunk@12238 a1074d23-0009-0410-80fe-cf8c14f379e6
Diffstat (limited to 'gs')
| -rw-r--r-- | gs/base/gsiparam.h | 13 | ||||
| -rw-r--r-- | gs/base/gxht_thresh.c | 328 | ||||
| -rw-r--r-- | gs/base/gxht_thresh.h | 35 | ||||
| -rw-r--r-- | gs/base/gximage.h | 12 | ||||
| -rw-r--r-- | gs/base/gximono.c | 327 | ||||
| -rw-r--r-- | gs/base/lib.mak | 10 |
6 files changed, 388 insertions, 337 deletions
diff --git a/gs/base/gsiparam.h b/gs/base/gsiparam.h index f31f2477f..eb6eed3bb 100644 --- a/gs/base/gsiparam.h +++ b/gs/base/gsiparam.h @@ -288,6 +288,19 @@ void gs_image_t_init_mask_adjust(gs_image_t * pim, bool write_1s, #define gs_image_t_init_mask(pim, write_1s)\ gs_image_t_init_mask_adjust(pim, write_1s, true) +/* Used for bookkeeping ht buffer information in lanscape mode */ +typedef struct ht_landscape_info_s { + int count; + int widths[16]; + int xstart; + int curr_pos; + int index; + int num_contones; + bool offset_set; + bool flipy; + int y_pos; +} ht_landscape_info_t; + /****** REMAINDER OF FILE UNDER CONSTRUCTION. PROCEED AT YOUR OWN RISK. ******/ diff --git a/gs/base/gxht_thresh.c b/gs/base/gxht_thresh.c new file mode 100644 index 000000000..0deb156fe --- /dev/null +++ b/gs/base/gxht_thresh.c @@ -0,0 +1,328 @@ +/* Copyright (C) 2011-2012 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied, modified
+ or distributed except as expressly authorized under the terms of that
+ license. Refer to licensing information at http://www.artifex.com/
+ or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
+ San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/*$Id: gxht_thresh.c $ */
+/* Halftone thresholding code */
+
+#include "memory_.h"
+#include "gx.h"
+#include "gsiparam.h"
+#include "gxht_thresh.h"
+#include "math_.h"
+
+#ifndef __WIN32__
+#define __align16 __attribute__((align(16)))
+#else
+#define __align16 __declspec(align(16))
+#endif
+
+#ifdef HAVE_SSE2
+
+#include <emmintrin.h>
+
+static const byte bitreverse[] =
+{ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0,
+ 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+ 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
+ 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+ 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC,
+ 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+ 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
+ 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+ 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
+ 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+ 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1,
+ 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+ 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
+ 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+ 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
+ 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+ 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
+ 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+ 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
+ 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+ 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
+ 0x3F, 0xBF, 0x7F, 0xFF};
+#endif
+
+#if RAW_HT_DUMP
+/* This is slow thresholding, byte output for debug only */
+/* Debug-only thresholding: emits one full BYTE (0 or 255) per pixel
+   instead of packed bits, so the output can be dumped and viewed as a
+   raw 8-bit grayscale image.  Compiled only when RAW_HT_DUMP is set. */
+void
+gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride,
+                  byte *halftone, int dithered_stride, int width,
+                  int num_rows)
+{
+    int k, j;
+    byte *contone_ptr;
+    byte *thresh_ptr;
+    byte *halftone_ptr;
+
+    /* For the moment just do a very slow compare until we
+       get this working */
+    for (j = 0; j < num_rows; j++) {
+        /* NOTE(review): the same contone row is reused for every output
+           row; only the threshold and halftone rows advance by their
+           strides.  Presumably intentional (replicated contone data) --
+           confirm against the caller. */
+        contone_ptr = contone;
+        thresh_ptr = threshold_strip + contone_stride * j;
+        halftone_ptr = halftone + dithered_stride * j;
+        for (k = 0; k < width; k++) {
+            if (contone_ptr[k] < thresh_ptr[k]) {
+                halftone_ptr[k] = 0;    /* below threshold */
+            } else {
+                halftone_ptr[k] = 255;  /* at or above threshold */
+            }
+        }
+    }
+}
+#endif
+
+#ifndef HAVE_SSE2
+
+/* A simple case for use in the landscape mode. Could probably be coded up
+ faster */
+/* A simple case for use in the landscape mode.  Could probably be coded up
+   faster.
+   Scalar (non-SSE2) fallback: compares 16 contone bytes against 16
+   threshold bytes and packs the 16 results MSB-first into ht_data[0]
+   and ht_data[1].  A bit is set when contone < threshold, matching the
+   sense of the SSE2 path.  Each output bit is explicitly set or cleared
+   (|= / &= ~), so any stale contents of ht_data are fully overwritten. */
+static void
+threshold_16_bit(byte *contone_ptr_in, byte *thresh_ptr_in, byte *ht_data)
+{
+    int k, j;
+    byte *contone_ptr = contone_ptr_in;
+    byte *thresh_ptr = thresh_ptr_in;
+    byte bit_init;
+
+    for (j = 0; j < 2; j++) {           /* one output byte per 8 pixels */
+        bit_init = 0x80;                /* leftmost pixel -> MSB */
+        for (k = 0; k < 8; k++) {
+            if (contone_ptr[k] < thresh_ptr[k]) {
+                ht_data[j] |= bit_init;
+            } else {
+                ht_data[j] &= ~bit_init;
+            }
+            bit_init >>= 1;
+        }
+        contone_ptr += 8;
+        thresh_ptr += 8;
+    }
+}
+#else
+/* Note this function has strict data alignment needs */
+/* Note this function has strict data alignment needs: both contone_ptr
+   and thresh_ptr must be 16-byte aligned for _mm_load_si128.
+   Thresholds 16 pixels at once.  XOR-ing both inputs with 0x80 maps the
+   unsigned bytes onto signed range so that the signed saturating
+   subtract yields a negative result exactly when contone < threshold;
+   _mm_movemask_epi8 then gathers the 16 sign bits (lane 0 in bit 0),
+   and the bitreverse table flips each byte to the MSB-first bit order
+   the halftone buffer expects. */
+static void
+threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
+{
+    __m128i input1;
+    __m128i input2;
+    register int result_int;
+    const unsigned int mask1 = 0x80808080;
+    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
+
+    /* Load (requires 128-bit alignment) */
+    input1 = _mm_load_si128((const __m128i *)contone_ptr);
+    input2 = _mm_load_si128((const __m128i *) thresh_ptr);
+    /* Unsigned subtraction does Unsigned saturation so we
+       have to use the signed operation */
+    input1 = _mm_xor_si128(input1, sign_fix);
+    input2 = _mm_xor_si128(input2, sign_fix);
+    /* Subtract the two; sign of each lane is (contone < threshold) */
+    input2 = _mm_subs_epi8(input1, input2);
+    /* Grab the sign mask */
+    result_int = _mm_movemask_epi8(input2);
+    /* bit wise reversal on 16 bit word */
+    ht_data[0] = bitreverse[(result_int & 0xff)];
+    ht_data[1] = bitreverse[((result_int >> 8) & 0xff)];
+}
+
+/* Not so fussy on its alignment */
+/* Not so fussy on its alignment: same operation as threshold_16_SSE but
+   uses unaligned loads, for the left-edge remainder where the pointers
+   are not yet 16-byte aligned. */
+static void
+threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data)
+{
+    __m128i input1;
+    __m128i input2;
+    int result_int;
+    byte *sse_data;
+    const unsigned int mask1 = 0x80808080;
+    __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1);
+
+    /* NOTE(review): reading result_int through a byte pointer picks up
+       its two low-order bytes only on a little-endian host; on a
+       big-endian target sse_data[0]/[1] would be the high bytes.  The
+       aligned variant masks/shifts instead -- verify this path is only
+       built for little-endian platforms, or use the same masking here. */
+    sse_data = (byte*) &(result_int);
+    /* Load (no alignment requirement) */
+    input1 = _mm_loadu_si128((const __m128i *)contone_ptr);
+    input2 = _mm_loadu_si128((const __m128i *) thresh_ptr);
+    /* Unsigned subtraction does Unsigned saturation so we
+       have to use the signed operation */
+    input1 = _mm_xor_si128(input1, sign_fix);
+    input2 = _mm_xor_si128(input2, sign_fix);
+    /* Subtract the two */
+    input2 = _mm_subs_epi8(input1, input2);
+    /* Grab the sign mask */
+    result_int = _mm_movemask_epi8(input2);
+    /* bit wise reversal on 16 bit word */
+    ht_data[0] = bitreverse[sse_data[0]];
+    ht_data[1] = bitreverse[sse_data[1]];
+}
+#endif
+
+/* SSE2 and non-SSE2 implementation of thresholding a row */
+/* Threshold num_rows rows of width pixels, packing the result one bit
+   per pixel into halftone.  offset_bits is the count of left-edge
+   pixels that precede the 16-byte-aligned portion of each row; they are
+   handled first into the leading (up to) 16 output bits, after which
+   the remainder of the row is processed 16-byte aligned.
+   NOTE(review): the offset_bits > 0 case always advances halftone_ptr
+   by exactly 2 bytes, so it assumes offset_bits <= 16 -- confirm the
+   caller guarantees this. */
+void
+gx_ht_threshold_row_bit(byte *contone, byte *threshold_strip, int contone_stride,
+                  byte *halftone, int dithered_stride, int width,
+                  int num_rows, int offset_bits)
+{
+#ifndef HAVE_SSE2
+    int k, j;
+    byte *contone_ptr;
+    byte *thresh_ptr;
+    byte *halftone_ptr;
+    byte bit_init;
+    int ht_index;
+
+    /* For the moment just do a very slow compare until we
+       get this working.  This could use some serious optimization */
+    for (j = 0; j < num_rows; j++) {
+        contone_ptr = contone;
+        thresh_ptr = threshold_strip + contone_stride * j;
+        halftone_ptr = halftone + dithered_stride * j;
+        /* First get the left remainder portion.  Put into MSBs of first byte */
+        bit_init = 0x80;
+        ht_index = -1;                  /* pre-incremented on k % 8 == 0 */
+        for (k = 0; k < offset_bits; k++) {
+            if ( (k % 8) == 0) {
+                ht_index++;
+            }
+            if (contone_ptr[k] < thresh_ptr[k]) {
+                halftone_ptr[ht_index] |= bit_init;
+            } else {
+                halftone_ptr[ht_index] &= ~bit_init;
+            }
+            if (bit_init == 1) {
+                bit_init = 0x80;        /* wrap to MSB of the next byte */
+            } else {
+                bit_init >>= 1;
+            }
+        }
+        bit_init = 0x80;
+        ht_index = -1;
+        if (offset_bits > 0) {
+            halftone_ptr += 2; /* Point to the next 16 bits of data */
+        }
+        /* Now get the rest, which will be 16 bit aligned. */
+        for (k = offset_bits; k < width; k++) {
+            if (((k - offset_bits) % 8) == 0) {
+                ht_index++;
+            }
+            if (contone_ptr[k] < thresh_ptr[k]) {
+                halftone_ptr[ht_index] |= bit_init;
+            } else {
+                halftone_ptr[ht_index] &= ~bit_init;
+            }
+            if (bit_init == 1) {
+                bit_init = 0x80;
+            } else {
+                bit_init >>= 1;
+            }
+        }
+    }
+#else
+    byte *contone_ptr;
+    byte *thresh_ptr;
+    byte *halftone_ptr;
+    /* Number of 16-pixel tiles in the aligned portion of a row */
+    int num_tiles = (int) ceil((float) (width - offset_bits)/16.0);
+    int k, j;
+
+    for (j = 0; j < num_rows; j++) {
+        /* contone and thresh_ptr are 128 bit aligned.  We do need to do this in
+           two steps to ensure that we pack the bits in an aligned fashion
+           into halftone_ptr. */
+        contone_ptr = contone;
+        thresh_ptr = threshold_strip + contone_stride * j;
+        halftone_ptr = halftone + dithered_stride * j;
+        if (offset_bits > 0) {
+            /* Since we allowed for 16 bits in our left remainder
+               we can go directly in to the destination.  threshold_16_SSE
+               requires 128 bit alignment.  contone_ptr and thresh_ptr
+               are set up so that after we move in by offset_bits elements
+               then we are 128 bit aligned. */
+            threshold_16_SSE_unaligned(contone_ptr, thresh_ptr,
+                                       halftone_ptr);
+            halftone_ptr += 2;
+            thresh_ptr += offset_bits;
+            contone_ptr += offset_bits;
+        }
+        /* Now we should have 128 bit aligned with our input data.  Iterate
+           over sets of 16 going directly into our HT buffer.  Sources and
+           halftone_ptr buffers should be padded to allow 15 bit overrun */
+        for (k = 0; k < num_tiles; k++) {
+            threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr);
+            thresh_ptr += 16;
+            contone_ptr += 16;
+            halftone_ptr += 2;
+        }
+    }
+#endif
+}
+
+
+/* This thresholds a buffer that is 16 wide by data_length tall */
+/* This thresholds a buffer that is 16 wide by data_length tall.
+   Each source row holds num_contones distinct contone values; each value
+   is replicated local_widths[i] times to expand the row to 16 pixels
+   before thresholding, producing 2 output bytes per row.
+   NOTE(review): ht_landscape is deliberately(?) passed by value -- the
+   widths are copied again into local_widths anyway, so a const pointer
+   would avoid the struct copy.  Confirm before changing the signature.
+   NOTE(review): __align16 expands to __attribute__((align(16))) on
+   non-Win32 builds; GCC spells this attribute "aligned", so the
+   16-byte alignment threshold_16_SSE requires may not actually be
+   applied to contone[] -- verify. */
+void
+gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
+                    ht_landscape_info_t ht_landscape, byte *halftone,
+                    int data_length)
+{
+    __align16 byte contone[16];
+    int position_start, position, curr_position;
+    int *widths = &(ht_landscape.widths[0]);
+    int local_widths[16];
+    int num_contone = ht_landscape.num_contones;
+    int k, j, w, contone_out_posit;
+    byte *contone_ptr, *thresh_ptr, *halftone_ptr;
+
+    /* Work through chunks of 16. */
+    /* Data may have come in left to right or right to left. */
+    if (ht_landscape.index > 0) {
+        position = position_start = 0;
+    } else {
+        position = position_start = ht_landscape.curr_pos + 1;
+    }
+    thresh_ptr = thresh_align;
+    halftone_ptr = halftone;
+    /* Copy the widths to a local array, and truncate the last one (which may
+     * be the first one!) if required, so the widths sum to at most 16. */
+    k = 0;
+    for (j = 0; j < num_contone; j++)
+        k += (local_widths[j] = widths[position_start+j]);
+    if (k > 16) {
+        if (ht_landscape.index > 0) {
+            local_widths[num_contone-1] -= k-16;
+        } else {
+            local_widths[0] -= k-16;
+        }
+    }
+    for (k = data_length; k > 0; k--) { /* Loop on rows */
+        contone_ptr = &(contone_align[position]); /* Point us to our row start */
+        curr_position = 0; /* We use this in keeping track of widths */
+        contone_out_posit = 0; /* Our index out */
+        for (j = num_contone; j > 0; j--) {
+            byte c = *contone_ptr;
+            /* Replicate this contone value across its width */
+            for (w = local_widths[curr_position]; w > 0; w--) {
+                contone[contone_out_posit] = c;
+                contone_out_posit++;
+            }
+            curr_position++; /* Move us to the next position in our width array */
+            contone_ptr++; /* Move us to a new location in our contone buffer */
+        }
+        /* Now we have our left justified and expanded contone data for a single
+           set of 16.  Go ahead and threshold these */
+#ifdef HAVE_SSE2
+        threshold_16_SSE(&(contone[0]), thresh_ptr, halftone_ptr);
+#else
+        threshold_16_bit(&(contone[0]), thresh_ptr, halftone_ptr);
+#endif
+        thresh_ptr += 16;
+        position += 16;   /* Next source row (rows are 16 bytes apart) */
+        halftone_ptr += 2;
+    }
+}
diff --git a/gs/base/gxht_thresh.h b/gs/base/gxht_thresh.h new file mode 100644 index 000000000..b7b6b6d40 --- /dev/null +++ b/gs/base/gxht_thresh.h @@ -0,0 +1,35 @@ +/* Copyright (C) 2001-2006 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied, modified
+ or distributed except as expressly authorized under the terms of that
+ license. Refer to licensing information at http://www.artifex.com/
+ or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
+ San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/* $Id: gxht_thresh.h $ */
+/* Threshold based halftoning prototypes */
+
+#ifndef gsht_thresh_INCLUDED
+# define gsht_thresh_INCLUDED
+
+#define RAW_HT_DUMP 0
+
+#if RAW_HT_DUMP
+void gx_ht_threshold_row_byte(byte *contone, byte *threshold_strip,
+ int contone_stride, byte *halftone,
+ int dithered_stride, int width, int num_rows);
+#endif
+void gx_ht_threshold_row_bit(byte *contone, byte *threshold_strip,
+ int contone_stride, byte *halftone,
+ int dithered_stride, int width, int num_rows,
+ int offset_bits);
+void gx_ht_threshold_landscape(byte *contone_align, byte *thresh_align,
+ ht_landscape_info_t ht_landscape, byte *halftone,
+ int data_length);
+#endif /* gsht_thresh_INCLUDED */
+
diff --git a/gs/base/gximage.h b/gs/base/gximage.h index bc7a2fa92..f086db3e9 100644 --- a/gs/base/gximage.h +++ b/gs/base/gximage.h @@ -88,18 +88,6 @@ struct sample_map_s { bool inverted; }; -/* Used for bookkeeping ht buffer information in lanscape mode */ -typedef struct ht_landscape_info_s { - int count; - int widths[16]; - int xstart; - int curr_pos; - int index; - int num_contones; - bool offset_set; - bool flipy; - int y_pos; -} ht_landscape_info_t; #ifndef sample_map_DEFINED #define sample_map_DEFINED diff --git a/gs/base/gximono.c b/gs/base/gximono.c index 8cf9ced57..fbd46f471 100644 --- a/gs/base/gximono.c +++ b/gs/base/gximono.c @@ -38,47 +38,11 @@ #include "gsicc_littlecms.h" #include "gxcie.h" #include "gscie.h" +#include "gxht_thresh.h" -#define RAW_HT_DUMP 0 #define USE_FAST_CODE 1 #define fastfloor(x) (((int)(x)) - (((x)<0) && ((x) != (float)(int)(x)))) -/* This should be moved someplace else later */ -#ifndef __WIN32__ -#define __align16 __attribute__((align(16))) -#else -#define __align16 __declspec(align(16)) -#endif - -#ifdef HAVE_SSE2 - -#include <emmintrin.h> - -static const byte bitreverse[] = -{ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, - 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, - 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, - 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, - 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, - 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, - 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, - 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, - 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, - 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 
0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, - 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, - 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, - 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, - 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, - 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, - 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, - 0x3F, 0xBF, 0x7F, 0xFF}; -#endif - /* ------ Strategy procedure ------ */ /* Check the prototype. */ @@ -942,283 +906,6 @@ fill_threshhold_buffer(byte *dest_strip, byte *src_strip, int src_width, memcpy(ptr_out_temp, src_strip, right_width); } -#if RAW_HT_DUMP -/* This is slow thresholding, byte output for debug only */ -static void -threshold_row_byte(byte *contone, byte *threshold_strip, int contone_stride, - byte *halftone, int dithered_stride, int width, - int num_rows) -{ - int k, j; - byte *contone_ptr; - byte *thresh_ptr; - byte *halftone_ptr; - - /* For the moment just do a very slow compare until we get - get this working */ - for (j = 0; j < num_rows; j++) { - contone_ptr = contone; - thresh_ptr = threshold_strip + contone_stride * j; - halftone_ptr = halftone + dithered_stride * j; - for (k = 0; k < width; k++) { - if (contone_ptr[k] < thresh_ptr[k]) { - halftone_ptr[k] = 0; - } else { - halftone_ptr[k] = 255; - } - } - } -} -#endif - -#ifndef HAVE_SSE2 -/* This is slow thresholding bit output */ -static void -threshold_row_bit(byte *contone, byte *threshold_strip, int contone_stride, - byte *halftone, int dithered_stride, int width, - int num_rows, int offset_bits) -{ - int k, j; - byte *contone_ptr; - byte *thresh_ptr; - byte *halftone_ptr; - byte bit_init; - int ht_index; 
- - /* For the moment just do a very slow compare until we get - get this working. This could use some serious optimization */ - for (j = 0; j < num_rows; j++) { - contone_ptr = contone; - thresh_ptr = threshold_strip + contone_stride * j; - halftone_ptr = halftone + dithered_stride * j; - /* First get the left remainder portion. Put into MSBs of first byte */ - bit_init = 0x80; - ht_index = -1; - for (k = 0; k < offset_bits; k++) { - if ( (k % 8) == 0) { - ht_index++; - } - if (contone_ptr[k] < thresh_ptr[k]) { - halftone_ptr[ht_index] |= bit_init; - } else { - halftone_ptr[ht_index] &= ~bit_init; - } - if (bit_init == 1) { - bit_init = 0x80; - } else { - bit_init >>= 1; - } - } - bit_init = 0x80; - ht_index = -1; - if (offset_bits > 0) { - halftone_ptr += 2; /* Point to the next 16 bits of data */ - } - /* Now get the rest, which will be 16 bit aligned. */ - for (k = offset_bits; k < width; k++) { - if (((k - offset_bits) % 8) == 0) { - ht_index++; - } - if (contone_ptr[k] < thresh_ptr[k]) { - halftone_ptr[ht_index] |= bit_init; - } else { - halftone_ptr[ht_index] &= ~bit_init; - } - if (bit_init == 1) { - bit_init = 0x80; - } else { - bit_init >>= 1; - } - } - } -} - -/* A simple case for use in the landscape mode. 
Could probably be coded up - faster */ -static void -threshold_16_bit(byte *contone_ptr_in, byte *thresh_ptr_in, byte *ht_data) -{ - int k, j; - byte *contone_ptr = contone_ptr_in; - byte *thresh_ptr = thresh_ptr_in; - byte bit_init; - - for (j = 0; j < 2; j++) { - bit_init = 0x80; - for (k = 0; k < 8; k++) { - if (contone_ptr[k] < thresh_ptr[k]) { - ht_data[j] |= bit_init; - } else { - ht_data[j] &= ~bit_init; - } - bit_init >>= 1; - } - contone_ptr += 8; - thresh_ptr += 8; - } -} -#else -/* Note this function has strict data alignment needs */ -static void -threshold_16_SSE(byte *contone_ptr, byte *thresh_ptr, byte *ht_data) -{ - __m128i input1; - __m128i input2; - register int result_int; - const unsigned int mask1 = 0x80808080; - __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1); - - /* Load */ - input1 = _mm_load_si128((const __m128i *)contone_ptr); - input2 = _mm_load_si128((const __m128i *) thresh_ptr); - /* Unsigned subtraction does Unsigned saturation so we - have to use the signed operation */ - input1 = _mm_xor_si128(input1, sign_fix); - input2 = _mm_xor_si128(input2, sign_fix); - /* Subtract the two */ - input2 = _mm_subs_epi8(input1, input2); - /* Grab the sign mask */ - result_int = _mm_movemask_epi8(input2); - /* bit wise reversal on 16 bit word */ - ht_data[0] = bitreverse[(result_int & 0xff)]; - ht_data[1] = bitreverse[((result_int >> 8) & 0xff)]; -} - -/* Not so fussy on its alignment */ -static void -threshold_16_SSE_unaligned(byte *contone_ptr, byte *thresh_ptr, byte *ht_data) -{ - __m128i input1; - __m128i input2; - int result_int; - byte *sse_data; - const unsigned int mask1 = 0x80808080; - __m128i sign_fix = _mm_set_epi32(mask1, mask1, mask1, mask1); - - sse_data = (byte*) &(result_int); - /* Load */ - input1 = _mm_loadu_si128((const __m128i *)contone_ptr); - input2 = _mm_loadu_si128((const __m128i *) thresh_ptr); - /* Unsigned subtraction does Unsigned saturation so we - have to use the signed operation */ - input1 = 
_mm_xor_si128(input1, sign_fix); - input2 = _mm_xor_si128(input2, sign_fix); - /* Subtract the two */ - input2 = _mm_subs_epi8(input1, input2); - /* Grab the sign mask */ - result_int = _mm_movemask_epi8(input2); - /* bit wise reversal on 16 bit word */ - ht_data[0] = bitreverse[sse_data[0]]; - ht_data[1] = bitreverse[sse_data[1]]; -} - -/* This uses SSE2 simd operations to perform the thresholding operation. - Intrinsics are used since in-line assm is not supported in Visual - Studio on 64 bit machines, plus instrinsics are easily ported between - Visual Studio and gcc. requires <emmintrin.h> */ -static void -threshold_row_SSE(byte *contone, byte *threshold_strip, int contone_stride, - byte *halftone, int dithered_stride, int width, - int num_rows, int offset_bits) -{ - byte *contone_ptr; - byte *thresh_ptr; - byte *halftone_ptr; - int num_tiles = (int) ceil((float) (width - offset_bits)/16.0); - int k, j; - - for (j = 0; j < num_rows; j++) { - /* contone and thresh_ptr are 128 bit aligned. We do need to do this in - two steps to ensure that we pack the bits in an aligned fashion - into halftone_ptr. */ - contone_ptr = contone; - thresh_ptr = threshold_strip + contone_stride * j; - halftone_ptr = halftone + dithered_stride * j; - if (offset_bits > 0) { - /* Since we allowed for 16 bits in our left remainder - we can go directly in to the destination. threshold_16_SSE - requires 128 bit alignment. contone_ptr and thresh_ptr - are set up so that after we move in by offset_bits elements - then we are 128 bit aligned. */ - threshold_16_SSE_unaligned(contone_ptr, thresh_ptr, - halftone_ptr); - halftone_ptr += 2; - thresh_ptr += offset_bits; - contone_ptr += offset_bits; - } - /* Now we should have 128 bit aligned with our input data. Iterate - over sets of 16 going directly into our HT buffer. 
Sources and - halftone_ptr buffers should be padded to allow 15 bit overrun */ - for (k = 0; k < num_tiles; k++) { - threshold_16_SSE(contone_ptr, thresh_ptr, halftone_ptr); - thresh_ptr += 16; - contone_ptr += 16; - halftone_ptr += 2; - } - } -} -#endif - -/* This thresholds a buffer that is 16 wide by data_length tall */ -static void -threshold_landscape(byte *contone_align, byte *thresh_align, - ht_landscape_info_t ht_landscape, byte *halftone, - int data_length) -{ - __align16 byte contone[16]; - int position_start, position, curr_position; - int *widths = &(ht_landscape.widths[0]); - int local_widths[16]; - int num_contone = ht_landscape.num_contones; - int k, j, w, contone_out_posit; - byte *contone_ptr, *thresh_ptr, *halftone_ptr; - - /* Work through chunks of 16. */ - /* Data may have come in left to right or right to left. */ - if (ht_landscape.index > 0) { - position = position_start = 0; - } else { - position = position_start = ht_landscape.curr_pos + 1; - } - thresh_ptr = thresh_align; - halftone_ptr = halftone; - /* Copy the widths to a local array, and truncate the last one (which may - * be the first one!) if required. 
*/ - k = 0; - for (j = 0; j < num_contone; j++) - k += (local_widths[j] = widths[position_start+j]); - if (k > 16) { - if (ht_landscape.index > 0) { - local_widths[num_contone-1] -= k-16; - } else { - local_widths[0] -= k-16; - } - } - for (k = data_length; k > 0; k--) { /* Loop on rows */ - contone_ptr = &(contone_align[position]); /* Point us to our row start */ - curr_position = 0; /* We use this in keeping track of widths */ - contone_out_posit = 0; /* Our index out */ - for (j = num_contone; j > 0; j--) { - byte c = *contone_ptr; - for (w = local_widths[curr_position]; w > 0; w--) { - contone[contone_out_posit] = c; - contone_out_posit++; - } - curr_position++; /* Move us to the next position in our width array */ - contone_ptr++; /* Move us to a new location in our contone buffer */ - } - /* Now we have our left justified and expanded contone data for a single - set of 16. Go ahead and threshold these */ -#ifdef HAVE_SSE2 - threshold_16_SSE(&(contone[0]), thresh_ptr, halftone_ptr); -#else - threshold_16_bit(&(contone[0]), thresh_ptr, halftone_ptr); -#endif - thresh_ptr += 16; - position += 16; - halftone_ptr += 2; - } -} /* If we are in here, we had data left over. 
Move it to the proper position and get ht_landscape_info_t set properly */ @@ -1599,7 +1286,7 @@ flush: } /* Apply the threshold operation */ #if RAW_HT_DUMP - threshold_row_byte(contone_align, thresh_align, contone_stride, + gx_ht_threshold_row_byte(contone_align, thresh_align, contone_stride, halftone, dithered_stride, dest_width, vdi); sprintf(file_name,"HT_Portrait_%d_%dx%dx%d.raw", penum->id, dest_width, dest_height, spp_out); @@ -1607,15 +1294,9 @@ flush: fwrite(halftone,1,dest_width * vdi,fid); fclose(fid); #else -#ifdef HAVE_SSE2 - threshold_row_SSE(contone_align, thresh_align, contone_stride, + gx_ht_threshold_row_bit(contone_align, thresh_align, contone_stride, halftone, dithered_stride, dest_width, vdi, offset_bits); -#else - threshold_row_bit(contone_align, thresh_align, contone_stride, - halftone, dithered_stride, dest_width, vdi, - offset_bits); -#endif /* Now do the copy mono operation */ /* First the left remainder bits */ if (offset_bits > 0) { @@ -1712,7 +1393,7 @@ flush: memcpy(ptr_out, thresh_align, 16 * tile_remainder); } /* Apply the threshold operation */ - threshold_landscape(contone_align, thresh_align, + gx_ht_threshold_landscape(contone_align, thresh_align, penum->ht_landscape, halftone, data_length); /* Perform the copy mono */ penum->ht_landscape.offset_set = false; diff --git a/gs/base/lib.mak b/gs/base/lib.mak index c0ad2e749..2a9b4663d 100644 --- a/gs/base/lib.mak +++ b/gs/base/lib.mak @@ -458,6 +458,7 @@ gxcspace_h=$(GLSRC)gxcspace.h\ $(gscspace_h) $(gsccolor_h) $(gscsel_h) $(gxfrac_h) $(gxcindex_h) gxht_h=$(GLSRC)gxht.h $(gsht1_h) $(gsrefct_h) $(gxhttype_h) $(gxtmap_h) $(gscspace_h) gxcie_h=$(GLSRC)gxcie.h $(gscie_h) +gxht_thresh_h=$(GLSRC)gxht_thresh.h gxpcolor_h=$(GLSRC)gxpcolor.h\ $(gspcolor_h) $(gxcspace_h) $(gxdevice_h) $(gxdevmem_h) $(gxpcache_h) $(gxblend_h)\ $(gxcpath_h) $(gxdcolor_h) $(gxiclass_h) @@ -650,6 +651,10 @@ $(GLOBJ)gxhtbit.$(OBJ) : $(GLSRC)gxhtbit.c $(GXERR) $(memory__h)\ $(gxbitmap_h) $(gxdht_h) $(gxdhtres_h) 
$(gxhttile_h) $(gxtmap_h) $(GLCC) $(GLO_)gxhtbit.$(OBJ) $(C_) $(GLSRC)gxhtbit.c +$(GLOBJ)gxht_thresh.$(OBJ) : $(GLSRC)gxht_thresh.c $(GXERR) $(memory__h)\ + $(gx_h) $(gsiparam_h) $(gxht_thresh_h) $(math__h) + $(GLCC) $(GLO_)gxht_thresh.$(OBJ) $(C_) $(GLSRC)gxht_thresh.c + $(GLOBJ)gxwts.$(OBJ) : $(GLSRC)gxwts.c $(GXERR) $(gxwts_h)\ $(stdpre_h) $(memory__h) $(gxstate_h) $(gxht_h) $(math__h) $(gxdevcli_h)\ $(gxdht_h) @@ -685,7 +690,7 @@ $(GLOBJ)gximono.$(OBJ) : $(GLSRC)gximono.c $(GXERR) $(memory__h) $(gpcheck_h)\ $(gxarith_h) $(gxcmap_h) $(gxcpath_h) $(gxdcolor_h) $(gxdevice_h)\ $(gxdevmem_h) $(gxfixed_h) $(gximage_h) $(gxistate_h) $(gxmatrix_h)\ $(gzht_h) $(vdtrace_h) $(gsicc_h) $(gsicc_cache_h) $(gsicc_littlecms_h)\ - $(gxcie_h) $(gscie_h) + $(gxcie_h) $(gscie_h) $(gxht_thresh_h) $(GLCC) $(GLO_)gximono.$(OBJ) $(C_) $(GLSRC)gximono.c $(GLOBJ)gximask.$(OBJ) : $(GLSRC)gximask.c $(GXERR) $(memory__h) $(gserrors_h)\ @@ -1131,7 +1136,8 @@ LIB1x=$(GLOBJ)gxacpath.$(OBJ) $(GLOBJ)gxbcache.$(OBJ) $(GLOBJ)gxccache.$(OBJ) LIB2x=$(GLOBJ)gxccman.$(OBJ) $(GLOBJ)gxchar.$(OBJ) $(GLOBJ)gxcht.$(OBJ) LIB3x=$(GLOBJ)gxclip.$(OBJ) $(GLOBJ)gxcmap.$(OBJ) $(GLOBJ)gxcpath.$(OBJ) LIB4x=$(GLOBJ)gxdcconv.$(OBJ) $(GLOBJ)gxdcolor.$(OBJ) $(GLOBJ)gxhldevc.$(OBJ) -LIB5x=$(GLOBJ)gxfill.$(OBJ) $(GLOBJ)gxfdrop.$(OBJ) $(GLOBJ)gxht.$(OBJ) $(GLOBJ)gxhtbit.$(OBJ) +LIB5x=$(GLOBJ)gxfill.$(OBJ) $(GLOBJ)gxfdrop.$(OBJ) $(GLOBJ)gxht.$(OBJ) $(GLOBJ)gxhtbit.$(OBJ)\ + $(GLOBJ)gxht_thresh.$(OBJ) LIB6x=$(GLOBJ)gxwts.$(OBJ) $(GLOBJ)gxidata.$(OBJ) $(GLOBJ)gxifast.$(OBJ) $(GLOBJ)gximage.$(OBJ) LIB7x=$(GLOBJ)gximage1.$(OBJ) $(GLOBJ)gximono.$(OBJ) $(GLOBJ)gxipixel.$(OBJ) $(GLOBJ)gximask.$(OBJ) LIB8x=$(GLOBJ)gxi12bit.$(OBJ) $(GLOBJ)gxi16bit.$(OBJ) $(GLOBJ)gxiscale.$(OBJ) $(GLOBJ)gxpaint.$(OBJ) $(GLOBJ)gxpath.$(OBJ) $(GLOBJ)gxpath2.$(OBJ) |
