summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tor Lillqvist <tml@collabora.com> 2014-04-14 14:31:25 +0300
committer: Tor Lillqvist <tml@collabora.com> 2014-04-14 17:21:19 +0300
commit: 0b6fb1f45a179e94bb39f49dd8f245812d753113 (patch)
tree: 5e697160e3c5e1cfa4a11057b2dd72c77f49f83d
parent: c0f2dc1347cfbc121408959531998b217cd1c617 (diff)
Put the dict_ja and _zh data in files instead of code for iOS
Map the file(s) into memory on demand. The executable file of an app needs to be as small as possible. Including additional data files in an app bundle is fine.

Change-Id: Ife9bfe99a2cf0473d459f38f50dfa3304b39e282
-rw-r--r-- i18npool/CustomTarget_breakiterator.mk 16
-rw-r--r-- i18npool/Executable_gendict.mk 8
-rw-r--r-- i18npool/Library_i18npool.mk 6
-rw-r--r-- i18npool/README 15
-rw-r--r-- i18npool/source/breakiterator/gendict.cxx 111
-rw-r--r-- i18npool/source/breakiterator/xdictionary.cxx 76
-rw-r--r-- ios/CustomTarget_TiledLibreOffice_app.mk 3
7 files changed, 196 insertions, 39 deletions
diff --git a/i18npool/CustomTarget_breakiterator.mk b/i18npool/CustomTarget_breakiterator.mk
index dbe6ab223531..fb7f0d97ae08 100644
--- a/i18npool/CustomTarget_breakiterator.mk
+++ b/i18npool/CustomTarget_breakiterator.mk
@@ -11,6 +11,20 @@ $(eval $(call gb_CustomTarget_CustomTarget,i18npool/breakiterator))
i18npool_BIDIR := $(call gb_CustomTarget_get_workdir,i18npool/breakiterator)
+ifeq ($(OS),IOS)
+
+$(call gb_CustomTarget_get_target,i18npool/breakiterator) : \
+ $(i18npool_BIDIR)/dict_ja.data $(i18npool_BIDIR)/dict_zh.data $(i18npool_BIDIR)/OpenOffice_dat.c
+
+$(i18npool_BIDIR)/dict_%.data : \
+ $(SRCDIR)/i18npool/source/breakiterator/data/%.dic \
+ $(call gb_Executable_get_runtime_dependencies,gendict) \
+ | $(i18npool_BIDIR)/.dir
+ $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),$(true),DIC,1)
+ $(call gb_Helper_abbreviate_dirs,\
+ $(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.data,%,$@))
+else
+
$(call gb_CustomTarget_get_target,i18npool/breakiterator) : \
$(i18npool_BIDIR)/dict_ja.cxx $(i18npool_BIDIR)/dict_zh.cxx $(i18npool_BIDIR)/OpenOffice_dat.c
@@ -22,6 +36,8 @@ $(i18npool_BIDIR)/dict_%.cxx : \
$(call gb_Helper_abbreviate_dirs,\
$(call gb_Helper_execute,gendict) $< $@ $(patsubst $(i18npool_BIDIR)/dict_%.cxx,%,$@))
+endif
+
i18npool_BRKTXTS := \
char_in.brk \
char.brk \
diff --git a/i18npool/Executable_gendict.mk b/i18npool/Executable_gendict.mk
index a738f64e0666..69230cf52c07 100644
--- a/i18npool/Executable_gendict.mk
+++ b/i18npool/Executable_gendict.mk
@@ -14,6 +14,14 @@ $(eval $(call gb_Executable_use_libraries,gendict,\
$(gb_UWINAPI) \
))
+ifeq ($(gb_Side),build)
+ifneq ($(shell grep OS=IOS $(BUILDDIR)/config_host.mk),)
+$(eval $(call gb_Executable_add_cxxflags,gendict,\
+ -DDICT_JA_ZH_IN_DATAFILE \
+))
+endif
+endif
+
$(eval $(call gb_Executable_add_exception_objects,gendict,\
i18npool/source/breakiterator/gendict \
))
diff --git a/i18npool/Library_i18npool.mk b/i18npool/Library_i18npool.mk
index 4f9e5999a17a..c095fa8cf3ed 100644
--- a/i18npool/Library_i18npool.mk
+++ b/i18npool/Library_i18npool.mk
@@ -35,6 +35,12 @@ $(eval $(call gb_Library_use_externals,i18npool,\
icu_headers \
))
+ifeq ($(OS),IOS)
+$(eval $(call gb_Library_add_cxxflags,i18npool,\
+ -DDICT_JA_ZH_IN_DATAFILE \
+))
+endif
+
$(eval $(call gb_Library_add_exception_objects,i18npool,\
i18npool/source/breakiterator/breakiterator_cjk \
i18npool/source/breakiterator/breakiterator_ctl \
diff --git a/i18npool/README b/i18npool/README
index 86f72d1babe2..9e1a04f3f329 100644
--- a/i18npool/README
+++ b/i18npool/README
@@ -1,6 +1,19 @@
Internationalisation (i18npool) framework ensures that the suite is adaptable to the requirements of different
-native languages, their local settings and customs, etc without source code modification.
+native languages, their local settings and customs, etc without source code modification. (Wow, that is such marketing-speak...)
Specifically for locale data documentation please see i18npool/source/localedata/data/locale.dtd
See also [http://wiki.documentfoundation.org/Category:I18n]
+
+On iOS we put the largest data generated here, the dict_ja and dict_zh
+stuff, into separate files and not into code to keep the size of an
+app binary down. Temporary test code:
+
+ static bool beenhere = false;
+ if (!beenhere) {
+ beenhere = true;
+ uno::Reference< uno::XComponentContext > xComponentContext(::cppu::defaultBootstrap_InitialComponentContext());
+ uno::Reference< lang::XMultiComponentFactory > xMultiComponentFactoryClient( xComponentContext->getServiceManager() );
+ uno::Reference< uno::XInterface > xInterface =
+ xMultiComponentFactoryClient->createInstanceWithContext( "com.sun.star.i18n.BreakIterator_ja", xComponentContext );
+ }
diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index eac6998929a3..c0f1e8ecd577 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -17,7 +17,6 @@
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
-
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -32,6 +31,22 @@ using std::vector;
using namespace ::rtl;
+// For iOS, where we must strive for a minimal executable size, we
+// keep the data produced by this utility not as large const tables in
+// source code but instead as separate data files, to be bundled with
+// an app, and mmapped in at run time.
+
+// To test this more easily on a desktop OS, just make sure
+// DICT_JA_ZH_IN_DATAFILE is defined when building i18npool.
+
+#ifdef DICT_JA_ZH_IN_DATAFILE
+static sal_Int64 dataAreaOffset = 0;
+static sal_Int64 lenArrayOffset = 0;
+static sal_Int64 index1Offset = 0;
+static sal_Int64 index2Offset = 0;
+static sal_Int64 existMarkOffset = 0;
+#endif
+
/* Utility gendict:
"BreakIterator_CJK provides input string caching and dictionary searching for
@@ -60,12 +75,17 @@ static inline void set_exists(sal_uInt32 index)
static inline void printIncludes(FILE* source_fp)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n", source_fp);
fputs("#include <sal/types.h>\n\n", source_fp);
+#else
+ (void) source_fp;
+#endif
}
static inline void printFunctions(FILE* source_fp, const char *lang)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("#ifndef DISABLE_DYNLOADING\n", source_fp);
fputs ("SAL_DLLPUBLIC_EXPORT const sal_uInt8* getExistMark() { return existMark; }\n", source_fp);
fputs ("SAL_DLLPUBLIC_EXPORT const sal_Int16* getIndex1() { return index1; }\n", source_fp);
@@ -79,12 +99,20 @@ static inline void printFunctions(FILE* source_fp, const char *lang)
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Int32* getLenArray_%s() { return lenArray; }\n", lang);
fprintf (source_fp, "SAL_DLLPUBLIC_EXPORT const sal_Unicode* getDataArea_%s() { return dataArea; }\n", lang);
fputs ("#endif\n", source_fp);
+#else
+ (void) source_fp;
+ (void) lang;
+#endif
}
static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
{
// generate main dict. data array
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp);
+#else
+ dataAreaOffset = ftell(source_fp);
+#endif
sal_Char str[1024];
sal_uInt32 lenArrayCurr = 0;
sal_Unicode current = 0;
@@ -114,28 +142,47 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
// first character is stored in charArray, so start from second
for (i = 1; i < len; i++, lenArrayCurr++) {
set_exists(u[i]);
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%04x, ", u[i]);
if ((lenArrayCurr & 0x0f) == 0x0f)
fputs("\n\t", source_fp);
+#else
+ fwrite(&u[i], sizeof(u[i]), 1, source_fp);
+#endif
}
}
lenArray.push_back( lenArrayCurr ); // store last ending pointer
charArray[current+1] = lenArray.size();
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("\n};\n", source_fp);
+#endif
}
static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
+#else
+ lenArrayOffset = ftell(source_fp);
+ sal_uInt32 zero(0);
+ fwrite(&zero, sizeof(zero), 1, source_fp);
+#endif
for (size_t k = 0; k < lenArray.size(); k++)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
if( !(k & 0xf) )
fputs("\n\t", source_fp);
fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
+#else
+ fwrite(&lenArray[k], sizeof(lenArray[k]), 1, source_fp);
+#endif
}
+
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("\n};\n", source_fp );
+#endif
}
/* FIXME?: what happens if in every range i there is at least one charArray != 0
@@ -143,23 +190,40 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenA
=> then in index2, the last range will be ignored incorrectly */
static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
+#else
+ index1Offset = ftell(source_fp);
+#endif
+
sal_Int16 count = 0;
for (sal_Int32 i = 0; i < 0x100; i++) {
sal_Int32 j = 0;
while( j < 0x100 && charArray[(i<<8) + j] == 0)
j++;
- fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
+ set[i] = (j < 0x100 ? count++ : 0xff);
+#ifndef DICT_JA_ZH_IN_DATAFILE
+ fprintf(source_fp, "0x%02x, ", set[i]);
if ((i & 0x0f) == 0x0f)
fputs ("\n\t", source_fp);
+#else
+ fwrite(&set[i], sizeof(set[i]), 1, source_fp);
+#endif
}
+
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("};\n", source_fp);
+#endif
}
static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("static const sal_Int32 index2[] = {\n\t", source_fp);
+#else
+ index2Offset = ftell(source_fp);
+#endif
sal_Int32 prev = 0;
for (sal_Int32 i = 0; i < 0x100; i++) {
if (set[i] != 0xff) {
@@ -170,28 +234,48 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
k++;
prev = charArray[(i<<8) + j];
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(k < 0x10000 ? charArray[k] + 1 : 0));
if ((j & 0x0f) == 0x0f)
fputs ("\n\t", source_fp);
+#else
+ sal_uInt32 n = (k < 0x10000 ? charArray[k] + 1 : 0);
+ fwrite(&n, sizeof(n), 1, source_fp);
+#endif
}
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("\n\t", source_fp);
+#endif
}
}
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs ("\n};\n", source_fp);
+#endif
}
/* Generates a bitmask for the existance of sal_Unicode values in dictionary;
it packs 8 sal_Bool values in 1 sal_uInt8 */
static inline void printExistsMask(FILE *source_fp)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
+#else
+ existMarkOffset = ftell(source_fp);
+#endif
for (unsigned int i = 0; i < 0x2000; i++)
{
+#ifndef DICT_JA_ZH_IN_DATAFILE
fprintf(source_fp, "0x%02x, ", exists[i]);
if ( (i & 0xf) == 0xf )
fputs("\n\t", source_fp);
+#else
+ fwrite(&exists[i], sizeof(exists[i]), 1, source_fp);
+#endif
}
+
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("\n};\n", source_fp);
+#endif
}
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
@@ -228,14 +312,25 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
sal_Int16 set[0x100];
printIncludes(source_fp);
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("extern \"C\" {\n", source_fp);
- printDataArea(dictionary_fp, source_fp, lenArray);
- printLenArray(source_fp, lenArray);
- printIndex1(source_fp, set);
- printIndex2(source_fp, set);
- printExistsMask(source_fp);
- printFunctions(source_fp, argv[3]);
+#endif
+ printDataArea(dictionary_fp, source_fp, lenArray);
+ printLenArray(source_fp, lenArray);
+ printIndex1(source_fp, set);
+ printIndex2(source_fp, set);
+ printExistsMask(source_fp);
+ printFunctions(source_fp, argv[3]);
+#ifndef DICT_JA_ZH_IN_DATAFILE
fputs("}\n", source_fp);
+#else
+ // Put pointers to the tables at the end of the file...
+ fwrite(&dataAreaOffset, sizeof(dataAreaOffset), 1, source_fp);
+ fwrite(&lenArrayOffset, sizeof(lenArrayOffset), 1, source_fp);
+ fwrite(&index1Offset, sizeof(index1Offset), 1, source_fp);
+ fwrite(&index2Offset, sizeof(index2Offset), 1, source_fp);
+ fwrite(&existMarkOffset, sizeof(existMarkOffset), 1, source_fp);
+#endif
fclose(dictionary_fp);
fclose(source_fp);
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx
index 4cb51f9b5439..390b2cd7dcc4 100644
--- a/i18npool/source/breakiterator/xdictionary.cxx
+++ b/i18npool/source/breakiterator/xdictionary.cxx
@@ -17,28 +17,22 @@
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
+#include <config_folders.h>
-// xdictionary.cpp: implementation of the xdictionary class.
-
-
-
-
+#include <osl/file.h>
#include <rtl/ustrbuf.hxx>
-
+#include <rtl/bootstrap.hxx>
#include <com/sun/star/i18n/WordType.hpp>
#include <xdictionary.hxx>
#include <unicode/uchar.h>
#include <string.h>
#include <breakiteratorImpl.hxx>
-
-// Construction/Destruction
-
-
-
namespace com { namespace sun { namespace star { namespace i18n {
-#ifndef DISABLE_DYNLOADING
+#ifdef DICT_JA_ZH_IN_DATAFILE
+
+#elif !defined DISABLE_DYNLOADING
extern "C" { static void SAL_CALL thisModule() {} }
@@ -74,8 +68,44 @@ xdictionary::xdictionary(const sal_Char *lang) :
boundary(),
japaneseWordBreak( sal_False )
{
- index1 = 0;
-#ifndef DISABLE_DYNLOADING
+ existMark = NULL;
+ index1 = NULL;
+ index2 = NULL;
+ lenArray = NULL;
+ dataArea = NULL;
+
+#ifdef DICT_JA_ZH_IN_DATAFILE
+
+ if( strcmp( lang, "ja" ) == 0 || strcmp( lang, "zh" ) == 0 )
+ {
+ OUString sUrl( "$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/dict_" );
+ rtl::Bootstrap::expandMacros(sUrl);
+
+ if( strcmp( lang, "ja" ) == 0 )
+ sUrl += "ja.data";
+ else if( strcmp( lang, "zh" ) == 0 )
+ sUrl += "zh.data";
+
+ oslFileHandle aFileHandle;
+ sal_uInt64 nFileSize;
+ char *pMapping;
+ if( osl_openFile( sUrl.pData, &aFileHandle, osl_File_OpenFlag_Read ) == osl_File_E_None &&
+ osl_getFileSize( aFileHandle, &nFileSize) == osl_File_E_None &&
+ osl_mapFile( aFileHandle, (void **) &pMapping, nFileSize, 0, osl_File_MapFlag_RandomAccess ) == osl_File_E_None )
+ {
+ // We have the offsets to the parts of the file at its end, see gendict.cxx
+ sal_Int64 *pEOF = (sal_Int64*)(pMapping + nFileSize);
+
+ existMark = (sal_uInt8*) (pMapping + pEOF[-1]);
+ index2 = (sal_Int32*) (pMapping + pEOF[-2]);
+ index1 = (sal_Int16*) (pMapping + pEOF[-3]);
+ lenArray = (sal_Int32*) (pMapping + pEOF[-4]);
+ dataArea = (sal_Unicode*) (pMapping + pEOF[-5]);
+ }
+ }
+
+#elif !defined DISABLE_DYNLOADING
+
#ifdef SAL_DLLPREFIX
OUStringBuffer aBuf( strlen(lang) + 7 + 6 ); // mostly "lib*.so" (with * == dict_zh)
aBuf.appendAscii( SAL_DLLPREFIX );
@@ -97,16 +127,9 @@ xdictionary::xdictionary(const sal_Char *lang) :
func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString("getDataArea").pData );
dataArea = (sal_Unicode*) (*func)();
}
- else
- {
- existMark = NULL;
- index1 = NULL;
- index2 = NULL;
- lenArray = NULL;
- dataArea = NULL;
- }
#else
+
if( strcmp( lang, "ja" ) == 0 ) {
existMark = getExistMark_ja();
index1 = getIndex1_ja();
@@ -121,14 +144,7 @@ xdictionary::xdictionary(const sal_Char *lang) :
lenArray = getLenArray_zh();
dataArea = getDataArea_zh();
}
- else
- {
- existMark = NULL;
- index1 = NULL;
- index2 = NULL;
- lenArray = NULL;
- dataArea = NULL;
- }
+
#endif
for (sal_Int32 i = 0; i < CACHE_MAX; i++)
diff --git a/ios/CustomTarget_TiledLibreOffice_app.mk b/ios/CustomTarget_TiledLibreOffice_app.mk
index 82a960c11bda..618b4cc980ed 100644
--- a/ios/CustomTarget_TiledLibreOffice_app.mk
+++ b/ios/CustomTarget_TiledLibreOffice_app.mk
@@ -59,6 +59,9 @@ TiledLibreOffice_setup:
mkdir -p $(TiledLibreOffice_resource)/share/config
cp -R $(INSTDIR)/share/config/soffice.cfg $(TiledLibreOffice_resource)/share/config
+ # Japanese and Chinese dict files
+ cp $(WORKDIR)/CustomTarget/i18npool/breakiterator/dict_*.data $(TiledLibreOffice_resource)/share
+
# "registry"
cp -R $(INSTDIR)/share/registry $(TiledLibreOffice_resource)/share