From ff071078ee5f13f0e9d430d6783444a631d232a0 Mon Sep 17 00:00:00 2001 From: Thorsten Behrens Date: Thu, 2 Nov 2023 18:17:50 +0100 Subject: reprobuild: don't write timestamps to clucene index files Our embedded clucene by default writes a random current-time millisecond value into version fields, in an attempt to randomise. Clearly this is not needed for our static help, and it also prevents builds from being reproducible. Change-Id: I011388b5bc72b5d86bc1900f5439036ede60c020 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158845 Tested-by: Jenkins Reviewed-by: Thorsten Behrens --- RepositoryExternal.mk | 1 + external/clucene/UnpackedTarball_clucene.mk | 1 + .../clucene/patches/clucene-reprobuild.patch.1 | 61 ++++++++++++++++++++++ helpcompiler/source/HelpIndexer.cxx | 5 ++ 4 files changed, 68 insertions(+) create mode 100644 external/clucene/patches/clucene-reprobuild.patch.1 diff --git a/RepositoryExternal.mk b/RepositoryExternal.mk index 9659ec7c163f..da11a7d153f2 100644 --- a/RepositoryExternal.mk +++ b/RepositoryExternal.mk @@ -2847,6 +2847,7 @@ ifneq ($(SYSTEM_CLUCENE),) define gb_LinkTarget__use_clucene $(call gb_LinkTarget_add_defs,$(1),\ $(filter-out -I% -isystem%,$(subst -isystem /,-isystem/,$(CLUCENE_CFLAGS))) \ + -DSYSTEM_CLUCENE \ ) $(call gb_LinkTarget_set_include,$(1),\ diff --git a/external/clucene/UnpackedTarball_clucene.mk b/external/clucene/UnpackedTarball_clucene.mk index 0d7b2a5c288e..3acdb35b5757 100644 --- a/external/clucene/UnpackedTarball_clucene.mk +++ b/external/clucene/UnpackedTarball_clucene.mk @@ -55,6 +55,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,clucene,\ external/clucene/patches/binary_function.patch \ external/clucene/patches/clucene-pure-virtual.patch \ external/clucene/patches/enumarith.patch \ + external/clucene/patches/clucene-reprobuild.patch.1 \ )) ifneq ($(OS),WNT) diff --git a/external/clucene/patches/clucene-reprobuild.patch.1 b/external/clucene/patches/clucene-reprobuild.patch.1 new file mode 100644 index 
000000000000..2c5a0b95135f --- /dev/null +++ b/external/clucene/patches/clucene-reprobuild.patch.1 @@ -0,0 +1,61 @@ +diff -ur clucene.org/src/core/CLucene/index/IndexWriter.cpp clucene/src/core/CLucene/index/IndexWriter.cpp +--- clucene.org/src/core/CLucene/index/IndexWriter.cpp 2023-11-02 17:31:00.110168174 +0100 ++++ clucene/src/core/CLucene/index/IndexWriter.cpp 2023-11-02 17:33:22.507665912 +0100 +@@ -366,6 +366,10 @@ + } + } + ++void IndexWriter::setSegmentInfoStartVersion(int64_t startVersion) { ++ this->segmentInfos->setStartVersion(startVersion); ++} ++ + int32_t IndexWriter::getMaxBufferedDocs() { + ensureOpen(); + return docWriter->getMaxBufferedDocs(); +diff -ur clucene.org/src/core/CLucene/index/IndexWriter.h clucene/src/core/CLucene/index/IndexWriter.h +--- clucene.org/src/core/CLucene/index/IndexWriter.h 2023-11-02 17:31:00.113501525 +0100 ++++ clucene/src/core/CLucene/index/IndexWriter.h 2023-11-02 17:33:43.547787510 +0100 +@@ -336,6 +336,12 @@ + int64_t getWriteLockTimeout(); + + /** ++ * Sets the 0th segmentinfo version. Default is current system time ++ * in milliseconds ++ */ ++ void setSegmentInfoStartVersion(int64_t startVersion); ++ ++ /** + * Sets the maximum time to wait for a commit lock (in milliseconds). 
+ */ + void setCommitLockTimeout(int64_t commitLockTimeout); +diff -ur clucene.org/src/core/CLucene/index/SegmentInfos.cpp clucene/src/core/CLucene/index/SegmentInfos.cpp +--- clucene.org/src/core/CLucene/index/SegmentInfos.cpp 2023-11-02 17:31:00.110168174 +0100 ++++ clucene/src/core/CLucene/index/SegmentInfos.cpp 2023-11-02 18:04:43.855243418 +0100 +@@ -662,6 +662,10 @@ + return IndexFileNames::fileNameFromGeneration( IndexFileNames::SEGMENTS, "", nextGeneration ); + } + ++ void SegmentInfos::setStartVersion(int64_t version) { ++ this->version = version; ++ } ++ + void SegmentInfos::clearto(size_t from, size_t end){ + size_t range = end - from; + if ( (infos.size() - from) >= range) { // Make sure we actually need to remove +diff -ur clucene.org/src/core/CLucene/index/_SegmentInfos.h clucene/src/core/CLucene/index/_SegmentInfos.h +--- clucene.org/src/core/CLucene/index/_SegmentInfos.h 2023-11-02 17:31:00.106834824 +0100 ++++ clucene/src/core/CLucene/index/_SegmentInfos.h 2023-11-02 18:04:51.178598463 +0100 +@@ -347,6 +347,13 @@ + */ + std::string getNextSegmentFileName(); + ++ /** ++ * Set version value to start from ++ ++ Defaults to current time in milliseconds ++ */ ++ void setStartVersion(int64_t version); ++ + /* public vector-like operations */ + //delete and clears objects 'from' from to 'to' + void clearto(size_t to, size_t end); diff --git a/helpcompiler/source/HelpIndexer.cxx b/helpcompiler/source/HelpIndexer.cxx index 44506bddfc31..65e46743b482 100644 --- a/helpcompiler/source/HelpIndexer.cxx +++ b/helpcompiler/source/HelpIndexer.cxx @@ -106,6 +106,11 @@ bool HelpIndexer::indexDocuments() analyzer.get(), true); #endif +#ifndef SYSTEM_CLUCENE + // avoid random values in index file, making help indices reproducible + writer->setSegmentInfoStartVersion(0); +#endif + //Double limit of tokens allowed, otherwise we'll get a too-many-tokens //exception for ja help. 
Could alternative ignore the exception and get //truncated results as per java-Lucene apparently -- cgit v1.2.3