summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eike Rathke <erack@redhat.com> 2021-07-28 17:31:56 +0200
committer Eike Rathke <erack@redhat.com> 2021-07-28 18:56:29 +0200
commit 516318113f0bd2b3c658aba9b285165e63a280e2 (patch)
tree 3e33e570b0d62b36afa95045999d115fe005d126
parent 24b06b9c6bdb777dff385b0fbfc81d55d3d013a1 (diff)
Resolves: tdf#76310 Preserve whitespace TAB, CR, LF in formula expressions

Allowed whitespace in ODFF and OOXML are
U+0020 SPACE
U+0009 CHARACTER TABULATION
U+000A LINE FEED
U+000D CARRIAGE RETURN

Line feed and carriage return look a bit funny in the Function Wizard if part of a function's argument but work. Once a formula is edited, CR are converted to LF though, probably already in EditEngine, didn't investigate.

Change-Id: I6278f6be48872e0710a3d74212db391dda249ed2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/119635
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
-rw-r--r-- formula/source/core/api/FormulaCompiler.cxx 29
-rw-r--r-- formula/source/core/api/token.cxx 39
-rw-r--r-- formula/source/ui/dlg/formula.cxx 8
-rw-r--r-- include/formula/compiler.hxx 41
-rw-r--r-- include/formula/opcode.hxx 2
-rw-r--r-- include/formula/token.hxx 20
-rw-r--r-- offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl 24
-rw-r--r-- sc/inc/compiler.hxx 20
-rw-r--r-- sc/source/core/tool/compiler.cxx 78
-rw-r--r-- sc/source/core/tool/parclass.cxx 1
-rw-r--r-- sc/source/core/tool/token.cxx 18
-rw-r--r-- sc/source/filter/excel/xeformula.cxx 8
-rw-r--r-- sc/source/filter/excel/xlformula.cxx 5
-rw-r--r-- sc/source/ui/app/inputhdl.cxx 2
-rw-r--r-- sc/source/ui/unoobj/tokenuno.cxx 13
-rw-r--r-- sc/source/ui/view/viewfunc.cxx 2
16 files changed, 240 insertions, 70 deletions
diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx
index be5ce09d132f..f7174807f0f4 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -475,7 +475,8 @@ uno::Sequence< sheet::FormulaOpCodeMapEntry > FormulaCompiler::OpCodeMap::create
{ FormulaMapGroupSpecialOffset::DB_AREA , ocDBArea } ,
/* TODO: { FormulaMapGroupSpecialOffset::TABLE_REF , ocTableRef } , */
{ FormulaMapGroupSpecialOffset::MACRO , ocMacro } ,
- { FormulaMapGroupSpecialOffset::COL_ROW_NAME , ocColRowName }
+ { FormulaMapGroupSpecialOffset::COL_ROW_NAME , ocColRowName } ,
+ { FormulaMapGroupSpecialOffset::WHITESPACE , ocWhitespace }
};
const size_t nCount = SAL_N_ELEMENTS(aMap);
// Preallocate vector elements.
@@ -1267,14 +1268,18 @@ bool FormulaCompiler::GetToken()
nWasColRowName = 1;
else
nWasColRowName = 0;
+ OpCode eTmpOp;
mpToken = maArrIterator.Next();
- while( mpToken && mpToken->GetOpCode() == ocSpaces )
+ while (mpToken && ((eTmpOp = mpToken->GetOpCode()) == ocSpaces || eTmpOp == ocWhitespace))
{
- // For significant whitespace remember last ocSpaces token. Usually
- // there's only one even for multiple spaces.
- pSpacesToken = mpToken;
- if ( nWasColRowName )
- nWasColRowName++;
+ if (eTmpOp == ocSpaces)
+ {
+ // For significant whitespace remember last ocSpaces token.
+ // Usually there's only one even for multiple spaces.
+ pSpacesToken = mpToken;
+ if ( nWasColRowName )
+ nWasColRowName++;
+ }
if ( bAutoCorrect && !pStack )
CreateStringFromToken( aCorrectedFormula, mpToken.get() );
mpToken = maArrIterator.Next();
@@ -2272,10 +2277,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
if( bSpaces )
rBuffer.append( ' ');
- if( eOp == ocSpaces )
+ if (eOp == ocSpaces || eOp == ocWhitespace)
{
bool bWriteSpaces = true;
- if (mxSymbols->isODFF())
+ if (eOp == ocSpaces && mxSymbols->isODFF())
{
const FormulaToken* p = maArrIterator.PeekPrevNoSpaces();
bool bIntersectionOp = (p && p->GetOpCode() == ocColRowName);
@@ -2316,7 +2321,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
sal_uInt8 n = t->GetByte();
for ( sal_uInt8 j=0; j<n; ++j )
{
- rBuffer.append( ' ');
+ if (eOp == ocWhitespace)
+ rBuffer.append( t->GetChar());
+ else
+ rBuffer.append( ' ');
}
}
}
@@ -2403,6 +2411,7 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
case ocPush:
case ocRange:
case ocSpaces:
+ case ocWhitespace:
break;
default:
nLevel = 0;
diff --git a/formula/source/core/api/token.cxx b/formula/source/core/api/token.cxx
index 0af1f63f0e5e..c5b69acf2c90 100644
--- a/formula/source/core/api/token.cxx
+++ b/formula/source/core/api/token.cxx
@@ -244,6 +244,13 @@ void FormulaToken::SetSheet( sal_Int16 )
assert( !"virtual dummy called" );
}
+sal_Unicode FormulaToken::GetChar() const
+{
+ // This Get is worth an assert.
+ assert( !"virtual dummy called" );
+ return 0;
+}
+
short* FormulaToken::GetJump() const
{
SAL_WARN( "formula.core", "FormulaToken::GetJump: virtual dummy called" );
@@ -348,6 +355,15 @@ bool FormulaToken::TextEqual( const FormulaToken& rToken ) const
// real implementations of virtual functions
+sal_uInt8 FormulaSpaceToken::GetByte() const { return nByte; }
+sal_Unicode FormulaSpaceToken::GetChar() const { return cChar; }
+bool FormulaSpaceToken::operator==( const FormulaToken& r ) const
+{
+ return FormulaToken::operator==( r ) && nByte == r.GetByte() &&
+ cChar == r.GetChar();
+}
+
+
sal_uInt8 FormulaByteToken::GetByte() const { return nByte; }
void FormulaByteToken::SetByte( sal_uInt8 n ) { nByte = n; }
ParamClass FormulaByteToken::GetInForceArray() const { return eInForceArray; }
@@ -425,6 +441,13 @@ bool FormulaTokenArray::AddFormulaToken(
AddStringXML( aStrVal );
else if ( eOpCode == ocExternal || eOpCode == ocMacro )
Add( new formula::FormulaExternalToken( eOpCode, aStrVal ) );
+ else if ( eOpCode == ocWhitespace )
+ {
+ // Simply ignore empty string.
+ // Convention is one character repeated.
+ if (!aStrVal.isEmpty())
+ Add( new formula::FormulaSpaceToken( static_cast<sal_uInt8>(aStrVal.getLength()), aStrVal[0]));
+ }
else
bError = true; // unexpected string: don't know what to do with it
}
@@ -1472,17 +1495,21 @@ FormulaTokenArray * FormulaTokenArray::RewriteMissing( const MissingConvention &
return pNewArr;
}
+namespace {
+inline bool isWhitespace( OpCode eOp ) { return eOp == ocSpaces || eOp == ocWhitespace; }
+}
+
bool FormulaTokenArray::MayReferenceFollow()
{
if ( pCode && nLen > 0 )
{
// ignore trailing spaces
sal_uInt16 i = nLen - 1;
- while ( i > 0 && pCode[i]->GetOpCode() == SC_OPCODE_SPACES )
+ while (i > 0 && isWhitespace( pCode[i]->GetOpCode()))
{
--i;
}
- if ( i > 0 || pCode[i]->GetOpCode() != SC_OPCODE_SPACES )
+ if (i > 0 || !isWhitespace( pCode[i]->GetOpCode()))
{
OpCode eOp = pCode[i]->GetOpCode();
if ( (SC_OPCODE_START_BIN_OP <= eOp && eOp < SC_OPCODE_STOP_BIN_OP ) ||
@@ -1756,7 +1783,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::NextNoSpaces()
{
if( mpFTA->GetArray() )
{
- while( (mnIndex < mpFTA->GetLen()) && (mpFTA->GetArray()[ mnIndex ]->GetOpCode() == ocSpaces) )
+ while ((mnIndex < mpFTA->GetLen()) && isWhitespace( mpFTA->GetArray()[ mnIndex ]->GetOpCode()))
++mnIndex;
if( mnIndex < mpFTA->GetLen() )
return mpFTA->GetArray()[ mnIndex++ ];
@@ -1793,7 +1820,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekNextNoSpaces() const
if( mpFTA->GetArray() && mnIndex < mpFTA->GetLen() )
{
sal_uInt16 j = mnIndex;
- while ( j < mpFTA->GetLen() && mpFTA->GetArray()[j]->GetOpCode() == ocSpaces )
+ while (j < mpFTA->GetLen() && isWhitespace( mpFTA->GetArray()[j]->GetOpCode()))
j++;
if ( j < mpFTA->GetLen() )
return mpFTA->GetArray()[ j ];
@@ -1809,9 +1836,9 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekPrevNoSpaces() const
if( mpFTA->GetArray() && mnIndex > 1 )
{
sal_uInt16 j = mnIndex - 2;
- while ( mpFTA->GetArray()[j]->GetOpCode() == ocSpaces && j > 0 )
+ while (isWhitespace( mpFTA->GetArray()[j]->GetOpCode()) && j > 0 )
j--;
- if ( j > 0 || mpFTA->GetArray()[j]->GetOpCode() != ocSpaces )
+ if (j > 0 || !isWhitespace( mpFTA->GetArray()[j]->GetOpCode()))
return mpFTA->GetArray()[ j ];
else
return nullptr;
diff --git a/formula/source/ui/dlg/formula.cxx b/formula/source/ui/dlg/formula.cxx
index 81931d8d586b..36b59d5eb0ec 100644
--- a/formula/source/ui/dlg/formula.cxx
+++ b/formula/source/ui/dlg/formula.cxx
@@ -389,6 +389,9 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
sal_Int32 nOldTokPos = 1;
sal_Int32 nPrevFuncPos = 1;
short nBracketCount = 0;
+ const sal_Int32 nOpPush = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode;
+ const sal_Int32 nOpSpaces = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode;
+ const sal_Int32 nOpWhitespace = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::WHITESPACE].Token.OpCode;
while ( pIter != pEnd )
{
const sal_Int32 eOp = pIter->OpCode;
@@ -401,8 +404,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
m_xBtnMatrix->set_active(true);
}
- if (eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode ||
- eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode)
+ if (eOp == nOpPush || eOp == nOpSpaces || eOp == nOpWhitespace)
{
const sal_Int32 n1 = nTokPos < 0 ? -1 : aFormString.indexOf( sep, nTokPos);
const sal_Int32 n2 = nTokPos < 0 ? -1 : aFormString.indexOf( ')', nTokPos);
@@ -444,7 +446,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
m_pFunctionOpCodesEnd,
[&eOp](const sheet::FormulaOpCodeMapEntry& aEntry) { return aEntry.Token.OpCode == eOp; });
- if ( bIsFunction && m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode != eOp )
+ if ( bIsFunction && nOpSpaces != eOp && nOpWhitespace != eOp )
{
nPrevFuncPos = nFuncPos;
nFuncPos = nOldTokPos;
diff --git a/include/formula/compiler.hxx b/include/formula/compiler.hxx
index baf3e23f6161..fcf7326d3e0f 100644
--- a/include/formula/compiler.hxx
+++ b/include/formula/compiler.hxx
@@ -40,26 +40,27 @@
#define SC_OPCODE_BAD 14
#define SC_OPCODE_STRINGXML 15
#define SC_OPCODE_SPACES 16
-#define SC_OPCODE_MAT_REF 17
-#define SC_OPCODE_DB_AREA 18 /* additional access operators */
-#define SC_OPCODE_TABLE_REF 19
-#define SC_OPCODE_MACRO 20
-#define SC_OPCODE_COL_ROW_NAME 21
-#define SC_OPCODE_COL_ROW_NAME_AUTO 22
-#define SC_OPCODE_PERCENT_SIGN 23 /* operator _follows_ value */
-#define SC_OPCODE_ARRAY_OPEN 24
-#define SC_OPCODE_ARRAY_CLOSE 25
-#define SC_OPCODE_ARRAY_ROW_SEP 26
-#define SC_OPCODE_ARRAY_COL_SEP 27 /* some convs use sep != col_sep */
-#define SC_OPCODE_TABLE_REF_OPEN 28
-#define SC_OPCODE_TABLE_REF_CLOSE 29
-#define SC_OPCODE_TABLE_REF_ITEM_ALL 30
-#define SC_OPCODE_TABLE_REF_ITEM_HEADERS 31
-#define SC_OPCODE_TABLE_REF_ITEM_DATA 32
-#define SC_OPCODE_TABLE_REF_ITEM_TOTALS 33
-#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 34
-#define SC_OPCODE_STOP_DIV 35
-#define SC_OPCODE_SKIP 36 /* used to skip raw tokens during string compilation */
+#define SC_OPCODE_WHITESPACE 17
+#define SC_OPCODE_MAT_REF 18
+#define SC_OPCODE_DB_AREA 19 /* additional access operators */
+#define SC_OPCODE_TABLE_REF 20
+#define SC_OPCODE_MACRO 21
+#define SC_OPCODE_COL_ROW_NAME 22
+#define SC_OPCODE_COL_ROW_NAME_AUTO 23
+#define SC_OPCODE_PERCENT_SIGN 24 /* operator _follows_ value */
+#define SC_OPCODE_ARRAY_OPEN 25
+#define SC_OPCODE_ARRAY_CLOSE 26
+#define SC_OPCODE_ARRAY_ROW_SEP 27
+#define SC_OPCODE_ARRAY_COL_SEP 28 /* some convs use sep != col_sep */
+#define SC_OPCODE_TABLE_REF_OPEN 29
+#define SC_OPCODE_TABLE_REF_CLOSE 30
+#define SC_OPCODE_TABLE_REF_ITEM_ALL 31
+#define SC_OPCODE_TABLE_REF_ITEM_HEADERS 32
+#define SC_OPCODE_TABLE_REF_ITEM_DATA 33
+#define SC_OPCODE_TABLE_REF_ITEM_TOTALS 34
+#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 35
+#define SC_OPCODE_STOP_DIV 36
+#define SC_OPCODE_SKIP 37 /* used to skip raw tokens during string compilation */
/*** error constants #... ***/
#define SC_OPCODE_START_ERRORS 40
diff --git a/include/formula/opcode.hxx b/include/formula/opcode.hxx
index 3123e8f3fa38..d92ae0b1d41d 100644
--- a/include/formula/opcode.hxx
+++ b/include/formula/opcode.hxx
@@ -53,6 +53,7 @@ enum OpCode : sal_uInt16
ocBad = SC_OPCODE_BAD,
ocStringXML = SC_OPCODE_STRINGXML,
ocSpaces = SC_OPCODE_SPACES,
+ ocWhitespace = SC_OPCODE_WHITESPACE,
ocMatRef = SC_OPCODE_MAT_REF,
ocTableRefItemAll = SC_OPCODE_TABLE_REF_ITEM_ALL,
ocTableRefItemHeaders = SC_OPCODE_TABLE_REF_ITEM_HEADERS,
@@ -545,6 +546,7 @@ inline std::string OpCodeEnumToString(OpCode eCode)
case ocBad: return "Bad";
case ocStringXML: return "StringXML";
case ocSpaces: return "Spaces";
+ case ocWhitespace: return "Whitespace";
case ocMatRef: return "MatRef";
case ocTableRefItemAll: return "TableRefItemAll";
case ocTableRefItemHeaders: return "TableRefItemHeaders";
diff --git a/include/formula/token.hxx b/include/formula/token.hxx
index 3fa00e89339f..77bf3eeb90ea 100644
--- a/include/formula/token.hxx
+++ b/include/formula/token.hxx
@@ -187,6 +187,7 @@ public:
virtual void SetIndex( sal_uInt16 n );
virtual sal_Int16 GetSheet() const;
virtual void SetSheet( sal_Int16 n );
+ virtual sal_Unicode GetChar() const;
virtual short* GetJump() const;
virtual const OUString& GetExternal() const;
virtual FormulaToken* GetFAPOrigToken() const;
@@ -225,6 +226,25 @@ inline void intrusive_ptr_release(const FormulaToken* p)
p->DecRef();
}
+class FORMULA_DLLPUBLIC FormulaSpaceToken : public FormulaToken
+{
+private:
+ sal_uInt8 nByte;
+ sal_Unicode cChar;
+public:
+ FormulaSpaceToken( sal_uInt8 n, sal_Unicode c ) :
+ FormulaToken( svByte, ocWhitespace ),
+ nByte( n ), cChar( c ) {}
+ FormulaSpaceToken( const FormulaSpaceToken& r ) :
+ FormulaToken( r ),
+ nByte( r.nByte ), cChar( r.cChar ) {}
+
+ virtual FormulaToken* Clone() const override { return new FormulaSpaceToken(*this); }
+ virtual sal_uInt8 GetByte() const override;
+ virtual sal_Unicode GetChar() const override;
+ virtual bool operator==( const FormulaToken& rToken ) const override;
+};
+
class FORMULA_DLLPUBLIC FormulaByteToken : public FormulaToken
{
private:
diff --git a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
index 89c21dca4328..4cb2699e9af6 100644
--- a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
+++ b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
@@ -140,10 +140,6 @@ constants FormulaMapGroupSpecialOffset
<p>The FormulaToken::Data member shall contain a
positive integer value of type `long` specifying the number
of space characters.</p>
-
- <p>Attention: This may change in next versions to support other
- characters than simple space characters (e.g. line feeds, horizontal
- tabulators, non-breakable spaces).</p>
*/
const long SPACES = 8;
@@ -176,6 +172,26 @@ constants FormulaMapGroupSpecialOffset
const long COL_ROW_NAME = 12;
+ /** Formula tokens containing the op-code obtained from this offset
+ describe whitespace characters within the string representation of a
+ formula.
+
+ <p>Whitespace characters in formulas are used for readability and do
+ not affect the result of the formula.</p>
+
+ <p>The FormulaToken::Data member shall contain a
+ `string` of one (repeated) whitespace character. The length of
+ the string determines the number of repetitions.</p>
+
+ <p>Allowed whitespace characters are SPACE (U+0020), CHARACTER
+ TABULATION (U+0009), LINE FEED (U+000A), and CARRIAGE RETURN
+ (U+000D). See also ODF v1.3 OpenFormula 5.14 Whitespace.</p>
+
+ @since LibreOffice 7.3
+ */
+ const long WHITESPACE = 13;
+
+
};
diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index d8935c7f7545..17e258dc3805 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -108,6 +108,10 @@ public:
union {
double nValue;
struct {
+ sal_uInt8 nCount;
+ sal_Unicode cChar;
+ } whitespace;
+ struct {
sal_uInt8 cByte;
formula::ParamClass eInForceArray;
} sbyte;
@@ -326,7 +330,21 @@ private:
bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const;
virtual void SetError(FormulaError nError) override;
- sal_Int32 NextSymbol(bool bInArray);
+
+ struct Whitespace final
+ {
+ sal_Int32 nCount;
+ sal_Unicode cChar;
+
+ Whitespace() : nCount(0), cChar(0x20) {}
+ void reset( sal_Unicode c ) { nCount = 0; cChar = c; }
+ };
+
+ static void addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces,
+ ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n = 1 );
+
+ std::vector<Whitespace> NextSymbol(bool bInArray);
+
bool IsValue( const OUString& );
bool IsOpCode( const OUString&, bool bInArray );
bool IsOpCode2( const OUString& );
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index 0d1dc9d52aa6..83eb2f4ab7db 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -336,11 +336,8 @@ ScCompiler::Convention::Convention( FormulaGrammar::AddressConvention eConv )
for (i = 0; i < 128; i++)
t[i] = ScCharFlags::Illegal;
-// tdf#56036: Allow tabs/newlines in imported formulas (for now simply treat them as (and convert to) space)
-// TODO: tdf#76310: allow saving newlines as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace")
-// This is compliant with the OASIS decision (see https://issues.oasis-open.org/browse/OFFICE-701)
-// Also, this would enable correct roundtrip from/to OOXML without losing tabs/newlines
-// This requires saving actual space characters in ocSpaces token, using them in UI and saving
+// Allow tabs/newlines.
+// Allow saving whitespace as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace").
/* tab */ t[ 9] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
/* lf */ t[10] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
/* cr */ t[13] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
@@ -2067,6 +2064,19 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz
return true;
}
+// static
+void ScCompiler::addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces,
+ ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n )
+{
+ if (rSpace.cChar != c)
+ {
+ if (rSpace.cChar && rSpace.nCount > 0)
+ rvSpaces.emplace_back(rSpace);
+ rSpace.reset(c);
+ }
+ rSpace.nCount += n;
+}
+
// NextSymbol
// Parses the formula into separate symbols for further processing.
@@ -2104,8 +2114,9 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz
// | other | Symbol=Symbol+char | GetString
//---------------+-------------------+-----------------------+---------------
-sal_Int32 ScCompiler::NextSymbol(bool bInArray)
+std::vector<ScCompiler::Whitespace> ScCompiler::NextSymbol(bool bInArray)
{
+ std::vector<Whitespace> vSpaces;
cSymbol[MAXSTRLEN] = 0; // end
sal_Unicode* pSym = cSymbol;
const sal_Unicode* const pStart = aFormula.getStr();
@@ -2116,7 +2127,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray)
bool bQuote = false;
mnRangeOpPosInSymbol = -1;
ScanState eState = ssGetChar;
- sal_Int32 nSpaces = 0;
+ Whitespace aSpace;
sal_Unicode cSep = mxSymbols->getSymbolChar( ocSep);
sal_Unicode cArrayColSep = mxSymbols->getSymbolChar( ocArrayColSep);
sal_Unicode cArrayRowSep = mxSymbols->getSymbolChar( ocArrayRowSep);
@@ -2129,6 +2140,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray)
int nDecSeps = 0;
bool bAutoIntersection = false;
+ size_t nAutoIntersectionSpacesPos = 0;
int nRefInName = 0;
bool bErrorConstantHadSlash = false;
mnPredetectedReference = 0;
@@ -2187,7 +2199,12 @@ Label_MaskStateMachine:
if (!bAutoIntersection)
{
++pSrc;
- nSpaces += 2; // must match the character count
+ // Add 2 because it must match the character count
+ // for bi18n.
+ addWhitespace( vSpaces, aSpace, 0x20, 2);
+ // Position of Whitespace where it will be added to
+ // vector.
+ nAutoIntersectionSpacesPos = vSpaces.size();
bAutoIntersection = true;
}
else
@@ -2267,7 +2284,7 @@ Label_MaskStateMachine:
}
else if( nMask & ScCharFlags::CharDontCare )
{
- nSpaces++;
+ addWhitespace( vSpaces, aSpace, c);
}
else if( nMask & ScCharFlags::CharIdent )
{ // try to get a simple ASCII identifier before calling
@@ -2731,10 +2748,15 @@ Label_MaskStateMachine:
cLast = c;
c = *pSrc;
}
+
+ if (aSpace.nCount && aSpace.cChar)
+ vSpaces.emplace_back(aSpace);
+
if ( bi18n )
{
const sal_Int32 nOldSrcPos = nSrcPos;
- nSrcPos = nSrcPos + nSpaces;
+ for (const auto& r : vSpaces)
+ nSrcPos += r.nCount;
// If group separator is not a possible operator and not one of any
// separators then it may be parsed away in numbers. This is
// specifically the case with NO-BREAK SPACE, which actually triggers
@@ -2835,9 +2857,9 @@ Label_MaskStateMachine:
}
if ( bAutoCorrect )
aCorrectedSymbol = OUString(cSymbol, pSym - cSymbol);
- if (bAutoIntersection && nSpaces > 1)
- --nSpaces; // replace '!!' with only one space
- return nSpaces;
+ if (bAutoIntersection && vSpaces[nAutoIntersectionSpacesPos].nCount > 1)
+ --vSpaces[nAutoIntersectionSpacesPos].nCount; // replace '!!' with only one space
+ return vSpaces;
}
// Convert symbol to token
@@ -4246,7 +4268,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
}
bool bAllowBooleans = bInArray;
- sal_Int32 nSpaces = NextSymbol(bInArray);
+ const std::vector<Whitespace> & vSpaces = NextSymbol(bInArray);
if (!cSymbol[0])
{
@@ -4266,15 +4288,31 @@ bool ScCompiler::NextNewToken( bool bInArray )
return false;
}
- if( nSpaces )
+ if (!vSpaces.empty())
{
ScRawToken aToken;
- aToken.SetOpCode( ocSpaces );
- aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(nSpaces, 255) );
- if( !static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ) )
+ for (const auto& rSpace : vSpaces)
{
- SetError(FormulaError::CodeOverflow);
- return false;
+ if (rSpace.cChar == 0x20)
+ {
+ // For now keep this a FormulaByteToken for the nasty
+ // significant whitespace intersection. This probably can be
+ // changed to a FormulaSpaceToken but then other places may
+ // need to be adapted.
+ aToken.SetOpCode( ocSpaces );
+ aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) );
+ }
+ else
+ {
+ aToken.SetOpCode( ocWhitespace );
+ aToken.whitespace.nCount = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) );
+ aToken.whitespace.cChar = rSpace.cChar;
+ }
+ if (!static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ))
+ {
+ SetError(FormulaError::CodeOverflow);
+ return false;
+ }
}
}
diff --git a/sc/source/core/tool/parclass.cxx b/sc/source/core/tool/parclass.cxx
index 8dd39016cb48..6c560b07b42f 100644
--- a/sc/source/core/tool/parclass.cxx
+++ b/sc/source/core/tool/parclass.cxx
@@ -74,6 +74,7 @@ const ScParameterClassification::RawData ScParameterClassification::pRawData[] =
{ ocSep, {{ Bounds }, 0, Bounds }},
{ ocSkip, {{ Bounds }, 0, Bounds }},
{ ocSpaces, {{ Bounds }, 0, Bounds }},
+ { ocWhitespace, {{ Bounds }, 0, Bounds }},
{ ocStop, {{ Bounds }, 0, Bounds }},
{ ocStringXML, {{ Bounds }, 0, Bounds }},
{ ocTableRef, {{ Bounds }, 0, Value }}, // or Reference?
diff --git a/sc/source/core/tool/token.cxx b/sc/source/core/tool/token.cxx
index 04355b86a8ec..f17cd9ac27ad 100644
--- a/sc/source/core/tool/token.cxx
+++ b/sc/source/core/tool/token.cxx
@@ -235,6 +235,11 @@ void ScRawToken::SetOpCode( OpCode e )
case ocTableRefClose:
eType = svSep;
break;
+ case ocWhitespace:
+ eType = svByte;
+ whitespace.nCount = 1;
+ whitespace.cChar = 0x20;
+ break;
default:
eType = svByte;
sbyte.cByte = 0;
@@ -349,7 +354,10 @@ FormulaToken* ScRawToken::CreateToken(ScSheetLimits& rLimits) const
switch ( GetType() )
{
case svByte :
- return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray );
+ if (eOp == ocWhitespace)
+ return new FormulaSpaceToken( whitespace.nCount, whitespace.cChar );
+ else
+ return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray );
case svDouble :
IF_NOT_OPCODE_ERROR( ocPush, FormulaDoubleToken);
return new FormulaDoubleToken( nValue );
@@ -1652,6 +1660,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
case ocMissing:
case ocBad:
case ocSpaces:
+ case ocWhitespace:
case ocSkip:
case ocPercentSign:
case ocErrNull:
@@ -2089,6 +2098,7 @@ FormulaToken* ScTokenArray::MergeArray( )
break;
case ocSpaces :
+ case ocWhitespace :
// ignore spaces
--nPrevRowSep; // shorten this row by 1
break;
@@ -5136,12 +5146,18 @@ OUString ScTokenArray::CreateString( sc::TokenStringContext& rCxt, const ScAddre
{
const FormulaToken* pToken = *p;
OpCode eOp = pToken->GetOpCode();
+ /* FIXME: why does this ignore the count of spaces? */
if (eOp == ocSpaces)
{
// TODO : Handle intersection operator '!!'.
aBuf.append(' ');
continue;
}
+ else if (eOp == ocWhitespace)
+ {
+ aBuf.append( pToken->GetChar());
+ continue;
+ }
if (eOp < rCxt.mxOpCodeMap->getSymbolCount())
aBuf.append(rCxt.mxOpCodeMap->getSymbol(eOp));
diff --git a/sc/source/filter/excel/xeformula.cxx b/sc/source/filter/excel/xeformula.cxx
index f2edeffb263c..f829529ca0db 100644
--- a/sc/source/filter/excel/xeformula.cxx
+++ b/sc/source/filter/excel/xeformula.cxx
@@ -826,9 +826,13 @@ const FormulaToken* XclExpFmlaCompImpl::PeekNextRawToken() const
bool XclExpFmlaCompImpl::GetNextToken( XclExpScToken& rTokData )
{
rTokData.mpScToken = GetNextRawToken();
- rTokData.mnSpaces = (rTokData.GetOpCode() == ocSpaces) ? rTokData.mpScToken->GetByte() : 0;
- while( rTokData.GetOpCode() == ocSpaces )
+ rTokData.mnSpaces = 0;
+ /* TODO: handle ocWhitespace characters? */
+ while (rTokData.GetOpCode() == ocSpaces || rTokData.GetOpCode() == ocWhitespace)
+ {
+ rTokData.mnSpaces += rTokData.mpScToken->GetByte();
rTokData.mpScToken = GetNextRawToken();
+ }
return rTokData.Is();
}
diff --git a/sc/source/filter/excel/xlformula.cxx b/sc/source/filter/excel/xlformula.cxx
index 1f974f47b38b..e2e082ac2651 100644
--- a/sc/source/filter/excel/xlformula.cxx
+++ b/sc/source/filter/excel/xlformula.cxx
@@ -867,8 +867,11 @@ void XclTokenArrayIterator::NextRawToken()
void XclTokenArrayIterator::SkipSpaces()
{
if( mbSkipSpaces )
- while( Is() && ((*this)->GetOpCode() == ocSpaces) )
+ {
+ OpCode eOp;
+ while( Is() && (((eOp = (*this)->GetOpCode()) == ocSpaces) || eOp == ocWhitespace) )
NextRawToken();
+ }
}
// strings and string lists ---------------------------------------------------
diff --git a/sc/source/ui/app/inputhdl.cxx b/sc/source/ui/app/inputhdl.cxx
index f01f93d46d70..b3e644fc8620 100644
--- a/sc/source/ui/app/inputhdl.cxx
+++ b/sc/source/ui/app/inputhdl.cxx
@@ -654,7 +654,7 @@ void ScInputHandler::DeleteRangeFinder()
static OUString GetEditText(const EditEngine* pEng)
{
- return ScEditUtil::GetSpaceDelimitedString(*pEng);
+ return ScEditUtil::GetMultilineString(*pEng);
}
static void lcl_RemoveTabs(OUString& rStr)
diff --git a/sc/source/ui/unoobj/tokenuno.cxx b/sc/source/ui/unoobj/tokenuno.cxx
index 33f005fff8d6..b07a04e12b04 100644
--- a/sc/source/ui/unoobj/tokenuno.cxx
+++ b/sc/source/ui/unoobj/tokenuno.cxx
@@ -32,6 +32,7 @@
#include <svl/itemprop.hxx>
#include <vcl/svapp.hxx>
+#include <comphelper/string.hxx>
#include <miscuno.hxx>
#include <convuno.hxx>
@@ -388,6 +389,18 @@ void ScTokenConversion::ConvertToTokenSequence( const ScDocument& rDoc,
// Only the count of spaces is stored as "long". Parameter count is ignored.
if ( eOpCode == ocSpaces )
rAPI.Data <<= static_cast<sal_Int32>(rToken.GetByte());
+ else if (eOpCode == ocWhitespace)
+ {
+ // Convention is one character repeated.
+ if (rToken.GetByte() == 1)
+ rAPI.Data <<= OUString( rToken.GetChar());
+ else
+ {
+ OUStringBuffer aBuf( rToken.GetByte());
+ comphelper::string::padToLength( aBuf, rToken.GetByte(), rToken.GetChar());
+ rAPI.Data <<= aBuf.makeStringAndClear();
+ }
+ }
else
rAPI.Data.clear(); // no data
break;
diff --git a/sc/source/ui/view/viewfunc.cxx b/sc/source/ui/view/viewfunc.cxx
index fbe8a0b3719c..4e5e149c9755 100644
--- a/sc/source/ui/view/viewfunc.cxx
+++ b/sc/source/ui/view/viewfunc.cxx
@@ -677,7 +677,7 @@ void ScViewFunc::EnterData( SCCOL nCol, SCROW nRow, SCTAB nTab,
}
// #i97726# always get text for "repeat" of undo action
- aString = ScEditUtil::GetSpaceDelimitedString(aEngine);
+ aString = ScEditUtil::GetMultilineString(aEngine);
// undo