diff options
Diffstat (limited to 'xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx')
-rw-r--r-- | xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx | 449 |
1 files changed, 449 insertions, 0 deletions
diff --git a/xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx b/xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx new file mode 100644 index 000000000000..055b59048722 --- /dev/null +++ b/xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx @@ -0,0 +1,449 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +// MARKER(update_precomp.py): autogen include statement, do not remove +#include "precompiled_xmlhelp.hxx" +#include <com/sun/star/ucb/Command.hpp> +#include <com/sun/star/ucb/XCommandEnvironment.hpp> +#include <com/sun/star/i18n/XExtendedTransliteration.hpp> +#include <com/sun/star/ucb/XCommandProcessor.hpp> +#include <com/sun/star/lang/Locale.hpp> +#include <com/sun/star/script/XInvocation.hpp> + +#ifndef INCLUDED_STL_ALGORITHM +#include <algorithm> +#define INCLUDED_STL_ALGORITHM +#endif +#ifndef INCLUDED_STL_SET +#include <set> +#define INCLUDED_STL_SET +#endif + +#include <qe/Query.hxx> +#include <qe/DocGenerator.hxx> +#include "resultsetforquery.hxx" +#include "databases.hxx" + +// For testing +// #define LOGGING + +using namespace std; +using namespace chelp; +using namespace xmlsearch::excep; +using namespace xmlsearch::qe; +using namespace com::sun::star; +using namespace com::sun::star::ucb; +using namespace com::sun::star::i18n; +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +struct HitItem +{ + rtl::OUString m_aURL; + float m_fScore; + + HitItem( void ) {} + HitItem( const rtl::OUString& aURL, float fScore ) + : m_aURL( aURL ) + , m_fScore( fScore ) + {} + bool operator < ( const HitItem& rHitItem ) const + { + return rHitItem.m_fScore < m_fScore; + } +}; + +ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >& xMSF, + const uno::Reference< XContentProvider >& xProvider, + sal_Int32 nOpenMode, + const uno::Sequence< beans::Property >& seq, + const uno::Sequence< NumberedSortingInfo >& seqSort, + URLParameter& aURLParameter, + Databases* pDatabases ) + : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ), + m_pDatabases( pDatabases ), + m_aURLParameter( aURLParameter ) +{ + Reference< XTransliteration > xTrans( + xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ), + UNO_QUERY ); + Locale aLocale( aURLParameter.get_language(), + rtl::OUString(), + rtl::OUString() ); + if(xTrans.is()) + xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE, + aLocale ); + + // Access Lucene via XInvocation + Reference< script::XInvocation > xInvocation( + xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ), + UNO_QUERY ); + + vector< vector< rtl::OUString > > queryList; + { + sal_Int32 idx; + rtl::OUString query = m_aURLParameter.get_query(); + while( query.getLength() ) + { + idx = query.indexOf( sal_Unicode( ' ' ) ); + if( idx == -1 ) + idx = query.getLength(); + + vector< rtl::OUString > currentQuery; + rtl::OUString tmp(query.copy( 0,idx )); + rtl:: OUString toliterate = tmp; + if(xTrans.is()) { + Sequence<sal_Int32> aSeq; + toliterate = xTrans->transliterate( + tmp,0,tmp.getLength(),aSeq); + } + + currentQuery.push_back( toliterate ); + queryList.push_back( currentQuery ); + + int nCpy = 1 + idx; + if( nCpy >= query.getLength() ) + query = rtl::OUString(); + else + query = query.copy( 1 + idx ); + } + } + + vector< rtl::OUString > aCompleteResultVector; + if( xInvocation.is() ) + { + rtl::OUString scope = m_aURLParameter.get_scope(); + bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 ); + sal_Int32 hitCount = m_aURLParameter.get_hitCount(); + +#ifdef LOGGING + FILE* pFile = fopen( "d:\\resultset_out.txt", "w" ); +#endif + + IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() ); + rtl::OUString idxDir; + bool bExtension = false; + int iDir = 0; + vector< vector<HitItem>* > aIndexFolderResultVectorVector; + + bool bTemporary; + while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 ) + { + vector<HitItem> aIndexFolderResultVector; + + try + { + vector< vector<HitItem>* > aQueryListResultVectorVector; + set< rtl::OUString > aSet,aCurrent,aResultSet; + + int nQueryListSize = queryList.size(); + if( nQueryListSize > 1 ) + hitCount = 2000; + + for( int i = 0; i < nQueryListSize; ++i ) + { + vector<HitItem>* pQueryResultVector; + if( nQueryListSize > 1 ) + { + pQueryResultVector = new vector<HitItem>(); + aQueryListResultVectorVector.push_back( pQueryResultVector ); + } + else + { + pQueryResultVector = &aIndexFolderResultVector; + } + pQueryResultVector->reserve( hitCount ); + + int nParamCount = bCaptionsOnly ? 7 : 6; + Sequence<uno::Any> aParamsSeq( nParamCount ); + + aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) ); + aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() ); + + aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) ); + rtl::OUString aSystemPath; + osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath ); + aParamsSeq[3] = uno::makeAny( aSystemPath ); + + aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) ); + + const std::vector< rtl::OUString >& aListItem = queryList[i]; + ::rtl::OUString aNewQueryStr = aListItem[0]; + aParamsSeq[5] = uno::makeAny( aNewQueryStr ); + + if( bCaptionsOnly ) + aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) ); + + Sequence< sal_Int16 > aOutParamIndex; + Sequence< uno::Any > aOutParam; + + uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ), + aParamsSeq, aOutParamIndex, aOutParam ); + + Sequence< float > aScoreSeq; + int nScoreCount = 0; + int nOutParamCount = aOutParam.getLength(); + if( nOutParamCount == 1 ) + { + const uno::Any* pScoreAnySeq = aOutParam.getConstArray(); + if( pScoreAnySeq[0] >>= aScoreSeq ) + nScoreCount = aScoreSeq.getLength(); + } + + Sequence<rtl::OUString> aRetSeq; + if( aRet >>= aRetSeq ) + { + if( nQueryListSize > 1 ) + aSet.clear(); + + const rtl::OUString* pRetSeq = aRetSeq.getConstArray(); + int nCount = aRetSeq.getLength(); + if( nCount > hitCount ) + nCount = hitCount; + for( int j = 0 ; j < nCount ; ++j ) + { + float fScore = 0.0; + if( j < nScoreCount ) + fScore = aScoreSeq[j]; + + rtl::OUString aURL = pRetSeq[j]; + pQueryResultVector->push_back( HitItem( aURL, fScore ) ); + if( nQueryListSize > 1 ) + aSet.insert( aURL ); + +#ifdef LOGGING + if( pFile ) + { + rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8)); + fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() ); + } +#endif + } + } + + // intersect + if( nQueryListSize > 1 ) + { + if( i == 0 ) + { + aResultSet = aSet; + } + else + { + aCurrent = aResultSet; + aResultSet.clear(); + set_intersection( aSet.begin(),aSet.end(), + aCurrent.begin(),aCurrent.end(), + inserter(aResultSet,aResultSet.begin())); + } + } + } + + // Combine results in aIndexFolderResultVector + if( nQueryListSize > 1 ) + { + for( int n = 0 ; n < nQueryListSize ; ++n ) + { + vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n]; + vector<HitItem>& rQueryResultVector = *pQueryResultVector; + + int nItemCount = rQueryResultVector.size(); + for( int i = 0 ; i < nItemCount ; ++i ) + { + const HitItem& rItem = rQueryResultVector[ i ]; + set< rtl::OUString >::iterator it; + if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() ) + { + HitItem aItemCopy( rItem ); + aItemCopy.m_fScore /= nQueryListSize; // To get average score + if( n == 0 ) + { + // Use first pass to create entry + aIndexFolderResultVector.push_back( aItemCopy ); + +#ifdef LOGGING + if( pFile ) + { + rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); + fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() ); + } +#endif + } + else + { + // Find entry in vector + int nCount = aIndexFolderResultVector.size(); + for( int j = 0 ; j < nCount ; ++j ) + { + HitItem& rFindItem = aIndexFolderResultVector[ j ]; + if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) ) + { +#ifdef LOGGING + if( pFile ) + { + rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); + fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i, + rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() ); + } +#endif + + rFindItem.m_fScore += aItemCopy.m_fScore; + break; + } + } + } + } + } + + delete pQueryResultVector; + } + + sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() ); + } + + vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector ); + aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector ); + aIndexFolderResultVector.clear(); + } + catch( const Exception& ) + { + } + + ++iDir; + + if( bTemporary ) + aIndexFolderIt.deleteTempIndexFolder( idxDir ); + + } // Iterator + + + int nVectorCount = aIndexFolderResultVectorVector.size(); + vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount]; + for( int j = 0 ; j < nVectorCount ; ++j ) + pCurrentVectorIndex[j] = 0; + +#ifdef LOGGING + if( pFile ) + { + for( int k = 0 ; k < nVectorCount ; ++k ) + { + vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; + int nItemCount = rIndexFolderVector.size(); + + fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount ); + + for( int i = 0 ; i < nItemCount ; ++i ) + { + const HitItem& rItem = rIndexFolderVector[ i ]; + rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8)); + fprintf( pFile, " Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() ); + } + } + } +#endif + + sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount(); + sal_Int32 nHitCount = 0; + while( nHitCount < nTotalHitCount ) + { + int iVectorWithBestScore = -1; + float fBestScore = 0.0; + for( int k = 0 ; k < nVectorCount ; ++k ) + { + vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; + if( pCurrentVectorIndex[k] < rIndexFolderVector.size() ) + { + const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ]; + + if( fBestScore < rItem.m_fScore ) + { + fBestScore = rItem.m_fScore; + iVectorWithBestScore = k; + } + } + } + + if( iVectorWithBestScore == -1 ) // No item left at all + break; + + vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore]; + const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ]; + + pCurrentVectorIndex[iVectorWithBestScore]++; + + aCompleteResultVector.push_back( rItem.m_aURL ); + ++nHitCount; + } + + delete[] pCurrentVectorIndex; + for( int n = 0 ; n < nVectorCount ; ++n ) + { + vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n]; + delete pIndexFolderVector; + } + +#ifdef LOGGING + fclose( pFile ); +#endif + } + + sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength(); + rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" ); + + int nResultCount = aCompleteResultVector.size(); + for( int r = 0 ; r < nResultCount ; ++r ) + { + rtl::OUString aURL = aCompleteResultVector[r]; + rtl::OUString aResultStr = replWith + aURL.copy(replIdx); + m_aPath.push_back( aResultStr ); + } + + m_aItems.resize( m_aPath.size() ); + m_aIdents.resize( m_aPath.size() ); + + Command aCommand; + aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" ); + aCommand.Argument <<= m_sProperty; + + for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow ) + { + m_aPath[m_nRow] = + m_aPath[m_nRow] + + rtl::OUString::createFromAscii( "?Language=" ) + + m_aURLParameter.get_language() + + rtl::OUString::createFromAscii( "&System=" ) + + m_aURLParameter.get_system(); + + uno::Reference< XContent > content = queryContent(); + if( content.is() ) + { + uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY ); + cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>= + } + } + m_nRow = 0xffffffff; +} |