/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /************************************************************************* * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * Copyright 2000, 2010 Oracle and/or its affiliates. * * OpenOffice.org - a multi-platform office productivity suite * * This file is part of OpenOffice.org. * * OpenOffice.org is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License version 3 * only, as published by the Free Software Foundation. * * OpenOffice.org is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License version 3 for more details * (a copy is included in the LICENSE file that accompanied this code). * * You should have received a copy of the GNU Lesser General Public License * version 3 along with OpenOffice.org. If not, see * * for a copy of the LGPLv3 License. * ************************************************************************/ // MARKER(update_precomp.py): autogen include statement, do not remove #include "precompiled_xmlhelp.hxx" #include #include #include #include #include #include #ifndef INCLUDED_STL_ALGORITHM #include #define INCLUDED_STL_ALGORITHM #endif #ifndef INCLUDED_STL_SET #include #define INCLUDED_STL_SET #endif #include #include #include "resultsetforquery.hxx" #include "databases.hxx" // For testing // #define LOGGING using namespace std; using namespace chelp; using namespace xmlsearch::excep; using namespace xmlsearch::qe; using namespace com::sun::star; using namespace com::sun::star::ucb; using namespace com::sun::star::i18n; using namespace com::sun::star::uno; using namespace com::sun::star::lang; struct HitItem { rtl::OUString m_aURL; float m_fScore; HitItem( void ) {} HitItem( const rtl::OUString& aURL, float fScore ) : m_aURL( aURL ) , m_fScore( fScore ) {} bool operator < ( const HitItem& rHitItem ) const { return rHitItem.m_fScore < m_fScore; } }; ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >& xMSF, const uno::Reference< XContentProvider >& xProvider, sal_Int32 nOpenMode, const uno::Sequence< beans::Property >& seq, const uno::Sequence< NumberedSortingInfo >& seqSort, URLParameter& aURLParameter, Databases* pDatabases ) : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ), m_pDatabases( pDatabases ), m_aURLParameter( aURLParameter ) { Reference< XTransliteration > xTrans( xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ), UNO_QUERY ); Locale aLocale( aURLParameter.get_language(), rtl::OUString(), rtl::OUString() ); if(xTrans.is()) xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE, aLocale ); // Access Lucene via XInvocation Reference< script::XInvocation > xInvocation( xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ), UNO_QUERY ); vector< vector< rtl::OUString > > queryList; { sal_Int32 idx; rtl::OUString query = m_aURLParameter.get_query(); while( query.getLength() ) { idx = query.indexOf( sal_Unicode( ' ' ) ); if( idx == -1 ) idx = query.getLength(); vector< rtl::OUString > currentQuery; rtl::OUString tmp(query.copy( 0,idx )); rtl:: OUString toliterate = tmp; if(xTrans.is()) { Sequence aSeq; toliterate = xTrans->transliterate( tmp,0,tmp.getLength(),aSeq); } currentQuery.push_back( toliterate ); queryList.push_back( currentQuery ); int nCpy = 1 + idx; if( nCpy >= query.getLength() ) query = rtl::OUString(); else query = query.copy( 1 + idx ); } } vector< rtl::OUString > aCompleteResultVector; if( xInvocation.is() ) { rtl::OUString scope = m_aURLParameter.get_scope(); bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 ); sal_Int32 hitCount = m_aURLParameter.get_hitCount(); #ifdef LOGGING FILE* pFile = fopen( "d:\\resultset_out.txt", "w" ); #endif IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() ); rtl::OUString idxDir; bool bExtension = false; int iDir = 0; vector< vector* > aIndexFolderResultVectorVector; bool bTemporary; while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 ) { vector aIndexFolderResultVector; try { vector< vector* > aQueryListResultVectorVector; set< rtl::OUString > aSet,aCurrent,aResultSet; int nQueryListSize = queryList.size(); if( nQueryListSize > 1 ) hitCount = 2000; for( int i = 0; i < nQueryListSize; ++i ) { vector* pQueryResultVector; if( nQueryListSize > 1 ) { pQueryResultVector = new vector(); aQueryListResultVectorVector.push_back( pQueryResultVector ); } else { pQueryResultVector = &aIndexFolderResultVector; } pQueryResultVector->reserve( hitCount ); int nParamCount = bCaptionsOnly ? 7 : 6; Sequence aParamsSeq( nParamCount ); aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) ); aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() ); aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) ); rtl::OUString aSystemPath; osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath ); aParamsSeq[3] = uno::makeAny( aSystemPath ); aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) ); const std::vector< rtl::OUString >& aListItem = queryList[i]; ::rtl::OUString aNewQueryStr = aListItem[0]; aParamsSeq[5] = uno::makeAny( aNewQueryStr ); if( bCaptionsOnly ) aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) ); Sequence< sal_Int16 > aOutParamIndex; Sequence< uno::Any > aOutParam; uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ), aParamsSeq, aOutParamIndex, aOutParam ); Sequence< float > aScoreSeq; int nScoreCount = 0; int nOutParamCount = aOutParam.getLength(); if( nOutParamCount == 1 ) { const uno::Any* pScoreAnySeq = aOutParam.getConstArray(); if( pScoreAnySeq[0] >>= aScoreSeq ) nScoreCount = aScoreSeq.getLength(); } Sequence aRetSeq; if( aRet >>= aRetSeq ) { if( nQueryListSize > 1 ) aSet.clear(); const rtl::OUString* pRetSeq = aRetSeq.getConstArray(); int nCount = aRetSeq.getLength(); if( nCount > hitCount ) nCount = hitCount; for( int j = 0 ; j < nCount ; ++j ) { float fScore = 0.0; if( j < nScoreCount ) fScore = aScoreSeq[j]; rtl::OUString aURL = pRetSeq[j]; pQueryResultVector->push_back( HitItem( aURL, fScore ) ); if( nQueryListSize > 1 ) aSet.insert( aURL ); #ifdef LOGGING if( pFile ) { rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8)); fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() ); } #endif } } // intersect if( nQueryListSize > 1 ) { if( i == 0 ) { aResultSet = aSet; } else { aCurrent = aResultSet; aResultSet.clear(); set_intersection( aSet.begin(),aSet.end(), aCurrent.begin(),aCurrent.end(), inserter(aResultSet,aResultSet.begin())); } } } // Combine results in aIndexFolderResultVector if( nQueryListSize > 1 ) { for( int n = 0 ; n < nQueryListSize ; ++n ) { vector* pQueryResultVector = aQueryListResultVectorVector[n]; vector& rQueryResultVector = *pQueryResultVector; int nItemCount = rQueryResultVector.size(); for( int i = 0 ; i < nItemCount ; ++i ) { const HitItem& rItem = rQueryResultVector[ i ]; set< rtl::OUString >::iterator it; if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() ) { HitItem aItemCopy( rItem ); aItemCopy.m_fScore /= nQueryListSize; // To get average score if( n == 0 ) { // Use first pass to create entry aIndexFolderResultVector.push_back( aItemCopy ); #ifdef LOGGING if( pFile ) { rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() ); } #endif } else { // Find entry in vector int nCount = aIndexFolderResultVector.size(); for( int j = 0 ; j < nCount ; ++j ) { HitItem& rFindItem = aIndexFolderResultVector[ j ]; if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) ) { #ifdef LOGGING if( pFile ) { rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i, rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() ); } #endif rFindItem.m_fScore += aItemCopy.m_fScore; break; } } } } } delete pQueryResultVector; } sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() ); } vector* pIndexFolderHitItemVector = new vector( aIndexFolderResultVector ); aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector ); aIndexFolderResultVector.clear(); } catch( const Exception& ) { } ++iDir; if( bTemporary ) aIndexFolderIt.deleteTempIndexFolder( idxDir ); } // Iterator int nVectorCount = aIndexFolderResultVectorVector.size(); vector::size_type* pCurrentVectorIndex = new vector::size_type[nVectorCount]; for( int j = 0 ; j < nVectorCount ; ++j ) pCurrentVectorIndex[j] = 0; #ifdef LOGGING if( pFile ) { for( int k = 0 ; k < nVectorCount ; ++k ) { vector& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; int nItemCount = rIndexFolderVector.size(); fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount ); for( int i = 0 ; i < nItemCount ; ++i ) { const HitItem& rItem = rIndexFolderVector[ i ]; rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8)); fprintf( pFile, " Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() ); } } } #endif sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount(); sal_Int32 nHitCount = 0; while( nHitCount < nTotalHitCount ) { int iVectorWithBestScore = -1; float fBestScore = 0.0; for( int k = 0 ; k < nVectorCount ; ++k ) { vector& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; if( pCurrentVectorIndex[k] < rIndexFolderVector.size() ) { const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ]; if( fBestScore < rItem.m_fScore ) { fBestScore = rItem.m_fScore; iVectorWithBestScore = k; } } } if( iVectorWithBestScore == -1 ) // No item left at all break; vector& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore]; const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ]; pCurrentVectorIndex[iVectorWithBestScore]++; aCompleteResultVector.push_back( rItem.m_aURL ); ++nHitCount; } delete[] pCurrentVectorIndex; for( int n = 0 ; n < nVectorCount ; ++n ) { vector* pIndexFolderVector = aIndexFolderResultVectorVector[n]; delete pIndexFolderVector; } #ifdef LOGGING fclose( pFile ); #endif } sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength(); rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" ); int nResultCount = aCompleteResultVector.size(); for( int r = 0 ; r < nResultCount ; ++r ) { rtl::OUString aURL = aCompleteResultVector[r]; rtl::OUString aResultStr = replWith + aURL.copy(replIdx); m_aPath.push_back( aResultStr ); } m_aItems.resize( m_aPath.size() ); m_aIdents.resize( m_aPath.size() ); Command aCommand; aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" ); aCommand.Argument <<= m_sProperty; for( m_nRow = 0; sal::static_int_cast( m_nRow ) < m_aPath.size(); ++m_nRow ) { m_aPath[m_nRow] = m_aPath[m_nRow] + rtl::OUString::createFromAscii( "?Language=" ) + m_aURLParameter.get_language() + rtl::OUString::createFromAscii( "&System=" ) + m_aURLParameter.get_system(); uno::Reference< XContent > content = queryContent(); if( content.is() ) { uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY ); cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>= } } m_nRow = 0xffffffff; } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */