summaryrefslogtreecommitdiff
path: root/xmloff/source/core/RDFaImportHelper.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'xmloff/source/core/RDFaImportHelper.cxx')
-rw-r--r--xmloff/source/core/RDFaImportHelper.cxx501
1 files changed, 501 insertions, 0 deletions
diff --git a/xmloff/source/core/RDFaImportHelper.cxx b/xmloff/source/core/RDFaImportHelper.cxx
new file mode 100644
index 000000000000..611889497812
--- /dev/null
+++ b/xmloff/source/core/RDFaImportHelper.cxx
@@ -0,0 +1,501 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+#include "precompiled_xmloff.hxx"
+
+#include "RDFaImportHelper.hxx"
+
+#include <xmloff/xmlimp.hxx>
+#include <xmloff/nmspmap.hxx>
+
+#include <comphelper/sequenceasvector.hxx>
+
+#include <tools/string.hxx> // for GetAbsoluteReference
+
+#include <com/sun/star/rdf/URI.hpp>
+#include <com/sun/star/rdf/XDocumentMetadataAccess.hpp>
+#include <com/sun/star/rdf/XDocumentRepository.hpp>
+
+#include <rtl/ustring.hxx>
+
+#include <boost/bind.hpp>
+#include <boost/iterator_adaptors.hpp>
+#ifndef BOOST_ITERATOR_ADAPTOR_DWA053000_HPP_ // from iterator_adaptors.hpp
+// N.B.: the check for the header guard _of a specific version of boost_
+// is here so this may work on different versions of boost,
+// which sadly put the goods in different header files
+#include <boost/iterator/transform_iterator.hpp>
+#endif
+
+#include <map>
+#include <iterator>
+#include <functional>
+#include <algorithm>
+
+
+using namespace ::com::sun::star;
+
+namespace xmloff {
+
+/** a bit of context for parsing RDFa attributes */
+class SAL_DLLPRIVATE RDFaReader
+{
+ const SvXMLImport & m_rImport;
+
+ const SvXMLImport & GetImport() const { return m_rImport; }
+
+ //FIXME: this is an ugly hack to workaround buggy SvXMLImport::GetAbsolute
+ ::rtl::OUString GetAbsoluteReference(::rtl::OUString const & i_rURI) const
+ {
+ if (!i_rURI.getLength() || i_rURI[0] == '#')
+ {
+ return GetImport().GetBaseURL() + i_rURI;
+ }
+ else
+ {
+ return GetImport().GetAbsoluteReference(i_rURI);
+ }
+ }
+
+public:
+ RDFaReader(SvXMLImport const & i_rImport)
+ : m_rImport(i_rImport)
+ { }
+
+ // returns URI or blank node!
+ ::rtl::OUString ReadCURIE(::rtl::OUString const & i_rCURIE) const;
+
+ std::vector< ::rtl::OUString >
+ ReadCURIEs(::rtl::OUString const & i_rCURIEs) const;
+
+ ::rtl::OUString
+ ReadURIOrSafeCURIE( ::rtl::OUString const & i_rURIOrSafeCURIE) const;
+};
+
+/** helper to insert RDFa statements into the RDF repository */
+class SAL_DLLPRIVATE RDFaInserter
+{
+ const uno::Reference<uno::XComponentContext> m_xContext;
+ uno::Reference< rdf::XDocumentRepository > m_xRepository;
+
+ typedef ::std::map< ::rtl::OUString, uno::Reference< rdf::XBlankNode > >
+ BlankNodeMap_t;
+
+ BlankNodeMap_t m_BlankNodeMap;
+
+public:
+ RDFaInserter(uno::Reference<uno::XComponentContext> const & i_xContext,
+ uno::Reference< rdf::XDocumentRepository > const & i_xRepository)
+ : m_xContext(i_xContext)
+ , m_xRepository(i_xRepository)
+ {}
+
+ uno::Reference< rdf::XBlankNode >
+ LookupBlankNode(::rtl::OUString const & i_rNodeId );
+
+ uno::Reference< rdf::XURI >
+ MakeURI( ::rtl::OUString const & i_rURI) const;
+
+ uno::Reference< rdf::XResource>
+ MakeResource( ::rtl::OUString const & i_rResource);
+
+ void InsertRDFaEntry(struct RDFaEntry const & i_rEntry);
+};
+
+/** store parsed RDFa attributes */
+struct SAL_DLLPRIVATE ParsedRDFaAttributes
+{
+ ::rtl::OUString m_About;
+ ::std::vector< ::rtl::OUString > m_Properties;
+ ::rtl::OUString m_Content;
+ ::rtl::OUString m_Datatype;
+
+ ParsedRDFaAttributes(
+ ::rtl::OUString const & i_rAbout,
+ ::std::vector< ::rtl::OUString > const & i_rProperties,
+ ::rtl::OUString const & i_rContent,
+ ::rtl::OUString const & i_rDatatype)
+ : m_About(i_rAbout)
+ , m_Properties(i_rProperties)
+ , m_Content(i_rContent)
+ , m_Datatype(i_rDatatype)
+ { }
+};
+
+/** store metadatable object and its RDFa attributes */
+struct SAL_DLLPRIVATE RDFaEntry
+{
+ uno::Reference<rdf::XMetadatable> m_xObject;
+ ::boost::shared_ptr<ParsedRDFaAttributes> m_pRDFaAttributes;
+
+ RDFaEntry(uno::Reference<rdf::XMetadatable> const & i_xObject,
+ ::boost::shared_ptr<ParsedRDFaAttributes> const& i_pRDFaAttributes)
+ : m_xObject(i_xObject)
+ , m_pRDFaAttributes(i_pRDFaAttributes)
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////////
+
+
+static inline bool isWS(const sal_Unicode i_Char)
+{
+ return ('\t' == i_Char) || ('\n' == i_Char) || ('\r' == i_Char)
+ || (' ' == i_Char);
+}
+
+static ::rtl::OUString splitAtWS(::rtl::OUString & io_rString)
+{
+ const sal_Int32 len( io_rString.getLength() );
+ sal_Int32 idxstt(0);
+ while ((idxstt < len) && ( isWS(io_rString[idxstt])))
+ ++idxstt; // skip leading ws
+ sal_Int32 idxend(idxstt);
+ while ((idxend < len) && (!isWS(io_rString[idxend])))
+ ++idxend; // the CURIE
+ const ::rtl::OUString ret(io_rString.copy(idxstt, idxend - idxstt));
+ io_rString = io_rString.copy(idxend); // rest
+ return ret;
+}
+
+::rtl::OUString
+RDFaReader::ReadCURIE(::rtl::OUString const & i_rCURIE) const
+{
+ // the RDFa spec says that a prefix is required (it may be empty: ":foo")
+ const sal_Int32 idx( i_rCURIE.indexOf(':') );
+ if (idx >= 0)
+ {
+ ::rtl::OUString Prefix;
+ ::rtl::OUString LocalName;
+ ::rtl::OUString Namespace;
+ sal_uInt16 nKey( GetImport().GetNamespaceMap()._GetKeyByAttrName(
+ i_rCURIE, &Prefix, &LocalName, &Namespace) );
+ if (Prefix.equalsAscii("_"))
+ {
+ // eeek, it's a bnode!
+ // "_" is not a valid URI scheme => we can identify bnodes
+ return i_rCURIE;
+ }
+ else
+ {
+ OSL_ENSURE(XML_NAMESPACE_NONE != nKey, "no namespace?");
+ if ((XML_NAMESPACE_UNKNOWN != nKey) &&
+ (XML_NAMESPACE_XMLNS != nKey))
+ {
+ // N.B.: empty LocalName is valid!
+ const ::rtl::OUString URI(Namespace + LocalName);
+// return GetImport().GetAbsoluteReference(URI);
+ return GetAbsoluteReference(URI);
+ }
+ else
+ {
+ OSL_TRACE( "ReadCURIE: invalid CURIE: invalid prefix" );
+ return ::rtl::OUString();
+ }
+ }
+ }
+ else
+ {
+ OSL_TRACE( "ReadCURIE: invalid CURIE: no prefix" );
+ return ::rtl::OUString();
+ }
+}
+
+::std::vector< ::rtl::OUString >
+RDFaReader::ReadCURIEs(::rtl::OUString const & i_rCURIEs) const
+{
+ std::vector< ::rtl::OUString > vec;
+ ::rtl::OUString CURIEs(i_rCURIEs);
+ do {
+ ::rtl::OUString curie( splitAtWS(CURIEs) );
+ if (curie.getLength())
+ {
+ const ::rtl::OUString uri(ReadCURIE(curie));
+ if (uri.getLength())
+ {
+ vec.push_back(uri);
+ }
+ }
+ }
+ while (CURIEs.getLength());
+ if (!vec.size())
+ {
+ OSL_TRACE( "ReadCURIEs: invalid CURIEs" );
+ }
+ return vec;
+}
+
+::rtl::OUString
+RDFaReader::ReadURIOrSafeCURIE(::rtl::OUString const & i_rURIOrSafeCURIE) const
+{
+ const sal_Int32 len(i_rURIOrSafeCURIE.getLength());
+ if (len && (i_rURIOrSafeCURIE[0] == '['))
+ {
+ if ((len >= 2) && (i_rURIOrSafeCURIE[len - 1] == ']'))
+ {
+ return ReadCURIE(i_rURIOrSafeCURIE.copy(1, len - 2));
+ }
+ else
+ {
+ OSL_TRACE( "ReadURIOrSafeCURIE: invalid SafeCURIE" );
+ return ::rtl::OUString();
+ }
+ }
+ else
+ {
+ if (i_rURIOrSafeCURIE.matchAsciiL("_:", 2)) // blank node
+ {
+ OSL_TRACE( "ReadURIOrSafeCURIE: invalid URI: scheme is _" );
+ return ::rtl::OUString();
+ }
+ else
+ {
+// return GetImport().GetAbsoluteReference(i_rURIOrSafeCURIE);
+ return GetAbsoluteReference(i_rURIOrSafeCURIE);
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+uno::Reference< rdf::XBlankNode >
+RDFaInserter::LookupBlankNode(::rtl::OUString const & i_rNodeId )
+{
+ uno::Reference< rdf::XBlankNode > & rEntry( m_BlankNodeMap[ i_rNodeId ] );
+ if (!rEntry.is())
+ {
+ rEntry = m_xRepository->createBlankNode();
+ }
+ return rEntry;
+}
+
+uno::Reference< rdf::XURI >
+RDFaInserter::MakeURI( ::rtl::OUString const & i_rURI) const
+{
+ if (i_rURI.matchAsciiL("_:", 2)) // blank node
+ {
+ OSL_TRACE("MakeURI: cannot create URI for blank node");
+ return 0;
+ }
+ else
+ {
+ try
+ {
+ return rdf::URI::create( m_xContext, i_rURI );
+ }
+ catch (uno::Exception &)
+ {
+ OSL_ENSURE(false, "MakeURI: cannot create URI");
+ return 0;
+ }
+ }
+}
+
+uno::Reference< rdf::XResource>
+RDFaInserter::MakeResource( ::rtl::OUString const & i_rResource)
+{
+ if (i_rResource.matchAsciiL("_:", 2)) // blank node
+ {
+ // we cannot use the blank node label as-is: it must be distinct
+ // from labels in other graphs, so create fresh ones per XML stream
+ // N.B.: content.xml and styles.xml are distinct graphs
+ ::rtl::OUString name( i_rResource.copy(2) );
+ const uno::Reference< rdf::XBlankNode > xBNode( LookupBlankNode(name) );
+ OSL_ENSURE(xBNode.is(), "no blank node?");
+ return uno::Reference<rdf::XResource>( xBNode, uno::UNO_QUERY);
+ }
+ else
+ {
+ return uno::Reference<rdf::XResource>( MakeURI( i_rResource ),
+ uno::UNO_QUERY);
+ }
+}
+
+/** i wrote this because c++ implementations cannot agree on which variant
+ of boost::bind and std::mem_fun_ref applied to Reference::is compiles */
+class ref_is_null :
+ public ::std::unary_function<sal_Bool, const uno::Reference<rdf::XURI> & >
+{
+public:
+ sal_Bool operator() (const uno::Reference<rdf::XURI> & i_rRef)
+ {
+ return !i_rRef.is();
+ }
+};
+
+void RDFaInserter::InsertRDFaEntry(
+ struct RDFaEntry const & i_rEntry)
+{
+ OSL_ENSURE(i_rEntry.m_xObject.is(),
+ "InsertRDFaEntry: invalid arg: null object");
+ if (!i_rEntry.m_xObject.is()) return;
+
+ const uno::Reference< rdf::XResource > xSubject(
+ MakeResource( i_rEntry.m_pRDFaAttributes->m_About ) );
+ if (!xSubject.is())
+ {
+ return; // invalid
+ }
+
+ ::comphelper::SequenceAsVector< uno::Reference< rdf::XURI > > predicates;
+
+ predicates.reserve(i_rEntry.m_pRDFaAttributes->m_Properties.size());
+
+ ::std::remove_copy_if(
+ ::boost::make_transform_iterator(
+ i_rEntry.m_pRDFaAttributes->m_Properties.begin(),
+ ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
+ // argh, this must be the same type :(
+ ::boost::make_transform_iterator(
+ i_rEntry.m_pRDFaAttributes->m_Properties.end(),
+ ::boost::bind(&RDFaInserter::MakeURI, this, _1)),
+ ::std::back_inserter(predicates),
+ ref_is_null() );
+ // compiles only on wntmsci12
+// ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool>(&uno::Reference<rdf::XURI>::is, _1)));
+ // compiles on unxsoli4, wntsci12, but not unxlngi6
+// ::boost::bind( ::std::logical_not<sal_Bool>(), ::boost::bind<sal_Bool, com::sun::star::uno::Reference<rdf::XURI> >(&uno::Reference<rdf::XURI>::is, _1)));
+ // compiles on unxsoli4, unxlngi6, but not wntsci12
+// ::std::not1( ::std::mem_fun_ref(&uno::Reference<rdf::XURI>::is)) );
+
+ if (!predicates.size())
+ {
+ return; // invalid
+ }
+
+ uno::Reference<rdf::XURI> xDatatype;
+ if (i_rEntry.m_pRDFaAttributes->m_Datatype.getLength())
+ {
+ xDatatype = MakeURI( i_rEntry.m_pRDFaAttributes->m_Datatype );
+ }
+
+ try
+ {
+ // N.B.: this will call xMeta->ensureMetadataReference, which is why
+ // this must be done _after_ importing the whole XML file,
+ // to prevent collision between generated ids and ids in the file
+ m_xRepository->setStatementRDFa(xSubject, predicates.getAsConstList(),
+ i_rEntry.m_xObject,
+ i_rEntry.m_pRDFaAttributes->m_Content, xDatatype);
+ }
+ catch (uno::Exception &)
+ {
+ OSL_ENSURE(false, "InsertRDFaEntry: setStatementRDFa failed?");
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+RDFaImportHelper::RDFaImportHelper(const SvXMLImport & i_rImport)
+ : m_rImport(i_rImport)
+{
+}
+
+RDFaImportHelper::~RDFaImportHelper()
+{
+}
+
+::boost::shared_ptr<ParsedRDFaAttributes>
+RDFaImportHelper::ParseRDFa(
+ ::rtl::OUString const & i_rAbout,
+ ::rtl::OUString const & i_rProperty,
+ ::rtl::OUString const & i_rContent,
+ ::rtl::OUString const & i_rDatatype)
+{
+ if (!i_rProperty.getLength())
+ {
+ OSL_TRACE("AddRDFa: invalid input: xhtml:property empty");
+ return ::boost::shared_ptr<ParsedRDFaAttributes>();
+ }
+ // must parse CURIEs here: need namespace declaration context
+ RDFaReader reader(GetImport());
+ const ::rtl::OUString about( reader.ReadURIOrSafeCURIE(i_rAbout) );
+ if (!about.getLength()) {
+ return ::boost::shared_ptr<ParsedRDFaAttributes>();
+ }
+ const ::std::vector< ::rtl::OUString > properties(
+ reader.ReadCURIEs(i_rProperty) );
+ if (!properties.size()) {
+ return ::boost::shared_ptr<ParsedRDFaAttributes>();
+ }
+ const ::rtl::OUString datatype( i_rDatatype.getLength()
+ ? reader.ReadCURIE(i_rDatatype)
+ : ::rtl::OUString() );
+ return ::boost::shared_ptr<ParsedRDFaAttributes>(
+ new ParsedRDFaAttributes(about, properties, i_rContent, datatype));
+}
+
+void
+RDFaImportHelper::AddRDFa(
+ uno::Reference<rdf::XMetadatable> const & i_xObject,
+ ::boost::shared_ptr<ParsedRDFaAttributes> & i_pRDFaAttributes)
+{
+ if (!i_xObject.is())
+ {
+ OSL_ENSURE(false, "AddRDFa: invalid arg: null textcontent");
+ return;
+ }
+ if (!i_pRDFaAttributes.get())
+ {
+ OSL_ENSURE(false, "AddRDFa: invalid arg: null RDFa attributes");
+ return;
+ }
+ m_RDFaEntries.push_back(RDFaEntry(i_xObject, i_pRDFaAttributes));
+}
+
+void
+RDFaImportHelper::ParseAndAddRDFa(
+ uno::Reference<rdf::XMetadatable> const & i_xObject,
+ ::rtl::OUString const & i_rAbout,
+ ::rtl::OUString const & i_rProperty,
+ ::rtl::OUString const & i_rContent,
+ ::rtl::OUString const & i_rDatatype)
+{
+ ::boost::shared_ptr<ParsedRDFaAttributes> pAttributes(
+ ParseRDFa(i_rAbout, i_rProperty, i_rContent, i_rDatatype) );
+ if (pAttributes.get())
+ {
+ AddRDFa(i_xObject, pAttributes);
+ }
+}
+
+void RDFaImportHelper::InsertRDFa(
+ uno::Reference< rdf::XRepositorySupplier> const & i_xModel)
+{
+ OSL_ENSURE(i_xModel.is(), "InsertRDFa: invalid arg: model null");
+ if (!i_xModel.is()) return;
+ const uno::Reference< rdf::XDocumentRepository > xRepository(
+ i_xModel->getRDFRepository(), uno::UNO_QUERY);
+ OSL_ENSURE(xRepository.is(), "InsertRDFa: no DocumentRepository?");
+ if (!xRepository.is()) return;
+ RDFaInserter inserter(GetImport().GetComponentContext(), xRepository);
+ ::std::for_each(m_RDFaEntries.begin(), m_RDFaEntries.end(),
+ ::boost::bind(&RDFaInserter::InsertRDFaEntry, &inserter, _1));
+}
+
+} // namespace xmloff
+