summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Holesovsky <kendy@suse.cz>2013-01-29 18:06:53 +0100
committerJan Holesovsky <kendy@suse.cz>2013-01-31 02:35:52 +0100
commita341d834d76b3c06e4e767efa2dc09f9ac8ca8ba (patch)
treed59806933ff11ea02b5f0d169544cd61af193113
parentfe9d587d1b544a2faa1a994fdcfaee0bfa58210f (diff)
Dense B+ tree: Data structure update from the B+ tree.
It turns out that the 'classic' B+ tree is not exactly what we need for the BipPtrArray replacement; but when we do a small change, it fits perfectly. The problem with B+ tree is that it is designed for non-sequential keys, not for a sequence of keys (ints) without holes. Even worse - the insert() on B+ tree does not shift the rest of the values 'to the right' which is what we need here. But if we modify the nodes so that instead of absolute key values we use relative ones, and to count the exact index we de-facto sum the indexes of the nodes that lead to the node, we get exactly what we want - insert that shifts the rest of the values to the right is possible in O(log_order(n)). Due to the lack of name for this B+ tree modification, I name it 'dense B+ tree' - maybe there already is an existing name for this structure, but I do not recall one. Change-Id: If1cd769760aa7a2a6b027b9fa54d883540d875ad
-rw-r--r--sw/inc/densebplustree.cxx124
-rw-r--r--sw/inc/densebplustree.hxx50
2 files changed, 119 insertions, 55 deletions
diff --git a/sw/inc/densebplustree.cxx b/sw/inc/densebplustree.cxx
index 93ecf1fc383d..ec779bd10ea9 100644
--- a/sw/inc/densebplustree.cxx
+++ b/sw/inc/densebplustree.cxx
@@ -7,128 +7,158 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
-#include <bplustree.hxx>
+#include <densebplustree.hxx>
#include <cassert>
+/** The tree node order
+
+This affects how big are the metadata and data nodes; the higher the value,
+the flatter the structure is. It is necessary to find a good compromise
+between fragmentation (too low value) and not being too flat (too high value).
+
+50 seems to be a good value so far, but we can change it easily.
+*/
+static const int sOrder = 7;
+
/** B+ tree node implementation.
It has to be able to act as an internal node, as well as the leaf node.
*/
template < class Key, class Value >
-struct BPlusTreeNode
+struct DenseBPlusTreeNode
{
- /// The tree node order
- static const int sOrder = 7; /// TODO find out the optimal value here :-)
-
/// The number of children / data entries.
int m_nUsed;
/// The B+ tree has the data only in leaves, so we have to distinguish between internal nodes and leaves.
bool m_bIsInternal : 1;
- /// Keys for this node (meaning intervals in case of internal nodes, real keys otherwise)
- Key m_pKeys[ sOrder ];
+ /** Keys for this node.
+
+ In the internal nodes, the appropriate values get incremented as we
+ insert more and more, and parts of the tree are shifted to the right.
+
+ The real index of a value (when we find it) is a sum of all the
+ appropriate m_pKey's that lead to the value.
+
+ In principle, the m_pKeys should always have 0 in m_pKeys[0], let's
+ implicitly assume that, and not store it at all.
+ */
+ Key m_pKeys[ sOrder - 1 ];
union {
/// Internal node, contains only pointers to other nodes
- BPlusTreeNode m_pChildren[ sOrder ];
+ DenseBPlusTreeNode m_pChildren[ sOrder ];
/// Leaf node, contains data.
Value m_pValues[ sOrder ];
};
/// Pointer to the next node (valid only for the leaf nodes).
- BPlusTreeNode *m_pNext;
+ DenseBPlusTreeNode *m_pNext;
- BPlusTreeNode() : m_nUsed( 0 ), m_pNext( NULL ) {}
+ DenseBPlusTreeNode() : m_nUsed( 0 ), m_bIsInternal( false ), m_pNext( NULL ) {}
};
template < class Key, class Value >
-BPlusTree< Key, Value >::BPlusTree()
- : m_pRoot( new BPlusTreeNode< Key, Value > )
+DenseBPlusTree< Key, Value >::DenseBPlusTree()
+ : m_pRoot( new TreeNode )
{
}
template < class Key, class Value >
-BPlusTree< Key, Value >::~BPlusTree()
+DenseBPlusTree< Key, Value >::~DenseBPlusTree()
{
// TODO
}
template < class Key, class Value >
-Key BPlusTree< Key, Value >::Count() const
+Key DenseBPlusTree< Key, Value >::Count() const
{
// TODO
}
template < class Key, class Value >
-void BPlusTree< Key, Value >::Insert( const Value& rValue, Key nPos )
+void DenseBPlusTree< Key, Value >::Insert( const Value& rValue, Key nPos )
{
// TODO
}
template < class Key, class Value >
-void BPlusTree< Key, Value >::Remove( Key nPos, Key nNumber )
+void DenseBPlusTree< Key, Value >::Remove( Key nPos, Key nNumber )
{
// TODO
}
template < class Key, class Value >
-void BPlusTree< Key, Value >::Move( Key nFrom, Key nTo )
+void DenseBPlusTree< Key, Value >::Move( Key nFrom, Key nTo )
{
// TODO
}
template < class Key, class Value >
-void BPlusTree< Key, Value >::Replace( Key nPos, const Value& rValue )
+void DenseBPlusTree< Key, Value >::Replace( Key nPos, const Value& rValue )
{
- // TODO
+ assert( m_pRoot->m_nUsed > 0 );
+
+ NodeWithIndex aLeaf = searchLeaf( nPos, NULL );
+
+ aLeaf.pNode->m_pValues[ aLeaf.nIndex ] = rValue;
}
template < class Key, class Value >
-const Value& BPlusTree< Key, Value >::operator[]( Key nPos ) const
+const Value& DenseBPlusTree< Key, Value >::operator[]( Key nPos ) const
{
assert( m_pRoot->m_nUsed > 0 );
- BPlusTreeNode< Key, Value > *pNode = m_pRoot;
+ NodeWithIndex aLeaf = searchLeaf( nPos, NULL );
- // recursion is nice for the alg. description, but for implementation, we
- // want to unwind it
- while ( pNode->m_bIsInternal )
- {
- for ( int i = 0; i < pNode->m_nUsed - 1 ; ++i )
- {
- if ( nPos < pNode->m_pKeys[ i ] )
- {
- pNode = pNode->m_pChildren[ i ];
- break;
- }
- }
- pNode = pNode->m_pChildren[ pNode->m_nUsed - 1 ];
- }
-
- // now we have the leaf node
- for ( int i = 0; i < pNode->m_nUsed; ++i )
- {
- if ( nPos == pNode->m_pKeys[ i ] )
- return pNode->m_pValues[ i ];
- }
-
- // we do not allow not finding a value so far
- assert( false );
+ return aLeaf.pNode->m_pValues[ aLeaf.nIndex ];
}
template < class Key, class Value >
-void BPlusTree< Key, Value >::ForEach( FnForEach fn, void* pArgs )
+void DenseBPlusTree< Key, Value >::ForEach( FnForEach fn, void* pArgs )
{
// TODO
}
template < class Key, class Value >
-void BPlusTree< Key, Value >::ForEach( Key nStart, Key nEnd, FnForEach fn, void* pArgs )
+void DenseBPlusTree< Key, Value >::ForEach( Key nStart, Key nEnd, FnForEach fn, void* pArgs )
{
// TODO
}
+template < class Key, class Value >
+typename DenseBPlusTree< Key, Value >::NodeWithIndex DenseBPlusTree< Key, Value >::searchLeaf( Key nPos, std::stack< NodeWithIndex > *pParents )
+{
+ if ( m_pRoot->m_nUsed == 0 )
+ return NodeWithIndex( m_pRoot, -1 );
+
+ TreeNode *pNode = m_pRoot;
+
+ // recursion is nice for the alg. description, but for implementation, we
+ // want to unwind it
+ while ( pNode->m_bIsInternal )
+ {
+ int i = 0;
+ while ( i < pNode->m_nUsed - 1 && pNode->m_pKeys[ i ] <= nPos )
+ ++i;
+
+ // m_pKeys in children are relative
+ if ( i > 0 )
+ nPos -= pNode->m_pKeys[ i - 1 ];
+
+ if ( pParents )
+ pParents->push_back( NodeWithIndex( pNode, i ) );
+
+ pNode = pNode->m_pChildren[ i ];
+ }
+
+ // now we have the leaf node, check that we are not out of bounds
+ assert( nPos < pNode->m_nUsed );
+
+ return NodeWithIndex( pNode, nPos );
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/inc/densebplustree.hxx b/sw/inc/densebplustree.hxx
index c42d5d1e89ce..cf0c9ff0571a 100644
--- a/sw/inc/densebplustree.hxx
+++ b/sw/inc/densebplustree.hxx
@@ -24,30 +24,46 @@
#include <osl/diagnose.h>
#include <swdllapi.h>
-template < class Key, class Value > struct BPlusTreeNode;
+#include <stack>
-/** B+ Tree implementation (to replace the original BigPtrArray).
+template < class Key, class Value > struct DenseBPlusTreeNode;
-For more information about B+ Tree, please see eg. wikipedia:
+/** Dense B+ tree implementation (to replace the original BigPtrArray).
+
+This structure is a modification of a B+ tree, see eg. wikipedia:
http://en.wikipedia.org/wiki/B%2B_tree
+for the B+ tree implementation.
+
+The problem with 'classic' B+ tree is that it is designed for key/value access
+where the key typically is not a sequence of numbers. Consequently, it does
+not easily allow inserting in a sense that 'the rest of the values shift to
+the right' - but that is the operation that we need to do effectively.
+
+We do a small modification to the B+ tree implementation to make it 'dense' -
+the keys are supposed to be a sequence, and if you insert a value, all shifts
+to the right. The trick to achieve it is that the values that are stored in
+the internal nodes are not absolute, but are relative; so whatever is in the
+parents is propagated to the children. That way, shifting half of the tree by
+1 to the right after insertion is just a matter of adding 1 to the appropriate
+parent.
As part of the removal of BigPtrArray (and consequent refactor of the related
-code), this BPlusTree is supposed to be a drop-in replacement, with some of
+code), this structur is supposed to be a drop-in replacement, with some of
the functionality templatized for easier use.
Key is sal_uLong in the BigPtrArray implementation.
Value is supposed to be SwNodePtr initially.
*/
template < class Key, class Value >
-class SW_DLLPUBLIC BPlusTree
+class SW_DLLPUBLIC DenseBPlusTree
{
public:
/// Callback function to be called during ForEach.
typedef bool (*FnForEach)( const Value&, void* pArgs );
public:
- BPlusTree();
- ~BPlusTree();
+ DenseBPlusTree();
+ ~DenseBPlusTree();
/// Number of elements.
Key Count() const;
@@ -74,7 +90,25 @@ public:
void ForEach( Key nStart, Key nEnd, FnForEach fn, void* pArgs = NULL );
private:
- BPlusTreeNode< Key, Value > *m_pRoot;
+ typedef DenseBPlusTreeNode< Key, Value > TreeNode;
+
+ /// We need to know the exact path from the root to the leaf, including the indexes for various operations
+ struct NodeWithIndex {
+ TreeNode *pNode;
+ Key nIndex;
+
+ NodeWithIndex( TreeNode *p, Key n ) : pNode( p ), nIndex( n ) {}
+ };
+
+ /// Root of the tree.
+ TreeNode *m_pRoot;
+
+ /** Search for the leaf node containing nPos.
+
+ @return the leaf node containing nPos
+ @param pParents stack of parents of the returned tree node so that we can traverse it back to the root
+ */
+ NodeWithIndex searchLeaf( Key nPos, std::stack< NodeWithIndex > *pParents = NULL );
};
#endif // SW_BPLUSTREE_HXX