greenplumn CTreeMap 源码

  • 2022-08-18
  • 浏览 (206)

greenplumn CTreeMap 代码

文件路径:/src/backend/gporca/libgpopt/include/gpopt/search/CTreeMap.h

//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright (C) 2011 EMC Corp.
//
//	@filename:
//		CTreeMap.h
//
//	@doc:
//		Map of tree components to count, rank, and unrank abstract trees;
//
//		For description of algorithm, see also:
//
//			F. Waas, C. Galindo-Legaria, "Counting, Enumerating, and
//			Sampling of Execution Plans in a Cost-Based Query Optimizer",
//			ACM SIGMOD, 2000
//---------------------------------------------------------------------------
#ifndef GPOPT_CTreeMap_H
#define GPOPT_CTreeMap_H

#include "gpos/base.h"
#include "gpos/common/CHashMap.h"
#include "gpos/common/CHashMapIter.h"


namespace gpopt
{
using namespace gpos;

//---------------------------------------------------------------------------
//	@class:
//		CTreeMap
//
//	@doc:
//		Lookup table for counting/unranking of trees;
//
//		Enables client to associate objects of type T (e.g. CGroupExpression)
//		with a topology solely given by edges between the object. The
//		unranking utilizes client provided function and generates results of
//		type R (e.g., CExpression);
//		U is a global context accessible to recursive rehydrate calls.
//		Pointers to objects of type U are passed through PrUnrank calls to the
//		rehydrate function of type PrFn.
//
//---------------------------------------------------------------------------
template <class T, class R, class U, ULONG (*HashFn)(const T *),
		  BOOL (*EqFn)(const T *, const T *)>
class CTreeMap
{
	// array of source pointers (sources owned by 3rd party)
	using DrgPt = CDynamicPtrArray<T, CleanupNULL>;

	// array of result pointers (results owned by the tree we unrank)
	using DrgPr = CDynamicPtrArray<R, CleanupRelease<R>>;

	// generic rehydrate function
	using PrFn = R *(*) (CMemoryPool *, T *, DrgPr *, U *);

private:
	// fwd declaration
	class CTreeNode;

	// arrays of internal nodes
	using CTreeNodeArray = CDynamicPtrArray<CTreeNode, CleanupNULL>;
	using CTreeNode2dArray = CDynamicPtrArray<CTreeNodeArray, CleanupRelease>;

	//---------------------------------------------------------------------------
	//	@class:
	//		STreeLink
	//
	//	@doc:
	//		Internal structure to monitor tree links for duplicate detection
	//		purposes
	//
	//---------------------------------------------------------------------------
	struct STreeLink
	{
	private:
		// parent node
		const T *m_ptParent;

		// child index
		ULONG m_ulChildIndex;

		// child node
		const T *m_ptChild;

	public:
		// ctor
		STreeLink(const T *ptParent, ULONG child_index, const T *ptChild)
			: m_ptParent(ptParent),
			  m_ulChildIndex(child_index),
			  m_ptChild(ptChild)
		{
			GPOS_ASSERT(nullptr != ptParent);
			GPOS_ASSERT(nullptr != ptChild);
		}

		// dtor
		virtual ~STreeLink() = default;

		// hash function
		static ULONG
		HashValue(const STreeLink *ptlink)
		{
			ULONG ulHashParent = HashFn(ptlink->m_ptParent);
			ULONG ulHashChild = HashFn(ptlink->m_ptChild);
			ULONG ulHashChildIndex =
				gpos::HashValue<ULONG>(&ptlink->m_ulChildIndex);

			return CombineHashes(ulHashParent,
								 CombineHashes(ulHashChild, ulHashChildIndex));
		}

		// equality function
		static BOOL
		Equals(const STreeLink *ptlink1, const STreeLink *ptlink2)
		{
			return EqFn(ptlink1->m_ptParent, ptlink2->m_ptParent) &&
				   EqFn(ptlink1->m_ptChild, ptlink2->m_ptChild) &&
				   ptlink1->m_ulChildIndex == ptlink2->m_ulChildIndex;
		}
	};	// struct STreeLink

	//---------------------------------------------------------------------------
	//	@class:
	//		CTreeNode
	//
	//	@doc:
	//		Internal structure to manage source objects and their topology
	//
	//---------------------------------------------------------------------------
	class CTreeNode
	{
	private:
		// state of tree node during counting alternatives
		enum ENodeState
		{
			EnsUncounted,  // counting not initiated
			EnsCounting,   // counting in progress
			EnsCounted,	   // counting complete

			EnsSentinel
		};

		// memory pool
		CMemoryPool *m_mp;

		// id of node
		ULONG m_ul;

		// element
		const T *m_value;

		// array of children arrays
		CTreeNode2dArray *m_pdrgdrgptn;

		// number of trees rooted in this node
		ULLONG m_ullCount;

		// number of incoming edges
		ULONG m_ulIncoming;

		// node state used for counting alternatives
		ENodeState m_ens;

		// total tree count for a given child
		ULLONG
		UllCount(ULONG ulChild)
		{
			GPOS_CHECK_STACK_SIZE;

			ULLONG ull = 0;

			ULONG ulCandidates = (*m_pdrgdrgptn)[ulChild]->Size();
			for (ULONG ulAlt = 0; ulAlt < ulCandidates; ulAlt++)
			{
				CTreeNode *ptn = (*(*m_pdrgdrgptn)[ulChild])[ulAlt];
				ULLONG ullCount = ptn->UllCount();
				ull = gpos::Add(ull, ullCount);
			}

			return ull;
		}

		// rehydrate tree
		R *
		PrUnrank(CMemoryPool *mp, PrFn prfn, U *pU, ULONG ulChild,
				 ULLONG ullRank)
		{
			GPOS_CHECK_STACK_SIZE;
			GPOS_ASSERT(ullRank < UllCount(ulChild));

			CTreeNodeArray *pdrgptn = (*m_pdrgdrgptn)[ulChild];
			ULONG ulCandidates = pdrgptn->Size();

			CTreeNode *ptn = nullptr;

			for (ULONG ul = 0; ul < ulCandidates; ul++)
			{
				ptn = (*pdrgptn)[ul];
				ULLONG ullLocalCount = ptn->UllCount();

				if (ullRank < ullLocalCount)
				{
					// ullRank is now local rank for the child
					break;
				}

				ullRank -= ullLocalCount;
			}

			GPOS_ASSERT(nullptr != ptn);
			return ptn->PrUnrank(mp, prfn, pU, ullRank);
		}

	public:
		// ctor
		CTreeNode(CMemoryPool *mp, ULONG ul, const T *value)
			: m_mp(mp),
			  m_ul(ul),
			  m_value(value),
			  m_pdrgdrgptn(nullptr),
			  m_ullCount(gpos::ullong_max),
			  m_ulIncoming(0),
			  m_ens(EnsUncounted)
		{
			m_pdrgdrgptn = GPOS_NEW(mp) CTreeNode2dArray(mp);
		}

		// dtor
		~CTreeNode()
		{
			m_pdrgdrgptn->Release();
		}

		// add child alternative
		void
		Add(ULONG ulPos, CTreeNode *ptn)
		{
			GPOS_ASSERT(!FCounted() &&
						"Adding edges after counting not meaningful");

			// insert any child arrays skipped so far; make sure we have a dense
			// array up to the position of ulPos
			ULONG length = m_pdrgdrgptn->Size();
			for (ULONG ul = length; ul <= ulPos; ul++)
			{
				CTreeNodeArray *pdrg = GPOS_NEW(m_mp) CTreeNodeArray(m_mp);
				m_pdrgdrgptn->Append(pdrg);
			}

			// increment count of incoming edges
			ptn->m_ulIncoming++;

			// insert to appropriate array
			CTreeNodeArray *pdrg = (*m_pdrgdrgptn)[ulPos];
			GPOS_ASSERT(nullptr != pdrg);
			pdrg->Append(ptn);
		}

		// accessor
		const T *
		Value() const
		{
			return m_value;
		}

		// number of trees rooted in this node
		ULLONG
		UllCount()
		{
			GPOS_CHECK_STACK_SIZE;

			GPOS_ASSERT(EnsCounting != m_ens && "cycle in graph detected");

			if (!FCounted())
			{
				// initiate counting on current node
				m_ens = EnsCounting;

				ULLONG ullCount = 1;

				ULONG arity = m_pdrgdrgptn->Size();
				for (ULONG ulChild = 0; ulChild < arity; ulChild++)
				{
					ULLONG ull = UllCount(ulChild);
					if (0 == ull)
					{
						// if current child has no alternatives, the parent cannot have alternatives
						ullCount = 0;
						break;
					}

					// otherwise, multiply number of child alternatives by current count
					ullCount = gpos::Multiply(ullCount, ull);
				}

				// counting is complete
				m_ullCount = ullCount;
				m_ens = EnsCounted;
			}

			return m_ullCount;
		}

		// check if count has been determined for this node
		BOOL
		FCounted() const
		{
			return (EnsCounted == m_ens);
		}

		// number of incoming edges
		ULONG
		UlIncoming() const
		{
			return m_ulIncoming;
		}

		// unrank tree of a given rank with a given rehydrate function
		R *
		PrUnrank(CMemoryPool *mp, PrFn prfn, U *pU, ULLONG ullRank)
		{
			GPOS_CHECK_STACK_SIZE;

			R *pr = nullptr;

			if (0 == this->m_ul)
			{
				// global root, just unrank 0-th child
				pr = PrUnrank(mp, prfn, pU, 0 /* ulChild */, ullRank);
			}
			else
			{
				DrgPr *pdrg = GPOS_NEW(mp) DrgPr(mp);

				ULLONG ullRankRem = ullRank;

				ULONG ulChildren = m_pdrgdrgptn->Size();
				for (ULONG ulChild = 0; ulChild < ulChildren; ulChild++)
				{
					ULLONG ullLocalCount = UllCount(ulChild);
					GPOS_ASSERT(0 < ullLocalCount);
					ULLONG ullLocalRank = ullRankRem % ullLocalCount;

					pdrg->Append(PrUnrank(mp, prfn, pU, ulChild, ullLocalRank));

					ullRankRem /= ullLocalCount;
				}

				pr = prfn(mp, const_cast<T *>(this->Value()), pdrg, pU);
			}

			return pr;
		}

#ifdef GPOS_DEBUG

		// debug print
		IOstream &
		OsPrint(IOstream &os)
		{
			ULONG ulChildren = m_pdrgdrgptn->Size();

			os << "=== Node " << m_ul << " [" << *Value()
			   << "] ===" << std::endl
			   << "# children: " << ulChildren << std::endl
			   << "# count: " << this->UllCount() << std::endl;

			for (ULONG ul = 0; ul < ulChildren; ul++)
			{
				os << "--- child: #" << ul << " ---" << std::endl;
				ULONG ulAlt = (*m_pdrgdrgptn)[ul]->Size();

				for (ULONG ulChild = 0; ulChild < ulAlt; ulChild++)
				{
					CTreeNode *ptn = (*(*m_pdrgdrgptn)[ul])[ulChild];
					os << "  -> " << ptn->m_ul << " [" << *ptn->Value() << "]"
					   << std::endl;
				}
			}

			return os;
		}

#endif	// GPOS_DEBUG
	};

	// memory pool
	CMemoryPool *m_mp;

	// counter for nodes
	ULONG m_ulCountNodes;

	// counter for links
	ULONG m_ulCountLinks;

	// rehydrate function
	PrFn m_prfn;

	// universal root (internally used only)
	CTreeNode *m_ptnRoot;

	// map of all nodes
	using TMap = gpos::CHashMap<T, CTreeNode, HashFn, EqFn, CleanupNULL,
								CleanupDelete<CTreeNode>>;
	using TMapIter = gpos::CHashMapIter<T, CTreeNode, HashFn, EqFn, CleanupNULL,
										CleanupDelete<CTreeNode>>;

	// map of created links
	using LinkMap =
		CHashMap<STreeLink, BOOL, STreeLink::HashValue, STreeLink::Equals,
				 CleanupDelete<STreeLink>, CleanupDelete<BOOL>>;

	TMap *m_ptmap;

	// map of nodes to outgoing links
	LinkMap *m_plinkmap;

	// recursive count starting in given node
	ULLONG UllCount(CTreeNode *ptn);

	// Convert to corresponding treenode, create treenode as necessary
	CTreeNode *
	Ptn(const T *value)
	{
		GPOS_ASSERT(nullptr != value);
		CTreeNode *ptn = m_ptmap->Find(value);

		if (nullptr == ptn)
		{
			ptn = GPOS_NEW(m_mp) CTreeNode(m_mp, ++m_ulCountNodes, value);
			(void) m_ptmap->Insert(const_cast<T *>(value), ptn);
		}

		return ptn;
	}

	// private copy ctor
	CTreeMap(const CTreeMap &);

public:
	// ctor
	CTreeMap(CMemoryPool *mp, PrFn prfn)
		: m_mp(mp),
		  m_ulCountNodes(0),
		  m_ulCountLinks(0),
		  m_prfn(prfn),
		  m_ptnRoot(nullptr),
		  m_ptmap(nullptr),
		  m_plinkmap(nullptr)
	{
		GPOS_ASSERT(nullptr != mp);
		GPOS_ASSERT(nullptr != prfn);

		m_ptmap = GPOS_NEW(mp) TMap(mp);
		m_plinkmap = GPOS_NEW(mp) LinkMap(mp);

		// insert dummy node as global root -- the only node with NULL payload
		m_ptnRoot =
			GPOS_NEW(mp) CTreeNode(mp, 0 /* ulCounter */, nullptr /* value */);
	}

	// dtor
	~CTreeMap()
	{
		m_ptmap->Release();
		m_plinkmap->Release();

		GPOS_DELETE(m_ptnRoot);
	}

	// insert edge as n-th child
	void
	Insert(const T *ptParent, ULONG ulPos, const T *ptChild)
	{
		GPOS_ASSERT(ptParent != ptChild);

		// exit function if link already exists
		STreeLink *ptlink = GPOS_NEW(m_mp) STreeLink(ptParent, ulPos, ptChild);
		if (nullptr != m_plinkmap->Find(ptlink))
		{
			GPOS_DELETE(ptlink);
			return;
		}

		CTreeNode *ptnParent = Ptn(ptParent);
		CTreeNode *ptnChild = Ptn(ptChild);

		ptnParent->Add(ulPos, ptnChild);
		++m_ulCountLinks;

		// add created link to links map
		BOOL fInserted GPOS_ASSERTS_ONLY =
			m_plinkmap->Insert(ptlink, GPOS_NEW(m_mp) BOOL(true));
		GPOS_ASSERT(fInserted);
	}

	// insert a root node
	void
	InsertRoot(const T *value)
	{
		GPOS_ASSERT(NULL != value);
		GPOS_ASSERT(NULL != m_ptnRoot);

		// add logical root as 0-th child to global root
		m_ptnRoot->Add(0 /*ulPos*/, Ptn(value));
	}

	// count all possible combinations
	ULLONG
	UllCount()
	{
		// first, hookup all logical root nodes to the global root
		TMapIter mi(m_ptmap);
		ULONG ulNodes = 0;
		for (ulNodes = 0; mi.Advance(); ulNodes++)
		{
			CTreeNode *ptn = const_cast<CTreeNode *>(mi.Value());

			if (0 == ptn->UlIncoming())
			{
				// add logical root as 0-th child to global root
				m_ptnRoot->Add(0 /*ulPos*/, ptn);
			}
		}

		// special case of empty map
		if (0 == ulNodes)
		{
			return 0;
		}

		return m_ptnRoot->UllCount();
	}

	// unrank a specific tree
	R *
	PrUnrank(CMemoryPool *mp, U *pU, ULLONG ullRank) const
	{
		return m_ptnRoot->PrUnrank(mp, m_prfn, pU, ullRank);
	}

	// return number of nodes
	ULONG
	UlNodes() const
	{
		return m_ulCountNodes;
	}

	// return number of links
	ULONG
	UlLinks() const
	{
		return m_ulCountLinks;
	}

#ifdef GPOS_DEBUG

	// retrieve count for individual element
	ULLONG
	UllCount(const T *value)
	{
		CTreeNode *ptn = m_ptmap->Find(value);
		GPOS_ASSERT(nullptr != ptn);

		return ptn->UllCount();
	}

	// debug print of entire map
	IOstream &
	OsPrint(IOstream &os) const
	{
		TMapIter mi(m_ptmap);
		ULONG ulNodes = 0;
		for (ulNodes = 0; mi.Advance(); ulNodes++)
		{
			CTreeNode *ptn = const_cast<CTreeNode *>(mi.Value());
			(void) ptn->OsPrint(os);
		}

		os << "total number of nodes: " << ulNodes << std::endl;

		return os;
	}

#endif	// GPOS_DEBUG

};	// class CTreeMap

}  // namespace gpopt

#endif	// !GPOPT_CTreeMap_H

// EOF

相关信息

greenplumn 源码目录

相关文章

greenplumn CBinding 源码

greenplumn CGroup 源码

greenplumn CGroupExpression 源码

greenplumn CGroupProxy 源码

greenplumn CJob 源码

greenplumn CJobFactory 源码

greenplumn CJobGroup 源码

greenplumn CJobGroupExploration 源码

greenplumn CJobGroupExpression 源码

greenplumn CJobGroupExpressionExploration 源码

0  赞