greenplumn CGroup 源码

  • 2022-08-18
  • 浏览 (9)

greenplumn CGroup 代码


//	Greenplum Database
//	Copyright (C) 2009 - 2011 EMC Corp.
//	@filename:
//		CGroup.h
//	@doc:
//		Group of equivalent expressions in the Memo structure
#ifndef GPOPT_CGroup_H
#define GPOPT_CGroup_H

#include "gpos/base.h"
#include "gpos/common/CDynamicPtrArray.h"
#include "gpos/common/CSyncHashtable.h"
#include "gpos/common/CSyncList.h"

#include "gpopt/base/CCostContext.h"
#include "gpopt/base/COptimizationContext.h"
#include "gpopt/base/CReqdPropPlan.h"
#include "gpopt/operators/CLogical.h"
#include "gpopt/search/CJobQueue.h"
#include "gpopt/search/CTreeMap.h"

#define GPOPT_INVALID_GROUP_ID gpos::ulong_max

namespace gpopt
using namespace gpos;
using namespace gpnaucrates;

// forward declarations
class CGroup;
class CGroupExpression;
class CDrvdProp;
class CDrvdPropCtxtPlan;
class CReqdPropRelational;
class CExpression;

// type definitions
// array of groups
using CGroupArray = CDynamicPtrArray<CGroup, CleanupNULL>;

// map required plan props to cost lower bound of corresponding plan
using ReqdPropPlanToCostMap =
	CHashMap<CReqdPropPlan, CCost, CReqdPropPlan::UlHashForCostBounding,
			 CleanupRelease<CReqdPropPlan>, CleanupDelete<CCost>>;

// optimization levels in ascending order,
// under a given optimization context, group expressions in higher levels
// must be optimized before group expressions in lower levels,
// a group expression sets its level in CGroupExpression::SetOptimizationLevel()
enum EOptimizationLevel
	EolLow = 0,	 // low optimization level, this is the default level
	EolHigh,	 // high optimization level


//	@class:
//		CGroup
//	@doc:
//		Group of equivalent expressions in the Memo structure
class CGroup : public CRefCount, public DbgPrintMixin<CGroup>
	friend class CGroupProxy;

	// type definition of optimization context hash table
	using ShtOC = CSyncHashtable<COptimizationContext, COptimizationContext>;

	// states of a group
	enum EState
		estUnexplored,	// initial state

		estExploring,  // ongoing exploration
		estExplored,   // done exploring

		estImplementing,  // ongoing implementation
		estImplemented,	  // done implementing

		estOptimizing,	// ongoing optimization
		estOptimized,	// done optimizing


	// definition of hash table iter
	using ShtIter =
		CSyncHashtableIter<COptimizationContext, COptimizationContext>;

	// definition of hash table iter accessor
	using ShtAccIter =
		CSyncHashtableAccessByIter<COptimizationContext, COptimizationContext>;

	// definition of hash table accessor
	using ShtAcc =
		CSyncHashtableAccessByKey<COptimizationContext, COptimizationContext>;

	//	@class:
	//		SContextLink
	//	@doc:
	//		Internal structure to remember processed links in plan enumeration
	struct SContextLink
		// cost context in a parent group
		CCostContext *m_pccParent;

		// index used when treating current group as a child of group expression
		ULONG m_ulChildIndex;

		// optimization context used to locate group expressions in
		// current group to be linked with parent group expression
		COptimizationContext *m_poc;

		// ctor
		SContextLink(CCostContext *pccParent, ULONG child_index,
					 COptimizationContext *poc);

		// dtor
		virtual ~SContextLink();

		// hash function
		static ULONG HashValue(const SContextLink *pclink);

		// equality function
		static BOOL Equals(const SContextLink *pclink1,
						   const SContextLink *pclink2);

	};	// struct SContextLink

	// map of processed links in TreeMap structure
	using LinkMap = CHashMap<SContextLink, BOOL, SContextLink::HashValue,
							 SContextLink::Equals, CleanupDelete<SContextLink>,

	// map of computed stats objects during costing
	using OptCtxtToIStatisticsMap = CHashMap<
		COptimizationContext, IStatistics, COptimizationContext::UlHashForStats,
		CleanupRelease<COptimizationContext>, CleanupRelease<IStatistics>>;

	// memory pool
	CMemoryPool *m_mp;

	// id is used when printing memo contents
	ULONG m_id;

	// true if group hold scalar expressions
	BOOL m_fScalar;

	// join keys for outer child (only for scalar groups) (used by hash & merge joins)
	CExpressionArray *m_pdrgpexprJoinKeysOuter;

	// join keys for inner child (only for scalar groups) (used by hash & merge joins)
	CExpressionArray *m_pdrgpexprJoinKeysInner;

	// join op families (only for scalar groups) (used by hash & merge joins)
	IMdIdArray *m_join_opfamilies;

	// list of group expressions
	CList<CGroupExpression> m_listGExprs;

	// list of duplicate group expressions identified by group merge
	CList<CGroupExpression> m_listDupGExprs;

	// group derived properties
	CDrvdProp *m_pdp;

	// group stats
	IStatistics *m_pstats;

	// scalar expression for stat derivation (subqueries substituted with a dummy)
	CExpression *m_pexprScalarRep;

	// scalar expression above is exactly the same as the scalar expr in the group
	BOOL m_pexprScalarRepIsExact;

	// dummy cost context used in scalar groups for plan enumeration
	CCostContext *m_pccDummy;

	// pointer to group containing the group expressions
	// of all duplicate groups
	CGroup *m_pgroupDuplicate;

	// map of processed links
	LinkMap *m_plinkmap;

	// map of computed stats during costing
	OptCtxtToIStatisticsMap *m_pstatsmap;

	// hashtable of optimization contexts
	ShtOC m_sht;

	// number of group expressions
	ULONG m_ulGExprs;

	// map of cost lower bounds
	ReqdPropPlanToCostMap *m_pcostmap;

	// number of optimization contexts
	ULONG_PTR m_ulpOptCtxts;

	// current state
	EState m_estate;

	// maximum optimization level of member group expressions
	EOptimizationLevel m_eolMax;

	// were new logical operators added to the group?
	BOOL m_fHasNewLogicalOperators;

	// the id of the CTE producer (if any)
	ULONG m_ulCTEProducerId;

	// does the group have any CTE consumer
	BOOL m_fCTEConsumer;

	// exploration job queue
	CJobQueue m_jqExploration;

	// implementation job queue
	CJobQueue m_jqImplementation;

	// cleanup optimization contexts on destruction
	void CleanupContexts();

	// increment number of optimization contexts
		return m_ulpOptCtxts++;

	// the following functions are only accessed through group proxy

	// setter of group id
	void SetId(ULONG id);

	// setter of group state
	void SetState(EState estNewState);

	// set hash join keys
	void SetJoinKeys(CExpressionArray *pdrgpexprOuter,
					 CExpressionArray *pdrgpexprInner,
					 IMdIdArray *join_opfamilies);

	// insert new group expression
	void Insert(CGroupExpression *pgexpr);

	// move duplicate group expression to duplicates list
	void MoveDuplicateGExpr(CGroupExpression *pgexpr);

	// initialize group's properties
	void InitProperties(CDrvdProp *pdp);

	// initialize group's stats
	void InitStats(IStatistics *stats);

	// retrieve first group expression
	CGroupExpression *PgexprFirst();

	// retrieve next group expression
	CGroupExpression *PgexprNext(CGroupExpression *pgexpr);

	// return true if first promise is better than second promise
	static BOOL FBetterPromise(CMemoryPool *mp, CLogical::EStatPromise espFst,
							   CGroupExpression *pgexprFst,
							   CLogical::EStatPromise espSnd,
							   CGroupExpression *pgexprSnd);

	// derive stats recursively on child groups
	static CLogical::EStatPromise EspDerive(CMemoryPool *pmpLocal,
											CMemoryPool *pmpGlobal,
											CGroupExpression *pgexpr,
											CReqdPropRelational *prprel,
											IStatisticsArray *stats_ctxt,
											BOOL fDeriveChildStats);

	// reset computed stats
	void ResetStats();

	// helper function to add links in child groups
	static void RecursiveBuildTreeMap(
		CMemoryPool *mp, COptimizationContext *poc, CCostContext *pccParent,
		CGroupExpression *pgexprCurrent, ULONG child_index,
		CTreeMap<CCostContext, CExpression, CDrvdPropCtxtPlan,
				 CCostContext::HashValue, CCostContext::Equals> *ptmap);

	// print scalar group properties
	IOstream &OsPrintGrpScalarProps(IOstream &os, const CHAR *szPrefix) const;

	// print group properties
	IOstream &OsPrintGrpProps(IOstream &os, const CHAR *szPrefix) const;

	// print group optimization contexts
	IOstream &OsPrintGrpOptCtxts(IOstream &os, const CHAR *szPrefix) const;

	// initialize and return empty stats for this group
	IStatistics *PstatsInitEmpty(CMemoryPool *pmpGlobal);

	// find the group expression having the best stats promise
	CGroupExpression *PgexprBestPromise(CMemoryPool *pmpLocal,
										CMemoryPool *pmpGlobal,
										CReqdPropRelational *prprelInput,
										IStatisticsArray *stats_ctxt);

	CGroup(const CGroup &) = delete;

	// ctor
	CGroup(CMemoryPool *mp, BOOL fScalar = false);

	// dtor
	~CGroup() override;

	// id accessor
	Id() const
		return m_id;

	// group properties accessor
	CDrvdProp *
	Pdp() const
		return m_pdp;

	// group stats accessor
	IStatistics *Pstats() const;

	// attempt initializing stats with the given stat object
	BOOL FInitStats(IStatistics *stats);

	// append given stats object to group stats
	void AppendStats(CMemoryPool *mp, IStatistics *stats);

	// accessor of maximum optimization level of member group expressions
	EolMax() const
		return m_eolMax;

	// does group hold scalar expressions ?
	FScalar() const
		return m_fScalar;

	// join keys of outer child
	CExpressionArray *
	PdrgpexprJoinKeysOuter() const
		return m_pdrgpexprJoinKeysOuter;

	// join keys of inner child
	CExpressionArray *
	PdrgpexprJoinKeysInner() const
		return m_pdrgpexprJoinKeysInner;

	// join op families
	IMdIdArray *
	JoinOpfamilies() const
		return m_join_opfamilies;

	// return a representative cached scalar expression usable for stat derivation etc.
	CExpression *
	PexprScalarRep() const
		return m_pexprScalarRep;

	// is the value returned by PexprScalarRep() exact?
	FScalarRepIsExact() const
		return m_pexprScalarRepIsExact;

	// return dummy cost context for scalar group
	CCostContext *
	PccDummy() const

		return m_pccDummy;

	// hash function
	ULONG HashValue() const;

	// number of group expressions accessor
	UlGExprs() const
		return m_ulGExprs;

	// optimization contexts hash table accessor
	ShtOC &
		return m_sht;

	// exploration job queue accessor
	CJobQueue *
		return &m_jqExploration;

	// implementation job queue accessor
	CJobQueue *
		return &m_jqImplementation;

	// has group been explored?
	FExplored() const
		return estExplored <= m_estate;

	// has group been implemented?
	FImplemented() const
		return estImplemented <= m_estate;

	// has group been optimized?
	FOptimized() const
		return estOptimized <= m_estate;

	// were new logical operators added to the group?
	FHasNewLogicalOperators() const
		return m_fHasNewLogicalOperators;

	// reset has new logical operators flag
		m_fHasNewLogicalOperators = false;

	// reset group state
	void ResetGroupState();

	// Check if we need to reset computed stats
	BOOL FResetStats();

	// returns true if stats can be derived on this group
	BOOL FStatsDerivable(CMemoryPool *mp);

	// reset group job queues
	void ResetGroupJobQueues();

	// check if group has duplicates
	FDuplicateGroup() const
		return nullptr != m_pgroupDuplicate;

	// duplicate group accessor
	CGroup *
	PgroupDuplicate() const
		return m_pgroupDuplicate;

	// resolve master duplicate group;
	// this is the group that will host all expressions in current group after merging
	void ResolveDuplicateMaster();

	// add duplicate group
	void AddDuplicateGrp(CGroup *pgroup);

	// merge group with its duplicate - not thread-safe
	void MergeGroup();

	// lookup a given context in contexts hash table
	COptimizationContext *PocLookup(CMemoryPool *mp, CReqdPropPlan *prpp,
									ULONG ulSearchStageIndex);

	// lookup the best context across all stages for the given required properties
	COptimizationContext *PocLookupBest(CMemoryPool *mp, ULONG ulSearchStages,
										CReqdPropPlan *prpp);

	// find a context by id
	COptimizationContext *Ppoc(ULONG id) const;

	// insert given context into contexts hash table
	COptimizationContext *PocInsert(COptimizationContext *poc);

	// update the best group cost under the given optimization context
	void UpdateBestCost(COptimizationContext *poc, CCostContext *pcc);

	// lookup best expression under given optimization context
	CGroupExpression *PgexprBest(COptimizationContext *poc);

	// materialize a scalar expression for stat derivation if this is a scalar group
	void CreateScalarExpression();

	// materialize a dummy cost context attached to the first group expression
	void CreateDummyCostContext();

	// return the CTE producer ID in the group (if any)
	UlCTEProducerId() const
		return m_ulCTEProducerId;

	// check if there are any CTE producers in the group
	FHasCTEProducer() const
		return (gpos::ulong_max != m_ulCTEProducerId);

	// check if there are any CTE consumers in the group
	FHasAnyCTEConsumer() const
		return m_fCTEConsumer;

	// derive statistics recursively on group
	IStatistics *PstatsRecursiveDerive(CMemoryPool *pmpLocal,
									   CMemoryPool *pmpGlobal,
									   CReqdPropRelational *prprel,
									   IStatisticsArray *stats_ctxt);

	// find group expression with best stats promise and the same given children
	CGroupExpression *PgexprBestPromise(CMemoryPool *mp,
										CGroupExpression *pgexprToMatch);

	// link parent group expression to group members
	void BuildTreeMap(
		CMemoryPool *mp, COptimizationContext *poc, CCostContext *pccParent,
		ULONG child_index,
		CTreeMap<CCostContext, CExpression, CDrvdPropCtxtPlan,
				 CCostContext::HashValue, CCostContext::Equals> *ptmap);

	// reset link map used in plan enumeration
	void ResetLinkMap();

	// retrieve the group expression containing a CTE Consumer operator
	CGroupExpression *PgexprAnyCTEConsumer();

	// compute stats during costing
	IStatistics *PstatsCompute(COptimizationContext *poc,
							   CExpressionHandle &exprhdl,
							   CGroupExpression *pgexpr);

	// compute cost lower bound for the plan satisfying given required properties
	CCost CostLowerBound(CMemoryPool *mp, CReqdPropPlan *prppInput);

	// matching of pairs of arrays of groups
	static BOOL FMatchGroups(CGroupArray *pdrgpgroupFst,
							 CGroupArray *pdrgpgroupSnd);

	// matching of pairs of arrays of groups while skipping scalar groups
	static BOOL FMatchNonScalarGroups(CGroupArray *pdrgpgroupFst,
									  CGroupArray *pdrgpgroupSnd);

	// determine if a pair of groups are duplicates
	static BOOL FDuplicateGroups(CGroup *pgroupFst, CGroup *pgroupSnd);

	// print function
	IOstream &OsPrint(IOstream &os) const;

	// slink for group list in memo
	SLink m_link;

	// debug print; for interactive debugging sessions only
	void DbgPrintWithProperties();

};	// class CGroup

}  // namespace gpopt

#endif	// !GPOPT_CGroup_H

// EOF


greenplumn 源码目录


greenplumn CBinding 源码

greenplumn CGroupExpression 源码

greenplumn CGroupProxy 源码

greenplumn CJob 源码

greenplumn CJobFactory 源码

greenplumn CJobGroup 源码

greenplumn CJobGroupExploration 源码

greenplumn CJobGroupExpression 源码

greenplumn CJobGroupExpressionExploration 源码

greenplumn CJobGroupExpressionImplementation 源码

0  赞