greenplumn CXformUtils 源码

  • 2022-08-18
  • 浏览 (290)

greenplumn CXformUtils 代码

文件路径:/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp

//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright 2012 EMC Corp.
//
//	@filename:
//		CXformUtils.cpp
//
//	@doc:
//		Implementation of xform utilities
//---------------------------------------------------------------------------


#include "gpopt/xforms/CXformUtils.h"

#include "gpos/base.h"
#include "gpos/error/CMessage.h"
#include "gpos/error/CMessageRepository.h"
#include "gpos/memory/CAutoMemoryPool.h"

#include "gpopt/base/CConstraintConjunction.h"
#include "gpopt/base/CConstraintNegation.h"
#include "gpopt/base/CKeyCollection.h"
#include "gpopt/base/CUtils.h"
#include "gpopt/exception.h"
#include "gpopt/operators/CExpressionHandle.h"
#include "gpopt/operators/CLogicalAssert.h"
#include "gpopt/operators/CLogicalBitmapTableGet.h"
#include "gpopt/operators/CLogicalCTEConsumer.h"
#include "gpopt/operators/CLogicalCTEProducer.h"
#include "gpopt/operators/CLogicalDynamicBitmapTableGet.h"
#include "gpopt/operators/CLogicalDynamicGet.h"
#include "gpopt/operators/CLogicalGbAgg.h"
#include "gpopt/operators/CLogicalGbAggDeduplicate.h"
#include "gpopt/operators/CLogicalGet.h"
#include "gpopt/operators/CLogicalInnerJoin.h"
#include "gpopt/operators/CLogicalNAryJoin.h"
#include "gpopt/operators/CLogicalPartitionSelector.h"
#include "gpopt/operators/CLogicalRowTrigger.h"
#include "gpopt/operators/CLogicalSelect.h"
#include "gpopt/operators/CLogicalSequenceProject.h"
#include "gpopt/operators/CPhysicalInnerHashJoin.h"
#include "gpopt/operators/CScalarAssertConstraint.h"
#include "gpopt/operators/CScalarAssertConstraintList.h"
#include "gpopt/operators/CScalarBitmapBoolOp.h"
#include "gpopt/operators/CScalarBitmapIndexProbe.h"
#include "gpopt/operators/CScalarCmp.h"
#include "gpopt/operators/CScalarDMLAction.h"
#include "gpopt/operators/CScalarIdent.h"
#include "gpopt/operators/CScalarIf.h"
#include "gpopt/operators/CScalarProjectElement.h"
#include "gpopt/operators/CScalarProjectList.h"
#include "gpopt/operators/CScalarSubquery.h"
#include "gpopt/operators/CScalarSubqueryAll.h"
#include "gpopt/operators/CScalarSubqueryQuantified.h"
#include "gpopt/operators/CScalarValuesList.h"
#include "gpopt/operators/CScalarWindowFunc.h"
#include "gpopt/optimizer/COptimizerConfig.h"
#include "gpopt/search/CGroupExpression.h"
#include "gpopt/search/CGroupProxy.h"
#include "gpopt/xforms/CDecorrelator.h"
#include "gpopt/xforms/CSubqueryHandler.h"
#include "gpopt/xforms/CXformExploration.h"
#include "naucrates/base/CDatumInt8GPDB.h"
#include "naucrates/md/CMDIdGPDB.h"
#include "naucrates/md/CMDTriggerGPDB.h"
#include "naucrates/md/IMDCheckConstraint.h"
#include "naucrates/md/IMDScalarOp.h"
#include "naucrates/md/IMDTrigger.h"
#include "naucrates/md/IMDTypeBool.h"
#include "naucrates/md/IMDTypeInt4.h"
#include "naucrates/md/IMDTypeInt8.h"
#include "naucrates/md/IMDTypeOid.h"
#include "naucrates/statistics/CFilterStatsProcessor.h"

using namespace gpopt;

// predicates less selective than this threshold
// (selectivity is greater than this number) lead to
// disqualification of a btree index on an AO table
#define AO_TABLE_BTREE_INDEX_SELECTIVITY_THRESHOLD 0.10

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::ExfpLogicalJoin2PhysicalJoin
//
//	@doc:
//		Check the applicability of logical join to physical join xform
//
//---------------------------------------------------------------------------
CXform::EXformPromise
CXformUtils::ExfpLogicalJoin2PhysicalJoin(CExpressionHandle &exprhdl)
{
	// if scalar predicate has a subquery, we must have an
	// equivalent logical Apply expression created during exploration;
	// no need for generating a physical join
	if (exprhdl.DeriveHasSubquery(2))
	{
		return CXform::ExfpNone;
	}

	return CXform::ExfpHigh;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::ExfpSemiJoin2CrossProduct
//
//	@doc:
//		Check the applicability of semi join to cross product xform
//
//---------------------------------------------------------------------------
CXform::EXformPromise
CXformUtils::ExfpSemiJoin2CrossProduct(CExpressionHandle &exprhdl)
{
#ifdef GPOS_DEBUG
	COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
#endif	// GPOS_DEBUG
	GPOS_ASSERT(COperator::EopLogicalLeftSemiJoin == op_id ||
				COperator::EopLogicalLeftAntiSemiJoin == op_id ||
				COperator::EopLogicalLeftAntiSemiJoinNotIn == op_id);

	CColRefSet *pcrsUsed = exprhdl.DeriveUsedColumns(2);
	CColRefSet *pcrsOuterOutput =
		exprhdl.DeriveOutputColumns(0 /*child_index*/);
	if (0 == pcrsUsed->Size() || !pcrsOuterOutput->ContainsAll(pcrsUsed))
	{
		// xform is inapplicable of join predicate uses columns from join's inner child
		return CXform::ExfpNone;
	}

	return CXform::ExfpHigh;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::ExfpExpandJoinOrder
//
//	@doc:
//		Check the applicability of N-ary join expansion
//
//---------------------------------------------------------------------------
CXform::EXformPromise
CXformUtils::ExfpExpandJoinOrder(CExpressionHandle &exprhdl,
								 const CXform *xform)
{
	// With optimizer_join_order set to 'query' or 'exhaustive', the
	// 'query' join order will expand the join even if it contains
	// outer refs, using another method to get the promise.
	// Therefore we also allow expansion for 'exhaustive2'
	// when we have outer refs.
	if (exprhdl.DeriveHasSubquery(exprhdl.Arity() - 1) ||
		(exprhdl.HasOuterRefs() &&
		 CXform::ExfExpandNAryJoinDPv2 != xform->Exfid()))
	{
		// subqueries must be unnested before applying xform
		return CXform::ExfpNone;
	}

#ifdef GPOS_DEBUG
	CAutoMemoryPool amp;
	GPOS_ASSERT(!FJoinPredOnSingleChild(amp.Pmp(), exprhdl) &&
				"join predicates are not pushed down");
#endif	// GPOS_DEBUG

	if (nullptr != exprhdl.Pgexpr())
	{
		// if handle is attached to a group expression, transformation is applied
		// to the Memo and we need to check if stats are derivable on child groups
		CGroup *pgroup = exprhdl.Pgexpr()->Pgroup();
		CAutoMemoryPool amp;
		CMemoryPool *mp = amp.Pmp();
		if (!pgroup->FStatsDerivable(mp))
		{
			// stats must be derivable before applying xforms
			return CXform::ExfpNone;
		}

		const ULONG arity = exprhdl.Arity();
		for (ULONG ul = 0; ul < arity; ul++)
		{
			CGroup *pgroupChild = (*exprhdl.Pgexpr())[ul];
			if (!pgroupChild->FScalar() && !pgroupChild->FStatsDerivable(mp))
			{
				// stats must be derivable on every child
				return CXform::ExfpNone;
			}
		}
	}

	return CXform::ExfpHigh;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FInlinableCTE
//
//	@doc:
//		Check whether a CTE should be inlined
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FInlinableCTE(ULONG ulCTEId)
{
	CCTEInfo *pcteinfo = COptCtxt::PoctxtFromTLS()->Pcteinfo();
	CExpression *pexprProducer = pcteinfo->PexprCTEProducer(ulCTEId);
	GPOS_ASSERT(nullptr != pexprProducer);
	CFunctionProp *pfp = pexprProducer->DeriveFunctionProperties();

	CPartInfo *ppartinfoCTEProducer = pexprProducer->DerivePartitionInfo();
	GPOS_ASSERT(nullptr != ppartinfoCTEProducer);

	return IMDFunction::EfsVolatile > pfp->Efs() &&
		   !pfp->NeedsSingletonExecution() &&
		   (0 == ppartinfoCTEProducer->UlConsumers() ||
			1 == pcteinfo->UlConsumers(ulCTEId));
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PcrsFKey
//
//	@doc:
//		Helper to construct a foreign key by collecting columns that appear
//		in equality predicates with primary key columns;
//		return NULL if no foreign key could be constructed
//
//---------------------------------------------------------------------------
CColRefSet *
CXformUtils::PcrsFKey(
	CMemoryPool *mp,
	CExpressionArray *pdrgpexpr,  // array of scalar conjuncts
	CColRefSet *prcsOutput,		  // output columns of outer expression
	CColRefSet *pcrsKey			  // a primary key of a inner expression
)
{
	GPOS_ASSERT(nullptr != pdrgpexpr);
	GPOS_ASSERT(nullptr != pcrsKey);
	GPOS_ASSERT(nullptr != prcsOutput);

	// collected columns that are part of primary key and used in equality predicates
	CColRefSet *pcrsKeyParts = GPOS_NEW(mp) CColRefSet(mp);

	// FK columns
	CColRefSet *pcrsFKey = GPOS_NEW(mp) CColRefSet(mp);
	const ULONG ulConjuncts = pdrgpexpr->Size();
	for (ULONG ul = 0; ul < ulConjuncts; ul++)
	{
		CExpression *pexprConjunct = (*pdrgpexpr)[ul];
		if (!CPredicateUtils::FPlainEquality(pexprConjunct))
		{
			continue;
		}

		CColRef *pcrFst = const_cast<CColRef *>(
			CScalarIdent::PopConvert((*pexprConjunct)[0]->Pop())->Pcr());
		CColRef *pcrSnd = const_cast<CColRef *>(
			CScalarIdent::PopConvert((*pexprConjunct)[1]->Pop())->Pcr());
		if (pcrsKey->FMember(pcrFst) && prcsOutput->FMember(pcrSnd))
		{
			pcrsKeyParts->Include(pcrFst);
			pcrsFKey->Include(pcrSnd);
		}
		else if (pcrsKey->FMember(pcrSnd) && prcsOutput->FMember(pcrFst))
		{
			pcrsFKey->Include(pcrFst);
			pcrsKeyParts->Include(pcrSnd);
		}
	}

	// check if collected key parts constitute a primary key
	if (!pcrsKeyParts->Equals(pcrsKey))
	{
		// did not succeeded in building foreign key
		pcrsFKey->Release();
		pcrsFKey = nullptr;
	}
	pcrsKeyParts->Release();

	return pcrsFKey;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PcrsFKey
//
//	@doc:
//		Return a foreign key pointing from outer expression to inner
//		expression;
//		return NULL if no foreign key could be extracted
//
//---------------------------------------------------------------------------
CColRefSet *
CXformUtils::PcrsFKey(CMemoryPool *mp, CExpression *pexprOuter,
					  CExpression *pexprInner, CExpression *pexprScalar)
{
	// get inner expression key
	CKeyCollection *pkc = pexprInner->DeriveKeyCollection();
	if (nullptr == pkc)
	{
		// inner expression has no key
		return nullptr;
	}
	// get outer expression output columns
	CColRefSet *prcsOutput = pexprOuter->DeriveOutputColumns();

	CExpressionArray *pdrgpexpr =
		CPredicateUtils::PdrgpexprConjuncts(mp, pexprScalar);
	CColRefSet *pcrsFKey = nullptr;

	const ULONG ulKeys = pkc->Keys();
	for (ULONG ulKey = 0; ulKey < ulKeys; ulKey++)
	{
		CColRefArray *pdrgpcrKey = pkc->PdrgpcrKey(mp, ulKey);

		CColRefSet *pcrsKey = GPOS_NEW(mp) CColRefSet(mp);
		pcrsKey->Include(pdrgpcrKey);
		pdrgpcrKey->Release();

		// attempt to construct a foreign key based on current primary key
		pcrsFKey = PcrsFKey(mp, pdrgpexpr, prcsOutput, pcrsKey);
		pcrsKey->Release();

		if (nullptr != pcrsFKey)
		{
			// succeeded in finding FK
			break;
		}
	}

	pdrgpexpr->Release();

	return pcrsFKey;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprRedundantSelectForDynamicIndex
//
//	@doc:
// 		Add a redundant SELECT node on top of Dynamic (Bitmap) IndexGet to
//		to be able to use index predicate in partition elimination
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprRedundantSelectForDynamicIndex(
	CMemoryPool *mp,
	CExpression *
		pexpr  // input expression is a dynamic (bitmap) IndexGet with an optional Select on top
)
{
	GPOS_ASSERT(nullptr != pexpr);

	COperator::EOperatorId op_id = pexpr->Pop()->Eopid();
	GPOS_ASSERT(COperator::EopLogicalDynamicIndexGet == op_id ||
				COperator::EopLogicalDynamicBitmapTableGet == op_id ||
				COperator::EopLogicalSelect == op_id);

	CExpression *pexprRedundantScalar = nullptr;
	if (COperator::EopLogicalDynamicIndexGet == op_id ||
		COperator::EopLogicalDynamicBitmapTableGet == op_id)
	{
		// no residual predicate, use index lookup predicate only
		pexpr->AddRef();
		// reuse index lookup predicate
		(*pexpr)[0]->AddRef();
		pexprRedundantScalar = (*pexpr)[0];

		return GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CLogicalSelect(mp),
										pexpr, pexprRedundantScalar);
	}

	// there is a residual predicate in a SELECT node on top of DynamicIndexGet,
	// we create a conjunction of both residual predicate and index lookup predicate
	CExpression *pexprChild = (*pexpr)[0];
#ifdef GPOS_DEBUG
	COperator::EOperatorId eopidChild = pexprChild->Pop()->Eopid();
	GPOS_ASSERT(COperator::EopLogicalDynamicIndexGet == eopidChild ||
				COperator::EopLogicalDynamicBitmapTableGet == eopidChild);
#endif	// GPOS_DEBUG

	CExpression *pexprIndexLookupPred = (*pexprChild)[0];
	CExpression *pexprResidualPred = (*pexpr)[1];
	pexprRedundantScalar = CPredicateUtils::PexprConjunction(
		mp, pexprIndexLookupPred, pexprResidualPred);

	pexprChild->AddRef();

	return GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CLogicalSelect(mp),
									pexprChild, pexprRedundantScalar);
}


#ifdef GPOS_DEBUG
//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FSwapableJoinType
//
//	@doc:
//		Check whether the given join type is swapable
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FSwapableJoinType(COperator::EOperatorId op_id)
{
	return (COperator::EopLogicalLeftSemiJoin == op_id ||
			COperator::EopLogicalLeftAntiSemiJoin == op_id ||
			COperator::EopLogicalLeftAntiSemiJoinNotIn == op_id ||
			COperator::EopLogicalInnerJoin == op_id);
}
#endif	// GPOS_DEBUG

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprSwapJoins
//
//	@doc:
//		Compute a swap of the two given joins;
//		the function returns null if swapping cannot be done
//
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprSwapJoins(CMemoryPool *mp, CExpression *pexprTopJoin,
							CExpression *pexprBottomJoin)
{
#ifdef GPOS_DEBUG
	COperator::EOperatorId eopidTop = pexprTopJoin->Pop()->Eopid();
	COperator::EOperatorId eopidBottom = pexprBottomJoin->Pop()->Eopid();
#endif	// GPOS_DEBUG

	GPOS_ASSERT(FSwapableJoinType(eopidTop) && FSwapableJoinType(eopidBottom));
	GPOS_ASSERT_IMP(
		COperator::EopLogicalInnerJoin == eopidTop,
		COperator::EopLogicalLeftSemiJoin == eopidBottom ||
			COperator::EopLogicalLeftAntiSemiJoin == eopidBottom ||
			COperator::EopLogicalLeftAntiSemiJoinNotIn == eopidBottom);

	// get used columns by the join predicate of top join
	CColRefSet *pcrsUsed = (*pexprTopJoin)[2]->DeriveUsedColumns();

	// get output columns of bottom join's children
	const CColRefSet *pcrsBottomOuter =
		(*pexprBottomJoin)[0]->DeriveOutputColumns();
	const CColRefSet *pcrsBottomInner =
		(*pexprBottomJoin)[1]->DeriveOutputColumns();

	BOOL fDisjointWithBottomOuter = pcrsUsed->IsDisjoint(pcrsBottomOuter);
	BOOL fDisjointWithBottomInner = pcrsUsed->IsDisjoint(pcrsBottomInner);
	if (!fDisjointWithBottomOuter && !fDisjointWithBottomInner)
	{
		// top join uses columns from both children of bottom join;
		// join swap is not possible
		return nullptr;
	}

	CExpression *pexprChild = (*pexprBottomJoin)[0];
	CExpression *pexprChildOther = (*pexprBottomJoin)[1];
	if (fDisjointWithBottomOuter && !fDisjointWithBottomInner)
	{
		pexprChild = (*pexprBottomJoin)[1];
		pexprChildOther = (*pexprBottomJoin)[0];
	}

	CExpression *pexprRight = (*pexprTopJoin)[1];
	CExpression *pexprScalar = (*pexprTopJoin)[2];
	COperator *pop = pexprTopJoin->Pop();
	pop->AddRef();
	pexprChild->AddRef();
	pexprRight->AddRef();
	pexprScalar->AddRef();
	CExpression *pexprNewBottomJoin =
		GPOS_NEW(mp) CExpression(mp, pop, pexprChild, pexprRight, pexprScalar);

	pexprScalar = (*pexprBottomJoin)[2];
	pop = pexprBottomJoin->Pop();
	pop->AddRef();
	pexprChildOther->AddRef();
	pexprScalar->AddRef();

	// return a new expression with the two joins swapped
	return GPOS_NEW(mp)
		CExpression(mp, pop, pexprNewBottomJoin, pexprChildOther, pexprScalar);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprPushGbBelowJoin
//
//	@doc:
//		Push a Gb, optionally with a having clause below the child join;
//		if push down fails, the function returns NULL expression
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprPushGbBelowJoin(CMemoryPool *mp, CExpression *pexpr)
{
	COperator::EOperatorId op_id = pexpr->Pop()->Eopid();

	GPOS_ASSERT(COperator::EopLogicalGbAgg == op_id ||
				COperator::EopLogicalGbAggDeduplicate == op_id ||
				COperator::EopLogicalSelect == op_id);

	CExpression *pexprSelect = nullptr;
	CExpression *pexprGb = pexpr;
	if (COperator::EopLogicalSelect == op_id)
	{
		pexprSelect = pexpr;
		pexprGb = (*pexpr)[0];
	}

	CExpression *pexprJoin = (*pexprGb)[0];
	CExpression *pexprPrjList = (*pexprGb)[1];
	CExpression *pexprOuter = (*pexprJoin)[0];
	CExpression *pexprInner = (*pexprJoin)[1];
	CExpression *pexprScalar = (*pexprJoin)[2];
	CLogicalGbAgg *popGbAgg = CLogicalGbAgg::PopConvert(pexprGb->Pop());

	CColRefSet *pcrsOuterOutput = pexprOuter->DeriveOutputColumns();
	CColRefSet *pcrsAggOutput = pexprGb->DeriveOutputColumns();
	CColRefSet *pcrsUsed = pexprPrjList->DeriveUsedColumns();
	CColRefSet *pcrsFKey = PcrsFKey(mp, pexprOuter, pexprInner, pexprScalar);

	CColRefSet *pcrsScalarFromOuter =
		GPOS_NEW(mp) CColRefSet(mp, *(pexprScalar->DeriveUsedColumns()));
	pcrsScalarFromOuter->Intersection(pcrsOuterOutput);

	// use minimal grouping columns if they exist, otherwise use all grouping columns
	CColRefSet *pcrsGrpCols = GPOS_NEW(mp) CColRefSet(mp);
	CColRefArray *colref_array = popGbAgg->PdrgpcrMinimal();
	if (nullptr == colref_array)
	{
		colref_array = popGbAgg->Pdrgpcr();
	}
	pcrsGrpCols->Include(colref_array);

	BOOL fCanPush = FCanPushGbAggBelowJoin(pcrsGrpCols, pcrsOuterOutput,
										   pcrsScalarFromOuter, pcrsAggOutput,
										   pcrsUsed, pcrsFKey);

	// cleanup
	CRefCount::SafeRelease(pcrsFKey);
	pcrsScalarFromOuter->Release();

	if (!fCanPush)
	{
		pcrsGrpCols->Release();

		return nullptr;
	}

	// here, we know that grouping columns include FK and all used columns by Gb
	// come only from the outer child of the join;
	// we can safely push Gb to be on top of join's outer child

	popGbAgg->AddRef();
	CLogicalGbAgg *popGbAggNew =
		PopGbAggPushableBelowJoin(mp, popGbAgg, pcrsOuterOutput, pcrsGrpCols);
	pcrsGrpCols->Release();

	pexprOuter->AddRef();
	pexprPrjList->AddRef();
	CExpression *pexprNewGb =
		GPOS_NEW(mp) CExpression(mp, popGbAggNew, pexprOuter, pexprPrjList);

	CExpression *pexprNewOuter = pexprNewGb;
	if (nullptr != pexprSelect)
	{
		// add Select node on top of Gb
		(*pexprSelect)[1]->AddRef();
		pexprNewOuter =
			CUtils::PexprLogicalSelect(mp, pexprNewGb, (*pexprSelect)[1]);
	}

	COperator *popJoin = pexprJoin->Pop();
	popJoin->AddRef();
	pexprInner->AddRef();
	pexprScalar->AddRef();

	return GPOS_NEW(mp)
		CExpression(mp, popJoin, pexprNewOuter, pexprInner, pexprScalar);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PopGbAggPushableBelowJoin
//
//	@doc:
//		Create the Gb operator to be pushed below a join given the original Gb
//		operator, output columns of the join's outer child and grouping cols
//
//---------------------------------------------------------------------------
CLogicalGbAgg *
CXformUtils::PopGbAggPushableBelowJoin(CMemoryPool *mp,
									   CLogicalGbAgg *popGbAggOld,
									   CColRefSet *pcrsOutputOuter,
									   CColRefSet *pcrsGrpCols)
{
	GPOS_ASSERT(nullptr != popGbAggOld);
	GPOS_ASSERT(nullptr != pcrsOutputOuter);
	GPOS_ASSERT(nullptr != pcrsGrpCols);

	CLogicalGbAgg *popGbAggNew = popGbAggOld;
	if (!pcrsOutputOuter->ContainsAll(pcrsGrpCols))
	{
		// we have grouping columns from both join children;
		// we can drop grouping columns from the inner join child since
		// we already have a FK in grouping columns

		popGbAggNew->Release();
		CColRefSet *pcrsGrpColsNew = GPOS_NEW(mp) CColRefSet(mp);
		pcrsGrpColsNew->Include(pcrsGrpCols);
		pcrsGrpColsNew->Intersection(pcrsOutputOuter);
		if (COperator::EopLogicalGbAggDeduplicate == popGbAggOld->Eopid())
		{
			CColRefArray *pdrgpcrKeys =
				CLogicalGbAggDeduplicate::PopConvert(popGbAggOld)
					->PdrgpcrKeys();
			pdrgpcrKeys->AddRef();
			popGbAggNew = GPOS_NEW(mp) CLogicalGbAggDeduplicate(
				mp, pcrsGrpColsNew->Pdrgpcr(mp), popGbAggOld->Egbaggtype(),
				pdrgpcrKeys);
		}
		else
		{
			popGbAggNew = GPOS_NEW(mp) CLogicalGbAgg(
				mp, pcrsGrpColsNew->Pdrgpcr(mp), popGbAggOld->Egbaggtype());
		}
		pcrsGrpColsNew->Release();
	}

	return popGbAggNew;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FCanPushGbAggBelowJoin
//
//	@doc:
//		Check if the preconditions for pushing down Group by through join are
//		satisfied
//---------------------------------------------------------------------------
BOOL
CXformUtils::FCanPushGbAggBelowJoin(CColRefSet *pcrsGrpCols,
									CColRefSet *pcrsJoinOuterChildOutput,
									CColRefSet *pcrsJoinScalarUsedFromOuter,
									CColRefSet *pcrsGrpByOutput,
									CColRefSet *pcrsGrpByUsed,
									CColRefSet *pcrsFKey)
{
	BOOL fGrpByProvidesUsedColumns =
		pcrsGrpByOutput->ContainsAll(pcrsJoinScalarUsedFromOuter);

	BOOL fHasFK = (nullptr != pcrsFKey);
	BOOL fGrpColsContainFK = (fHasFK && pcrsGrpCols->ContainsAll(pcrsFKey));
	BOOL fOutputColsContainUsedCols =
		pcrsJoinOuterChildOutput->ContainsAll(pcrsGrpByUsed);

	if (!fHasFK || !fGrpColsContainFK || !fOutputColsContainUsedCols ||
		!fGrpByProvidesUsedColumns)
	{
		// GrpBy cannot be pushed through join because
		// (1) no FK exists, or
		// (2) FK exists but grouping columns do not include it, or
		// (3) Gb uses columns from both join children, or
		// (4) Gb hides columns required for the join scalar child

		return false;
	}

	return true;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FSameDatatype
//
//	@doc:
//		Check if the input columns of the outer child are the same as the
//		aligned input columns of the each of the inner children.
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FSameDatatype(CColRef2dArray *pdrgpdrgpcrInput)
{
	GPOS_ASSERT(1 < pdrgpdrgpcrInput->Size());

	CColRefArray *pdrgpcrOuter = (*pdrgpdrgpcrInput)[0];

	ULONG ulColIndex = pdrgpcrOuter->Size();
	ULONG child_index = pdrgpdrgpcrInput->Size();
	for (ULONG ulColCounter = 0; ulColCounter < ulColIndex; ulColCounter++)
	{
		CColRef *pcrOuter = (*pdrgpcrOuter)[ulColCounter];

		for (ULONG ulChildCounter = 1; ulChildCounter < child_index;
			 ulChildCounter++)
		{
			CColRefArray *pdrgpcrInnner = (*pdrgpdrgpcrInput)[ulChildCounter];
			CColRef *pcrInner = (*pdrgpcrInnner)[ulColCounter];

			GPOS_ASSERT(pcrOuter != pcrInner);

			if (pcrInner->RetrieveType() != pcrOuter->RetrieveType())
			{
				return false;
			}
		}
	}

	return true;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::ExistentialToAgg
//
//	@doc:
//		Helper for creating Agg expression equivalent to an existential subquery
//
//		Example:
//			For 'exists(select * from r where a = 10)', we produce the following:
//			New Subquery: (select count(*) as cc from r where a = 10)
//			New Scalar: cc > 0
//
//---------------------------------------------------------------------------
void
CXformUtils::ExistentialToAgg(
	CMemoryPool *mp, CExpression *pexprSubquery,
	CExpression **ppexprNewSubquery,  // output argument for new scalar subquery
	CExpression **ppexprNewScalar  // output argument for new scalar expression
)
{
	GPOS_ASSERT(CUtils::FExistentialSubquery(pexprSubquery->Pop()));
	GPOS_ASSERT(nullptr != ppexprNewSubquery);
	GPOS_ASSERT(nullptr != ppexprNewScalar);

	COperator::EOperatorId op_id = pexprSubquery->Pop()->Eopid();
	CExpression *pexprInner = (*pexprSubquery)[0];
	IMDType::ECmpType ecmptype = IMDType::EcmptG;
	if (COperator::EopScalarSubqueryNotExists == op_id)
	{
		ecmptype = IMDType::EcmptEq;
	}

	pexprInner->AddRef();
	CExpression *pexprInnerNew = CUtils::PexprCountStar(mp, pexprInner);
	const CColRef *pcrCount =
		CScalarProjectElement::PopConvert((*(*pexprInnerNew)[1])[0]->Pop())
			->Pcr();

	*ppexprNewSubquery = GPOS_NEW(mp) CExpression(
		mp,
		GPOS_NEW(mp) CScalarSubquery(mp, pcrCount, true /*fGeneratedByExist*/,
									 false /*fGeneratedByQuantified*/),
		pexprInnerNew);
	*ppexprNewScalar =
		CUtils::PexprCmpWithZero(mp, CUtils::PexprScalarIdent(mp, pcrCount),
								 pcrCount->RetrieveType()->MDId(), ecmptype);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::QuantifiedToAgg
//
//	@doc:
//		Helper for creating Agg expression equivalent to a quantified subquery,
//
//
//---------------------------------------------------------------------------
void
CXformUtils::QuantifiedToAgg(
	CMemoryPool *mp, CExpression *pexprSubquery,
	CExpression **ppexprNewSubquery,  // output argument for new scalar subquery
	CExpression **ppexprNewScalar  // output argument for new scalar expression
)
{
	GPOS_ASSERT(CUtils::FQuantifiedSubquery(pexprSubquery->Pop()));
	GPOS_ASSERT(nullptr != ppexprNewSubquery);
	GPOS_ASSERT(nullptr != ppexprNewScalar);

	if (COperator::EopScalarSubqueryAll == pexprSubquery->Pop()->Eopid())
	{
		return SubqueryAllToAgg(mp, pexprSubquery, ppexprNewSubquery,
								ppexprNewScalar);
	}

	return SubqueryAnyToAgg(mp, pexprSubquery, ppexprNewSubquery,
							ppexprNewScalar);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::SubqueryAnyToAgg
//
//	@doc:
//		Helper for transforming SubqueryAll into aggregate subquery,
//		we need to differentiate between two cases:
//		(1) if subquery predicate uses nullable columns, we may produce null values,
//			this is handeled by adding a null indicator to subquery column, and
//			counting the number of subquery results with null value in subquery column,
//		(2) if subquery predicate does not use nullable columns, we can only produce
//			boolean values,
//
//		Examples:
//
//			- For 'b1 in (select b2 from R)', with b2 not-nullable and is a well-known operator, we produce the following:
//			* New Subquery: (select count(*) as cc from R where b1=b2))
//			* New Scalar: cc > 0
//
//			- For 'b1 in (select b2 from R)', with b2 nullable, we produce the following:
//			* New Subquery: (select Prj_cc from (select count(*), sum(null_indic) from R where b1=b2))
//			where Prj_cc is a project for column cc, defined as follows:
//				if (count(*) == 0), then cc = 0,
//				else if (count(*) == sum(null_indic)), then cc = -1,
//				else cc = count(*)
//			where (-1) indicates that subquery produced a null (this is replaced by NULL in
//			SubqueryHandler when unnesting to LeftApply)
//			* New Scalar: cc > 0
//
//
//---------------------------------------------------------------------------
void
CXformUtils::SubqueryAnyToAgg(
	CMemoryPool *mp, CExpression *pexprSubquery,
	CExpression **ppexprNewSubquery,  // output argument for new scalar subquery
	CExpression **ppexprNewScalar  // output argument for new scalar expression
)
{
	GPOS_ASSERT(CUtils::FQuantifiedSubquery(pexprSubquery->Pop()));
	GPOS_ASSERT(COperator::EopScalarSubqueryAny ==
				pexprSubquery->Pop()->Eopid());
	GPOS_ASSERT(nullptr != ppexprNewSubquery);
	GPOS_ASSERT(nullptr != ppexprNewScalar);

	CExpression *pexprInner = (*pexprSubquery)[0];

	// build subquery quantified comparison
	CExpression *pexprResult = nullptr;
	CSubqueryHandler sh(mp, false /* fEnforceCorrelatedApply */);
	CExpression *pexprSubqPred =
		sh.PexprSubqueryPred(pexprInner, pexprSubquery, &pexprResult,
							 CSubqueryHandler::EsqctxtFilter);
	CScalarCmp *scalarCmp = CScalarCmp::PopConvert(pexprSubqPred->Pop());

	GPOS_ASSERT(nullptr != scalarCmp);

	const CColRef *pcrSubq =
		CScalarSubqueryQuantified::PopConvert(pexprSubquery->Pop())->Pcr();
	BOOL fCanEvaluateToNull =
		(CUtils::FUsesNullableCol(mp, pexprSubqPred, pexprResult) ||
		 !CPredicateUtils::FBuiltInComparisonIsVeryStrict(scalarCmp->MdIdOp()));

	CExpression *pexprInnerNew = nullptr;
	pexprInner->AddRef();
	if (fCanEvaluateToNull)
	{
		// TODO: change this to <pexprSubqPred> is not false, get rid of pexprNullIndicator
		// add a null indicator
		CExpression *pexprNullIndicator =
			PexprNullIndicator(mp, CUtils::PexprScalarIdent(mp, pcrSubq));
		CExpression *pexprPrj =
			CUtils::PexprAddProjection(mp, pexprResult, pexprNullIndicator);
		pexprResult = pexprPrj;

		// add disjunction with is not null check
		CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
		pdrgpexpr->Append(pexprSubqPred);
		pdrgpexpr->Append(
			CUtils::PexprIsNull(mp, CUtils::PexprScalarIdent(mp, pcrSubq)));

		pexprSubqPred = CPredicateUtils::PexprDisjunction(mp, pdrgpexpr);
	}

	CExpression *pexprSelect =
		CUtils::PexprLogicalSelect(mp, pexprResult, pexprSubqPred);
	if (fCanEvaluateToNull)
	{
		const CColRef *pcrNullIndicator =
			CScalarProjectElement::PopConvert(
				(*(*(*pexprSelect)[0])[1])[0]->Pop())
				->Pcr();
		pexprInnerNew =
			CUtils::PexprCountStarAndSum(mp, pcrNullIndicator, pexprSelect);
		const CColRef *pcrCount =
			CScalarProjectElement::PopConvert((*(*pexprInnerNew)[1])[0]->Pop())
				->Pcr();
		const CColRef *pcrSum =
			CScalarProjectElement::PopConvert((*(*pexprInnerNew)[1])[1]->Pop())
				->Pcr();

		CExpression *pexprScalarIdentCount =
			CUtils::PexprScalarIdent(mp, pcrCount);
		CExpression *pexprCountEqZero = CUtils::PexprCmpWithZero(
			mp, pexprScalarIdentCount,
			CScalarIdent::PopConvert(pexprScalarIdentCount->Pop())->MdidType(),
			IMDType::EcmptEq);
		CExpression *pexprCountEqSum =
			CUtils::PexprScalarEqCmp(mp, pcrCount, pcrSum);

		CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
		const IMDTypeInt8 *pmdtypeint8 = md_accessor->PtMDType<IMDTypeInt8>();
		IMDId *pmdidInt8 = pmdtypeint8->MDId();
		pmdidInt8->AddRef();
		pmdidInt8->AddRef();

		CExpression *pexprProjected = GPOS_NEW(mp) CExpression(
			mp, GPOS_NEW(mp) CScalarIf(mp, pmdidInt8), pexprCountEqZero,
			CUtils::PexprScalarConstInt8(mp, 0 /*val*/),
			GPOS_NEW(mp) CExpression(
				mp, GPOS_NEW(mp) CScalarIf(mp, pmdidInt8), pexprCountEqSum,
				CUtils::PexprScalarConstInt8(mp, -1 /*val*/),
				CUtils::PexprScalarIdent(mp, pcrCount)));
		CExpression *pexprPrj =
			CUtils::PexprAddProjection(mp, pexprInnerNew, pexprProjected);

		const CColRef *pcrSubquery =
			CScalarProjectElement::PopConvert((*(*pexprPrj)[1])[0]->Pop())
				->Pcr();
		*ppexprNewSubquery = GPOS_NEW(mp) CExpression(
			mp,
			GPOS_NEW(mp)
				CScalarSubquery(mp, pcrSubquery, false /*fGeneratedByExist*/,
								true /*fGeneratedByQuantified*/),
			pexprPrj);
		*ppexprNewScalar = CUtils::PexprCmpWithZero(
			mp, CUtils::PexprScalarIdent(mp, pcrSubquery),
			pcrSubquery->RetrieveType()->MDId(), IMDType::EcmptG);
	}
	else
	{
		// replace <col1> <op> ANY (select <col2> from SQ) with
		//         (select count(*) from (select )) > 0
		pexprInnerNew = CUtils::PexprCountStar(mp, pexprSelect);
		const CColRef *pcrCount =
			CScalarProjectElement::PopConvert((*(*pexprInnerNew)[1])[0]->Pop())
				->Pcr();

		*ppexprNewSubquery = GPOS_NEW(mp) CExpression(
			mp,
			GPOS_NEW(mp)
				CScalarSubquery(mp, pcrCount, false /*fGeneratedByExist*/,
								true /*fGeneratedByQuantified*/),
			pexprInnerNew);
		*ppexprNewScalar = CUtils::PexprCmpWithZero(
			mp, CUtils::PexprScalarIdent(mp, pcrCount),
			pcrCount->RetrieveType()->MDId(), IMDType::EcmptG);
	}
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::SubqueryAllToAgg
//
//	@doc:
//		Helper for transforming SubqueryAll into aggregate subquery,
//		we generate aggregate expressions that compute the following values:
//		- N: number of null values returned by evaluating inner expression
//		- S: number of inner values matching outer value
//		the generated subquery returns a Boolean result generated by the following
//		nested-if statement:
//
//			if (inner is empty)
//				return true
//			else if (N > 0)
//				return null
//			else if (outer value is null)
//				return null
//			else if (S == 0)
//				return true
//			else
//				return false
//
//
//---------------------------------------------------------------------------
void
CXformUtils::SubqueryAllToAgg(
	CMemoryPool *mp, CExpression *pexprSubquery,
	CExpression **ppexprNewSubquery,  // output argument for new scalar subquery
	CExpression **ppexprNewScalar  // output argument for new scalar expression
)
{
	GPOS_ASSERT(CUtils::FQuantifiedSubquery(pexprSubquery->Pop()));
	GPOS_ASSERT(COperator::EopScalarSubqueryAll ==
				pexprSubquery->Pop()->Eopid());
	GPOS_ASSERT(nullptr != ppexprNewSubquery);
	GPOS_ASSERT(nullptr != ppexprNewScalar);

	CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();

	CExpression *pexprInner = (*pexprSubquery)[0];
	CExpression *pexprScalarOuter = (*pexprSubquery)[1];
	CExpression *pexprSubqPred = PexprInversePred(mp, pexprSubquery);

	// generate subquery test expression
	const IMDTypeInt4 *pmdtypeint4 = md_accessor->PtMDType<IMDTypeInt4>();
	IMDId *pmdidInt4 = pmdtypeint4->MDId();
	pmdidInt4->AddRef();
	CExpression *pexprSubqTest = GPOS_NEW(mp)
		CExpression(mp, GPOS_NEW(mp) CScalarIf(mp, pmdidInt4), pexprSubqPred,
					CUtils::PexprScalarConstInt4(mp, 1 /*val*/),
					CUtils::PexprScalarConstInt4(mp, 0 /*val*/));

	CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
	pdrgpexpr->Append(pexprSubqTest);

	// generate null indicator for inner expression
	const CColRef *pcrSubq =
		CScalarSubqueryQuantified::PopConvert(pexprSubquery->Pop())->Pcr();
	CExpression *pexprInnerNullIndicator =
		PexprNullIndicator(mp, CUtils::PexprScalarIdent(mp, pcrSubq));
	pdrgpexpr->Append(pexprInnerNullIndicator);

	// add generated expression as projected nodes
	pexprInner->AddRef();
	CExpression *pexprPrj =
		CUtils::PexprAddProjection(mp, pexprInner, pdrgpexpr);
	pdrgpexpr->Release();

	// generate a group by expression with sum(subquery-test) and sum(inner null indicator) aggreagtes
	CColRefArray *colref_array = GPOS_NEW(mp) CColRefArray(mp);
	CColRef *pcrSubqTest =
		CScalarProjectElement::PopConvert((*(*pexprPrj)[1])[0]->Pop())->Pcr();
	CColRef *pcrInnerNullTest =
		CScalarProjectElement::PopConvert((*(*pexprPrj)[1])[1]->Pop())->Pcr();
	colref_array->Append(pcrSubqTest);
	colref_array->Append(pcrInnerNullTest);
	CExpression *pexprGbAggSum =
		CUtils::PexprGbAggSum(mp, pexprPrj, colref_array);
	colref_array->Release();

	// generate helper test expressions
	const CColRef *pcrSum =
		CScalarProjectElement::PopConvert((*(*pexprGbAggSum)[1])[0]->Pop())
			->Pcr();
	const CColRef *pcrSumNulls =
		CScalarProjectElement::PopConvert((*(*pexprGbAggSum)[1])[1]->Pop())
			->Pcr();
	CExpression *pexprScalarIdentSum = CUtils::PexprScalarIdent(mp, pcrSum);
	CExpression *pexprScalarIdentSumNulls =
		CUtils::PexprScalarIdent(mp, pcrSumNulls);

	CExpression *pexprSumTest = CUtils::PexprCmpWithZero(
		mp, pexprScalarIdentSum,
		CScalarIdent::PopConvert(pexprScalarIdentSum->Pop())->MdidType(),
		IMDType::EcmptEq);
	pexprScalarIdentSum->AddRef();
	CExpression *pexprIsInnerEmpty =
		CUtils::PexprIsNull(mp, pexprScalarIdentSum);
	CExpression *pexprInnerHasNulls = CUtils::PexprCmpWithZero(
		mp, pexprScalarIdentSumNulls,
		CScalarIdent::PopConvert(pexprScalarIdentSumNulls->Pop())->MdidType(),
		IMDType::EcmptG);
	pexprScalarOuter->AddRef();
	CExpression *pexprIsOuterNull = CUtils::PexprIsNull(mp, pexprScalarOuter);

	// generate the main scalar if that will produce subquery result
	const IMDTypeBool *pmdtypebool = md_accessor->PtMDType<IMDTypeBool>();
	IMDId *pmdidBool = pmdtypebool->MDId();
	pmdidBool->AddRef();
	pmdidBool->AddRef();
	pmdidBool->AddRef();
	pmdidBool->AddRef();
	pexprPrj = GPOS_NEW(mp) CExpression(
		mp, GPOS_NEW(mp) CScalarIf(mp, pmdidBool), pexprIsInnerEmpty,
		CUtils::PexprScalarConstBool(
			mp, true /*value*/),  // if inner is empty, return true
		GPOS_NEW(mp) CExpression(
			mp, GPOS_NEW(mp) CScalarIf(mp, pmdidBool), pexprInnerHasNulls,
			CUtils::PexprScalarConstBool(
				mp, false /*value*/,
				true /*is_null*/),	// if inner produced null values, return null
			GPOS_NEW(mp) CExpression(
				mp, GPOS_NEW(mp) CScalarIf(mp, pmdidBool), pexprIsOuterNull,
				CUtils::PexprScalarConstBool(
					mp, false /*value*/,
					true /*is_null*/),	// if outer value is null, return null
				GPOS_NEW(mp) CExpression(
					mp, GPOS_NEW(mp) CScalarIf(mp, pmdidBool),
					pexprSumTest,  // otherwise, test number of inner values that match outer value
					CUtils::PexprScalarConstBool(mp,
												 true /*value*/),  // no matches
					CUtils::PexprScalarConstBool(
						mp, false /*value*/)  // at least one match
					))));

	CExpression *pexprProjected =
		CUtils::PexprAddProjection(mp, pexprGbAggSum, pexprPrj);

	const CColRef *pcrSubquery =
		CScalarProjectElement::PopConvert((*(*pexprProjected)[1])[0]->Pop())
			->Pcr();
	*ppexprNewSubquery = GPOS_NEW(mp) CExpression(
		mp,
		GPOS_NEW(mp)
			CScalarSubquery(mp, pcrSubquery, false /*fGeneratedByExist*/,
							true /*fGeneratedByQuantified*/),
		pexprProjected);
	*ppexprNewScalar = CUtils::PexprScalarCmp(
		mp, CUtils::PexprScalarIdent(mp, pcrSubquery),
		CUtils::PexprScalarConstBool(mp, true /*value*/), IMDType::EcmptEq);
}


//---------------------------------------------------------------------------
// CXformUtils::PexprSeparateSubqueryPreds
//
// Helper function to separate subquery predicates in a top Select node.
// Transforms a join expression join(<logical children>, <expr with SQ>)
// into select(join(<logical children>, <expr>), <subquery preds>).
// Returns NULL if there are no subqueries in the inner join predicates.
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprSeparateSubqueryPreds(CMemoryPool *mp, CExpression *pexpr)
{
	COperator::EOperatorId op_id = pexpr->Pop()->Eopid();
	GPOS_ASSERT(COperator::EopLogicalInnerJoin == op_id ||
				COperator::EopLogicalNAryJoin == op_id);

	// split scalar expression into a conjunction of predicates with and without
	// subqueries
	const ULONG arity = pexpr->Arity();
	CExpression *pexprScalar = (*pexpr)[arity - 1];
	CLogicalNAryJoin *naryLOJOp =
		CLogicalNAryJoin::PopConvertNAryLOJ(pexpr->Pop());
	CExpression *innerJoinPreds = pexprScalar;
	if (nullptr != naryLOJOp)
	{
		innerJoinPreds = naryLOJOp->GetInnerJoinPreds(pexpr);
	}
	CExpressionArray *pdrgpexprConjuncts =
		CPredicateUtils::PdrgpexprConjuncts(mp, innerJoinPreds);
	CExpressionArray *pdrgpexprSQ = GPOS_NEW(mp) CExpressionArray(mp);
	CExpressionArray *pdrgpexprNonSQ = GPOS_NEW(mp) CExpressionArray(mp);

	const ULONG ulConjuncts = pdrgpexprConjuncts->Size();
	for (ULONG ul = 0; ul < ulConjuncts; ul++)
	{
		CExpression *pexprConj = (*pdrgpexprConjuncts)[ul];
		pexprConj->AddRef();

		if (pexprConj->DeriveHasSubquery())
		{
			pdrgpexprSQ->Append(pexprConj);
		}
		else
		{
			pdrgpexprNonSQ->Append(pexprConj);
		}
	}

	pdrgpexprConjuncts->Release();

	if (0 == pdrgpexprSQ->Size())
	{
		// no subqueries found in inner join predicates, they must be in the LOJ preds
		GPOS_ASSERT(nullptr != naryLOJOp);
		pdrgpexprSQ->Release();
		pdrgpexprNonSQ->Release();

		return nullptr;
	}

	// build children array from logical children
	CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
	for (ULONG ul = 0; ul < arity - 1; ul++)
	{
		CExpression *pexprChild = (*pexpr)[ul];
		pexprChild->AddRef();
		pdrgpexpr->Append(pexprChild);
	}

	// build a new join with the new non-subquery predicates
	COperator *popJoin = nullptr;

	if (nullptr == naryLOJOp)
	{
		if (COperator::EopLogicalInnerJoin == op_id)
		{
			popJoin = GPOS_NEW(mp) CLogicalInnerJoin(mp);
		}
		else
		{
			popJoin = GPOS_NEW(mp) CLogicalNAryJoin(mp);
		}
		pdrgpexpr->Append(
			CPredicateUtils::PexprConjunction(mp, pdrgpexprNonSQ));
	}
	else
	{
		// nary LOJ, make sure to include the indexes assigning children
		// to LOJs and to preserve the CScalarNAryJoinPredList
		ULongPtrArray *childIndexes = naryLOJOp->GetLojChildPredIndexes();

		childIndexes->AddRef();

		popJoin = GPOS_NEW(mp) CLogicalNAryJoin(mp, childIndexes);

		pdrgpexpr->Append(naryLOJOp->ReplaceInnerJoinPredicates(
			mp, pexprScalar,
			CPredicateUtils::PexprConjunction(mp, pdrgpexprNonSQ)));
	}

	CExpression *pexprJoin = GPOS_NEW(mp) CExpression(mp, popJoin, pdrgpexpr);

	// return a Select node with a conjunction of subquery predicates
	return CUtils::PexprLogicalSelect(
		mp, pexprJoin, CPredicateUtils::PexprConjunction(mp, pdrgpexprSQ));
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprInversePred
//
//	@doc:
//		Helper for creating the inverse of the predicate used by
//		subquery ALL
//
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprInversePred(CMemoryPool *mp, CExpression *pexprSubquery)
{
	// get the scalar child of subquery
	CScalarSubqueryAll *popSqAll =
		CScalarSubqueryAll::PopConvert(pexprSubquery->Pop());
	CExpression *pexprScalar = (*pexprSubquery)[1];
	const CColRef *colref = popSqAll->Pcr();
	CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();

	// get mdid and name of the inverse of the comparison operator used by subquery
	IMDId *mdid_op = popSqAll->MdIdOp();
	IMDId *pmdidInverseOp =
		md_accessor->RetrieveScOp(mdid_op)->GetInverseOpMdid();
	const CWStringConst *pstrFirst =
		md_accessor->RetrieveScOp(pmdidInverseOp)->Mdname().GetMDName();

	// generate a predicate for the inversion of the comparison involved in the subquery
	pexprScalar->AddRef();
	pmdidInverseOp->AddRef();

	return CUtils::PexprScalarCmp(mp, pexprScalar, colref, *pstrFirst,
								  pmdidInverseOp);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprNullIndicator
//
//	@doc:
//		Helper for creating a null indicator
//		Creates an expression case when <pexpr> is null then 1 else 0 end
//
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprNullIndicator(CMemoryPool *mp, CExpression *pexpr)
{
	CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();

	CExpression *pexprIsNull = CUtils::PexprIsNull(mp, pexpr);
	const IMDTypeInt4 *pmdtypeint4 = md_accessor->PtMDType<IMDTypeInt4>();
	IMDId *mdid = pmdtypeint4->MDId();
	mdid->AddRef();
	return GPOS_NEW(mp)
		CExpression(mp, GPOS_NEW(mp) CScalarIf(mp, mdid), pexprIsNull,
					CUtils::PexprScalarConstInt4(mp, 1 /*val*/),
					CUtils::PexprScalarConstInt4(mp, 0 /*val*/));
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprLogicalPartitionSelector
//
//	@doc:
// 		Create a logical partition selector for the given table descriptor on top
//		of the given child expression. The partition selection filters use columns
//		from the given column array
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprLogicalPartitionSelector(CMemoryPool *mp,
										   CTableDescriptor *ptabdesc,
										   CColRefArray *colref_array,
										   CExpression *pexprChild)
{
	IMDId *rel_mdid = ptabdesc->MDId();
	rel_mdid->AddRef();

	// create an oid column
	CColumnFactory *col_factory = COptCtxt::PoctxtFromTLS()->Pcf();
	CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
	const IMDTypeOid *pmdtype = md_accessor->PtMDType<IMDTypeOid>();
	CColRef *pcrOid = col_factory->PcrCreate(pmdtype, default_type_modifier);
	CExpressionArray *pdrgpexprFilters =
		PdrgpexprPartEqFilters(mp, ptabdesc, colref_array);

	CLogicalPartitionSelector *popSelector = GPOS_NEW(mp)
		CLogicalPartitionSelector(mp, rel_mdid, pdrgpexprFilters, pcrOid);

	return GPOS_NEW(mp) CExpression(mp, popSelector, pexprChild);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprLogicalDMLOverProject
//
//	@doc:
// 		Create a logical DML on top of a project
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprLogicalDMLOverProject(CMemoryPool *mp,
										CExpression *pexprChild,
										CLogicalDML::EDMLOperator edmlop,
										CTableDescriptor *ptabdesc,
										CColRefArray *colref_array,
										CColRef *pcrCtid, CColRef *pcrSegmentId)
{
	GPOS_ASSERT(CLogicalDML::EdmlInsert == edmlop ||
				CLogicalDML::EdmlDelete == edmlop);
	INT val = CScalarDMLAction::EdmlactionInsert;
	if (CLogicalDML::EdmlDelete == edmlop)
	{
		val = CScalarDMLAction::EdmlactionDelete;
	}

	// new expressions to project
	IMDId *rel_mdid = ptabdesc->MDId();
	CExpression *pexprProject = nullptr;
	CColRef *pcrAction = nullptr;
	CColRef *pcrOid = nullptr;

	if (ptabdesc->IsPartitioned() && CLogicalDML::EdmlDelete == edmlop)
	{
		// generate a PartitionSelector node which generates OIDs, then add a project
		// on top of that to add the action column
		CExpression *pexprSelector = PexprLogicalPartitionSelector(
			mp, ptabdesc, colref_array, pexprChild);
		if (CUtils::FGeneratePartOid(ptabdesc->MDId()))
		{
			pcrOid = CLogicalPartitionSelector::PopConvert(pexprSelector->Pop())
						 ->PcrOid();
		}
		pexprProject = CUtils::PexprAddProjection(
			mp, pexprSelector, CUtils::PexprScalarConstInt4(mp, val));
		CExpression *pexprPrL = (*pexprProject)[1];
		pcrAction = CUtils::PcrFromProjElem((*pexprPrL)[0]);
	}
	else
	{
		CExpressionArray *pdrgpexprProjected =
			GPOS_NEW(mp) CExpressionArray(mp);
		// generate one project node with two new columns: action, oid (based on the traceflag)
		pdrgpexprProjected->Append(CUtils::PexprScalarConstInt4(mp, val));

		BOOL fGeneratePartOid = CUtils::FGeneratePartOid(ptabdesc->MDId());
		if (fGeneratePartOid)
		{
			OID oidTable = CMDIdGPDB::CastMdid(rel_mdid)->Oid();
			pdrgpexprProjected->Append(
				CUtils::PexprScalarConstOid(mp, oidTable));
		}

		pexprProject =
			CUtils::PexprAddProjection(mp, pexprChild, pdrgpexprProjected);
		pdrgpexprProjected->Release();

		CExpression *pexprPrL = (*pexprProject)[1];
		pcrAction = CUtils::PcrFromProjElem((*pexprPrL)[0]);
		if (fGeneratePartOid)
		{
			pcrOid = CUtils::PcrFromProjElem((*pexprPrL)[1]);
		}
	}

	GPOS_ASSERT(nullptr != pcrAction);

	if (FTriggersExist(edmlop, ptabdesc, true /*fBefore*/))
	{
		rel_mdid->AddRef();
		pexprProject = PexprRowTrigger(mp, pexprProject, edmlop, rel_mdid,
									   true /*fBefore*/, colref_array);
	}

	if (CLogicalDML::EdmlInsert == edmlop)
	{
		// add assert for check constraints and nullness checks if needed
		COptimizerConfig *optimizer_config =
			COptCtxt::PoctxtFromTLS()->GetOptimizerConfig();
		if (optimizer_config->GetHint()->FEnforceConstraintsOnDML())
		{
			pexprProject = PexprAssertConstraints(mp, pexprProject, ptabdesc,
												  colref_array);
		}
	}

	CExpression *pexprDML = GPOS_NEW(mp) CExpression(
		mp,
		GPOS_NEW(mp)
			CLogicalDML(mp, edmlop, ptabdesc, colref_array,
						GPOS_NEW(mp) CBitSet(mp) /*pbsModified*/, pcrAction,
						pcrOid, pcrCtid, pcrSegmentId, true),
		pexprProject);

	CExpression *pexprOutput = pexprDML;

	if (FTriggersExist(edmlop, ptabdesc, false /*fBefore*/))
	{
		rel_mdid->AddRef();
		pexprOutput = PexprRowTrigger(mp, pexprOutput, edmlop, rel_mdid,
									  false /*fBefore*/, colref_array);
	}

	return pexprOutput;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FTriggersExist
//
//	@doc:
//		Check whether there are any BEFORE or AFTER row-level triggers on
//		the given table that match the given DML operation
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FTriggersExist(CLogicalDML::EDMLOperator edmlop,
							CTableDescriptor *ptabdesc, BOOL fBefore)
{
	CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
	const IMDRelation *pmdrel = md_accessor->RetrieveRel(ptabdesc->MDId());
	const ULONG ulTriggers = pmdrel->TriggerCount();

	for (ULONG ul = 0; ul < ulTriggers; ul++)
	{
		const IMDTrigger *pmdtrigger =
			md_accessor->RetrieveTrigger(pmdrel->TriggerMDidAt(ul));
		if (!pmdtrigger->IsEnabled() || !pmdtrigger->ExecutesOnRowLevel() ||
			!FTriggerApplies(edmlop, pmdtrigger))
		{
			continue;
		}

		if (pmdtrigger->IsBefore() == fBefore)
		{
			return true;
		}
	}

	return false;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FTriggerApplies
//
//	@doc:
//		Does the given trigger type match the given logical DML type
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FTriggerApplies(CLogicalDML::EDMLOperator edmlop,
							 const IMDTrigger *pmdtrigger)
{
	return ((CLogicalDML::EdmlInsert == edmlop && pmdtrigger->IsInsert()) ||
			(CLogicalDML::EdmlDelete == edmlop && pmdtrigger->IsDelete()) ||
			(CLogicalDML::EdmlUpdate == edmlop && pmdtrigger->IsUpdate()));
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprRowTrigger
//
//	@doc:
//		Construct a trigger expression on top of the given expression
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprRowTrigger(CMemoryPool *mp, CExpression *pexprChild,
							 CLogicalDML::EDMLOperator edmlop, IMDId *rel_mdid,
							 BOOL fBefore, CColRefArray *colref_array)
{
	GPOS_ASSERT(CLogicalDML::EdmlInsert == edmlop ||
				CLogicalDML::EdmlDelete == edmlop);

	colref_array->AddRef();
	if (CLogicalDML::EdmlInsert == edmlop)
	{
		return PexprRowTrigger(mp, pexprChild, edmlop, rel_mdid, fBefore,
							   nullptr /*pdrgpcrOld*/, colref_array);
	}

	return PexprRowTrigger(mp, pexprChild, edmlop, rel_mdid, fBefore,
						   colref_array, nullptr /*pdrgpcrNew*/);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprAssertNotNull
//
//	@doc:
//		Construct an assert on top of the given expression for nullness checks
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprAssertNotNull(CMemoryPool *mp, CExpression *pexprChild,
								CTableDescriptor *ptabdesc,
								CColRefArray *colref_array)
{
	CColumnDescriptorArray *pdrgpcoldesc = ptabdesc->Pdrgpcoldesc();

	const ULONG num_cols = pdrgpcoldesc->Size();
	CColRefSet *pcrsNotNull = pexprChild->DeriveNotNullColumns();

	CExpressionArray *pdrgpexprAssertConstraints =
		GPOS_NEW(mp) CExpressionArray(mp);

	for (ULONG ul = 0; ul < num_cols; ul++)
	{
		CColumnDescriptor *pcoldesc = (*pdrgpcoldesc)[ul];
		if (pcoldesc->IsNullable() || pcoldesc->IsSystemColumn())
		{
			// target column is nullable or it's a system column: no need to check for NULL
			continue;
		}

		CColRef *colref = (*colref_array)[ul];

		if (pcrsNotNull->FMember(colref))
		{
			// source column not nullable: no need to check for NULL
			continue;
		}

		// add not null check for current column
		CExpression *pexprNotNull =
			CUtils::PexprIsNotNull(mp, CUtils::PexprScalarIdent(mp, colref));

		CWStringConst *pstrErrorMsg =
			PstrErrorMessage(mp, gpos::CException::ExmaSQL,
							 gpos::CException::ExmiSQLNotNullViolation,
							 pcoldesc->Name().Pstr()->GetBuffer(),
							 ptabdesc->Name().Pstr()->GetBuffer());

		CExpression *pexprAssertConstraint = GPOS_NEW(mp) CExpression(
			mp, GPOS_NEW(mp) CScalarAssertConstraint(mp, pstrErrorMsg),
			pexprNotNull);

		pdrgpexprAssertConstraints->Append(pexprAssertConstraint);
	}

	if (0 == pdrgpexprAssertConstraints->Size())
	{
		pdrgpexprAssertConstraints->Release();
		return pexprChild;
	}

	CExpression *pexprAssertPredicate = GPOS_NEW(mp)
		CExpression(mp, GPOS_NEW(mp) CScalarAssertConstraintList(mp),
					pdrgpexprAssertConstraints);

	CLogicalAssert *popAssert = GPOS_NEW(mp) CLogicalAssert(
		mp, GPOS_NEW(mp) CException(gpos::CException::ExmaSQL,
									gpos::CException::ExmiSQLNotNullViolation));

	return GPOS_NEW(mp)
		CExpression(mp, popAssert, pexprChild, pexprAssertPredicate);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprRowTrigger
//
//	@doc:
//		Construct a trigger expression on top of the given expression
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprRowTrigger(CMemoryPool *mp, CExpression *pexprChild,
							 CLogicalDML::EDMLOperator edmlop, IMDId *rel_mdid,
							 BOOL fBefore, CColRefArray *pdrgpcrOld,
							 CColRefArray *pdrgpcrNew)
{
	INT type = GPMD_TRIGGER_ROW;
	if (fBefore)
	{
		type |= GPMD_TRIGGER_BEFORE;
	}

	switch (edmlop)
	{
		case CLogicalDML::EdmlInsert:
			type |= GPMD_TRIGGER_INSERT;
			break;
		case CLogicalDML::EdmlDelete:
			type |= GPMD_TRIGGER_DELETE;
			break;
		case CLogicalDML::EdmlUpdate:
			type |= GPMD_TRIGGER_UPDATE;
			break;
		default:
			GPOS_ASSERT(!"Invalid DML operation");
	}

	return GPOS_NEW(mp) CExpression(
		mp,
		GPOS_NEW(mp)
			CLogicalRowTrigger(mp, rel_mdid, type, pdrgpcrOld, pdrgpcrNew),
		pexprChild);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PdrgpexprPartFilters
//
//	@doc:
// 		Return partition filter expressions for a DML operation given a table
//		descriptor and the column references seen by this DML
//
//---------------------------------------------------------------------------
CExpressionArray *
CXformUtils::PdrgpexprPartEqFilters(CMemoryPool *mp, CTableDescriptor *ptabdesc,
									CColRefArray *pdrgpcrSource)
{
	GPOS_ASSERT(nullptr != ptabdesc);
	GPOS_ASSERT(nullptr != pdrgpcrSource);

	const ULongPtrArray *pdrgpulPart = ptabdesc->PdrgpulPart();

	CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);

	const ULONG ulPartKeys = pdrgpulPart->Size();
	GPOS_ASSERT(0 < ulPartKeys);
	GPOS_ASSERT(pdrgpcrSource->Size() >= ulPartKeys);

	for (ULONG ul = 0; ul < ulPartKeys; ul++)
	{
		ULONG *pulPartKey = (*pdrgpulPart)[ul];
		CColRef *colref = (*pdrgpcrSource)[*pulPartKey];
		pdrgpexpr->Append(CUtils::PexprScalarIdent(mp, colref));
	}

	return pdrgpexpr;
}

//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::PexprAssertConstraints
//
//      @doc:
//          Construct an assert on top of the given expression for check constraints
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprAssertConstraints(CMemoryPool *mp, CExpression *pexprChild,
									CTableDescriptor *ptabdesc,
									CColRefArray *colref_array)
{
	CExpression *pexprAssertNotNull =
		PexprAssertNotNull(mp, pexprChild, ptabdesc, colref_array);

	return PexprAssertCheckConstraints(mp, pexprAssertNotNull, ptabdesc,
									   colref_array);
}

//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::PexprAssertCheckConstraints
//
//      @doc:
//          Construct an assert on top of the given expression for check constraints
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprAssertCheckConstraints(CMemoryPool *mp,
										 CExpression *pexprChild,
										 CTableDescriptor *ptabdesc,
										 CColRefArray *colref_array)
{
	CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
	const IMDRelation *pmdrel = md_accessor->RetrieveRel(ptabdesc->MDId());

	const ULONG ulCheckConstraint = pmdrel->CheckConstraintCount();
	if (0 < ulCheckConstraint)
	{
		CExpressionArray *pdrgpexprAssertConstraints =
			GPOS_NEW(mp) CExpressionArray(mp);

		for (ULONG ul = 0; ul < ulCheckConstraint; ul++)
		{
			IMDId *pmdidCheckConstraint = pmdrel->CheckConstraintMDidAt(ul);
			const IMDCheckConstraint *pmdCheckConstraint =
				md_accessor->RetrieveCheckConstraints(pmdidCheckConstraint);

			// extract the check constraint expression
			CExpression *pexprCheckConstraint =
				pmdCheckConstraint->GetCheckConstraintExpr(mp, md_accessor,
														   colref_array);

			// A table check constraint is satisfied if and only if the specified <search condition>
			// evaluates to True or Unknown for every row of the table to which it applies.
			// Add an "is not false" expression on top to handle such scenarios
			CExpression *pexprIsNotFalse =
				CUtils::PexprIsNotFalse(mp, pexprCheckConstraint);
			CWStringConst *pstrErrMsg = PstrErrorMessage(
				mp, gpos::CException::ExmaSQL,
				gpos::CException::ExmiSQLCheckConstraintViolation,
				pmdCheckConstraint->Mdname().GetMDName()->GetBuffer(),
				ptabdesc->Name().Pstr()->GetBuffer());
			CExpression *pexprAssertConstraint = GPOS_NEW(mp) CExpression(
				mp, GPOS_NEW(mp) CScalarAssertConstraint(mp, pstrErrMsg),
				pexprIsNotFalse);

			pdrgpexprAssertConstraints->Append(pexprAssertConstraint);
		}

		CExpression *pexprAssertPredicate = GPOS_NEW(mp)
			CExpression(mp, GPOS_NEW(mp) CScalarAssertConstraintList(mp),
						pdrgpexprAssertConstraints);

		CLogicalAssert *popAssert = GPOS_NEW(mp) CLogicalAssert(
			mp, GPOS_NEW(mp) CException(
					gpos::CException::ExmaSQL,
					gpos::CException::ExmiSQLCheckConstraintViolation));


		return GPOS_NEW(mp)
			CExpression(mp, popAssert, pexprChild, pexprAssertPredicate);
	}

	return pexprChild;
}


//---------------------------------------------------------------------------
//   @function:
//		CXformUtils::FSplitAggXform
//
//   @doc:
//      Check if given xform is an Agg splitting xform
//
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FSplitAggXform(CXform::EXformId exfid)
{
	return CXform::ExfSplitGbAgg == exfid || CXform::ExfSplitDQA == exfid ||
		   CXform::ExfSplitGbAggDedup == exfid || CXform::ExfEagerAgg == exfid;
}

BOOL
CXformUtils::FAggGenBySplitDQAXform(CExpression *pexprAgg)
{
	CGroupExpression *pgexprOrigin = pexprAgg->Pgexpr();
	if (nullptr != pgexprOrigin)
	{
		return CXform::ExfSplitDQA == pgexprOrigin->ExfidOrigin();
	}

	return false;
}

// Check if given expression is a multi-stage Agg based on agg type
// or origin xform
BOOL
CXformUtils::FMultiStageAgg(CExpression *pexprAgg)
{
	GPOS_ASSERT(nullptr != pexprAgg);
	GPOS_ASSERT(COperator::EopLogicalGbAgg == pexprAgg->Pop()->Eopid() ||
				COperator::EopLogicalGbAggDeduplicate ==
					pexprAgg->Pop()->Eopid());

	CLogicalGbAgg *popAgg = CLogicalGbAgg::PopConvert(pexprAgg->Pop());
	if (COperator::EgbaggtypeGlobal != popAgg->Egbaggtype())
	{
		// a non-global agg is a multi-stage agg
		return true;
	}

	// check xform lineage
	BOOL fMultiStage = false;
	CGroupExpression *pgexprOrigin = pexprAgg->Pgexpr();
	while (nullptr != pgexprOrigin && !fMultiStage)
	{
		fMultiStage = FSplitAggXform(pgexprOrigin->ExfidOrigin());
		pgexprOrigin = pgexprOrigin->PgexprOrigin();
	}

	return fMultiStage;
}

BOOL
CXformUtils::FLocalAggCreatedByEagerAggXform(CExpression *pexprAgg)
{
	GPOS_ASSERT(nullptr != pexprAgg);
	GPOS_ASSERT(COperator::EopLogicalGbAgg == pexprAgg->Pop()->Eopid() ||
				COperator::EopLogicalGbAggDeduplicate ==
					pexprAgg->Pop()->Eopid());

	CLogicalGbAgg *popAgg = CLogicalGbAgg::PopConvert(pexprAgg->Pop());
	if (COperator::EgbaggtypeLocal != popAgg->Egbaggtype())
	{
		return false;
	}

	CGroupExpression *pgexprOrigin = pexprAgg->Pgexpr();
	// check xform lineage
	BOOL is_eager_agg = false;
	while (nullptr != pgexprOrigin && !is_eager_agg)
	{
		// parse all expressions in group to check if any was created by CXformEagerAgg
		is_eager_agg = CXform::ExfEagerAgg == pgexprOrigin->ExfidOrigin();
		pgexprOrigin = pgexprOrigin->PgexprOrigin();
	}

	return is_eager_agg;
}



//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::FXformInArray
//
//      @doc:
//          Check if given xform id is in the given array of xforms
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FXformInArray(CXform::EXformId exfid,
						   const CXform::EXformId rgXforms[], ULONG ulXforms)
{
	for (ULONG ul = 0; ul < ulXforms; ul++)
	{
		if (rgXforms[ul] == exfid)
		{
			return true;
		}
	}

	return false;
}

//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::FDeriveStatsBeforeXform
//
//      @doc:
//          Return true if stats derivation is needed for this xform
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FDeriveStatsBeforeXform(CXform *pxform)
{
	GPOS_ASSERT(nullptr != pxform);

	return pxform->FExploration() &&
		   CXformExploration::Pxformexp(pxform)->FNeedsStats();
}

//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::FSubqueryDecorrelation
//
//      @doc:
//          Check if xform is a subquery decorrelation xform
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FSubqueryDecorrelation(CXform *pxform)
{
	GPOS_ASSERT(nullptr != pxform);

	return pxform->FExploration() &&
		   CXformExploration::Pxformexp(pxform)->FApplyDecorrelating();
}


//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::FSubqueryUnnesting
//
//      @doc:
//          Check if xform is a subquery unnesting xform
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FSubqueryUnnesting(CXform *pxform)
{
	GPOS_ASSERT(nullptr != pxform);

	return pxform->FExploration() &&
		   CXformExploration::Pxformexp(pxform)->FSubqueryUnnesting();
}

//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::FApplyToNextBinding
//
//      @doc:
//         Check if xform should be applied to the next binding
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FApplyToNextBinding(
	CXform *pxform,
	CExpression *pexprLastBinding  // last extracted xform pattern
)
{
	GPOS_ASSERT(nullptr != pxform);

	if (FSubqueryDecorrelation(pxform))
	{
		// if last binding is free from Subquery or Apply operators, we do not
		// need to apply the xform further
		return CUtils::FHasSubqueryOrApply(pexprLastBinding,
										   false /*fCheckRoot*/) ||
			   CUtils::FHasCorrelatedApply(pexprLastBinding,
										   false /*fCheckRoot*/);
	}

	// set of transformations that should be applied once
	CXform::EXformId rgXforms[] = {
		CXform::ExfJoinAssociativity,
		CXform::ExfExpandFullOuterJoin,
		CXform::ExfUnnestTVF,
		CXform::ExfLeftSemiJoin2CrossProduct,
		CXform::ExfLeftAntiSemiJoin2CrossProduct,
		CXform::ExfLeftAntiSemiJoinNotIn2CrossProduct,
	};

	CXform::EXformId exfid = pxform->Exfid();

	BOOL fApplyOnce = FSubqueryUnnesting(pxform) ||
					  FXformInArray(exfid, rgXforms, GPOS_ARRAY_SIZE(rgXforms));

	return !fApplyOnce;
}

//---------------------------------------------------------------------------
//      @function:
//              CXformUtils::PstrErrorMessage
//
//      @doc:
//          Compute an error message for given exception type
//
//---------------------------------------------------------------------------
CWStringConst *
CXformUtils::PstrErrorMessage(CMemoryPool *mp, ULONG major, ULONG minor, ...)
{
	WCHAR wsz[1024];
	CWStringStatic str(wsz, 1024);

	// manufacture actual exception object
	CException exc(major, minor);

	// during bootstrap there's no context object otherwise, record
	// all details in the context object
	if (nullptr != ITask::Self())
	{
		VA_LIST valist;
		VA_START(valist, minor);

		ELocale eloc = ITask::Self()->Locale();
		CMessage *pmsg =
			CMessageRepository::GetMessageRepository()->LookupMessage(exc,
																	  eloc);
		pmsg->Format(&str, valist);

		VA_END(valist);
	}

	return GPOS_NEW(mp) CWStringConst(mp, str.GetBuffer());
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PdrgpcrIndexKeys
//
//	@doc:
//		Return the array of columns from the given array of columns which appear
//		in the index key columns
//
//---------------------------------------------------------------------------
CColRefArray *
CXformUtils::PdrgpcrIndexKeys(CMemoryPool *mp, CColRefArray *colref_array,
							  const IMDIndex *pmdindex,
							  const IMDRelation *pmdrel)
{
	return PdrgpcrIndexColumns(mp, colref_array, pmdindex, pmdrel, EicKey);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PcrsIndexKeys
//
//	@doc:
//		Return the set of columns from the given array of columns which appear
//		in the index key columns
//
//---------------------------------------------------------------------------
CColRefSet *
CXformUtils::PcrsIndexKeys(CMemoryPool *mp, CColRefArray *colref_array,
						   const IMDIndex *pmdindex, const IMDRelation *pmdrel)
{
	return PcrsIndexColumns(mp, colref_array, pmdindex, pmdrel, EicKey);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PcrsIndexIncludedCols
//
//	@doc:
//		Return the set of columns from the given array of columns which appear
//		in the index included columns
//
//---------------------------------------------------------------------------
CColRefSet *
CXformUtils::PcrsIndexIncludedCols(CMemoryPool *mp, CColRefArray *colref_array,
								   const IMDIndex *pmdindex,
								   const IMDRelation *pmdrel)
{
	return PcrsIndexColumns(mp, colref_array, pmdindex, pmdrel, EicIncluded);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PcrsIndexColumns
//
//	@doc:
//		Return the set of columns from the given array of columns which appear
//		in the index columns of the specified type (included / key)
//
//---------------------------------------------------------------------------
CColRefSet *
CXformUtils::PcrsIndexColumns(CMemoryPool *mp, CColRefArray *colref_array,
							  const IMDIndex *pmdindex,
							  const IMDRelation *pmdrel, EIndexCols eic)
{
	GPOS_ASSERT(EicKey == eic || EicIncluded == eic);
	CColRefArray *pdrgpcrIndexColumns =
		PdrgpcrIndexColumns(mp, colref_array, pmdindex, pmdrel, eic);
	CColRefSet *pcrsCols = GPOS_NEW(mp) CColRefSet(mp, pdrgpcrIndexColumns);

	pdrgpcrIndexColumns->Release();

	return pcrsCols;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PdrgpcrIndexColumns
//
//	@doc:
//		Return the ordered list of columns from the given array of columns which
//		appear in the index columns of the specified type (included / key)
//
//---------------------------------------------------------------------------
CColRefArray *
CXformUtils::PdrgpcrIndexColumns(CMemoryPool *mp, CColRefArray *colref_array,
								 const IMDIndex *pmdindex,
								 const IMDRelation *pmdrel, EIndexCols eic)
{
	GPOS_ASSERT(EicKey == eic || EicIncluded == eic);

	CColRefArray *pdrgpcrIndex = GPOS_NEW(mp) CColRefArray(mp);

	ULONG length = pmdindex->Keys();
	if (EicIncluded == eic)
	{
		length = pmdindex->IncludedCols();
	}

	for (ULONG ul = 0; ul < length; ul++)
	{
		ULONG ulPos = gpos::ulong_max;
		if (EicIncluded == eic)
		{
			ulPos = pmdindex->IncludedColAt(ul);
		}
		else
		{
			ulPos = pmdindex->KeyAt(ul);
		}
		ULONG ulPosNonDropped = pmdrel->NonDroppedColAt(ulPos);

		GPOS_ASSERT(gpos::ulong_max != ulPosNonDropped);
		GPOS_ASSERT(ulPosNonDropped < colref_array->Size());

		CColRef *colref = (*colref_array)[ulPosNonDropped];
		pdrgpcrIndex->Append(colref);
	}

	return pdrgpcrIndex;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FIndexApplicable
//
//	@doc:
//		Check if an index is applicable given the required, output and scalar
//		expression columns
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FIndexApplicable(CMemoryPool *mp, const IMDIndex *pmdindex,
							  const IMDRelation *pmdrel,
							  CColRefArray *pdrgpcrOutput, CColRefSet *pcrsReqd,
							  CColRefSet *pcrsScalar,
							  IMDIndex::EmdindexType emdindtype,
							  IMDIndex::EmdindexType altindtype)
{
	BOOL possible_ao_table = pmdrel->IsAORowOrColTable() ||
							 pmdrel->RetrieveRelStorageType() ==
								 IMDRelation::ErelstorageMixedPartitioned;
	// GiST can match with either Btree or Bitmap indexes
	if (pmdindex->IndexType() == IMDIndex::EmdindGist ||
		// GIN and BRIN can only match with Bitmap Indexes
		(emdindtype == IMDIndex::EmdindBitmap &&
		 (IMDIndex::EmdindGin == pmdindex->IndexType() ||
		  IMDIndex::EmdindBrin == pmdindex->IndexType())))
	{
		// continue
	}
	else if (emdindtype == IMDIndex::EmdindBitmap &&
			 pmdindex->IndexType() == IMDIndex::EmdindBtree &&
			 possible_ao_table)
	{
		// continue, Btree indexes on AO tables can be treated as Bitmap tables
	}
	else if (
		(emdindtype != pmdindex->IndexType() &&
		 altindtype !=
			 pmdindex
				 ->IndexType()) ||	// otherwise make sure the index matches the given type(s)
		0 == pcrsScalar->Size() ||	// no columns to match index against
		(emdindtype != IMDIndex::EmdindBitmap &&
		 possible_ao_table))  // only bitmap scans are supported on AO tables
	{
		return false;
	}

	BOOL fApplicable = true;

	CColRefSet *pcrsIncludedCols =
		CXformUtils::PcrsIndexIncludedCols(mp, pdrgpcrOutput, pmdindex, pmdrel);
	CColRefSet *pcrsIndexCols =
		CXformUtils::PcrsIndexKeys(mp, pdrgpcrOutput, pmdindex, pmdrel);
	if (!pcrsIncludedCols->ContainsAll(pcrsReqd) ||	 // index is not covering
		pcrsScalar->IsDisjoint(
			pcrsIndexCols))	 // indexing columns disjoint from the columns used in the scalar expression
	{
		fApplicable = false;
	}

	// clean up
	pcrsIncludedCols->Release();
	pcrsIndexCols->Release();

	return fApplicable;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprRowNumber
//
//	@doc:
//		Create an expression with "row_number" window function
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprRowNumber(CMemoryPool *mp)
{
	OID row_number_oid = COptCtxt::PoctxtFromTLS()
							 ->GetOptimizerConfig()
							 ->GetWindowOids()
							 ->OidRowNumber();

	CScalarWindowFunc *popRowNumber = GPOS_NEW(mp) CScalarWindowFunc(
		mp, GPOS_NEW(mp) CMDIdGPDB(row_number_oid),
		GPOS_NEW(mp) CMDIdGPDB(GPDB_INT8_OID),
		GPOS_NEW(mp) CWStringConst(mp, GPOS_WSZ_LIT("row_number")),
		CScalarWindowFunc::EwsImmediate, false /* is_distinct */,
		false /* is_star_arg */, false /* is_simple_agg */
	);

	CExpression *pexprScRowNumber = GPOS_NEW(mp) CExpression(mp, popRowNumber);

	return pexprScRowNumber;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprWindowWithRowNumber
//
//	@doc:
//		Create a sequence project (window) expression with a row_number
//		window function and partitioned by the given array of columns references
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprWindowWithRowNumber(CMemoryPool *mp,
									  CExpression *pexprWindowChild,
									  CColRefArray *pdrgpcrInput)
{
	// partitioning information
	CDistributionSpec *pds = nullptr;
	if (nullptr != pdrgpcrInput)
	{
		CExpressionArray *pdrgpexprInput =
			CUtils::PdrgpexprScalarIdents(mp, pdrgpcrInput);
		pds = GPOS_NEW(mp) CDistributionSpecHashed(pdrgpexprInput,
												   true /* fNullsCollocated */);
	}
	else
	{
		pds = GPOS_NEW(mp)
			CDistributionSpecSingleton(CDistributionSpecSingleton::EstMaster);
	}

	// window frames
	CWindowFrameArray *pdrgpwf = GPOS_NEW(mp) CWindowFrameArray(mp);

	// ordering information
	COrderSpecArray *pdrgpos = GPOS_NEW(mp) COrderSpecArray(mp);

	// row_number window function project list
	CExpression *pexprScWindowFunc = PexprRowNumber(mp);

	// generate a new column reference
	CScalarWindowFunc *popScWindowFunc =
		CScalarWindowFunc::PopConvert(pexprScWindowFunc->Pop());
	const IMDType *pmdtype = COptCtxt::PoctxtFromTLS()->Pmda()->RetrieveType(
		popScWindowFunc->MdidType());
	CName name(popScWindowFunc->PstrFunc());
	CColRef *colref = COptCtxt::PoctxtFromTLS()->Pcf()->PcrCreate(
		pmdtype, popScWindowFunc->TypeModifier(), name);

	// new project element
	CScalarProjectElement *popScPrEl =
		GPOS_NEW(mp) CScalarProjectElement(mp, colref);

	// generate a project element
	CExpression *pexprProjElem =
		GPOS_NEW(mp) CExpression(mp, popScPrEl, pexprScWindowFunc);

	// generate the project list
	CExpression *pexprProjList = GPOS_NEW(mp)
		CExpression(mp, GPOS_NEW(mp) CScalarProjectList(mp), pexprProjElem);

	CLogicalSequenceProject *popLgSequence =
		GPOS_NEW(mp) CLogicalSequenceProject(mp, pds, pdrgpos, pdrgpwf);

	pexprWindowChild->AddRef();
	CExpression *pexprLgSequence = GPOS_NEW(mp)
		CExpression(mp, popLgSequence, pexprWindowChild, pexprProjList);

	return pexprLgSequence;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprAssertOneRow
//
//	@doc:
//		Generate a logical Assert expression that errors out when more than
//		one row is generated
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprAssertOneRow(CMemoryPool *mp, CExpression *pexprChild)
{
	GPOS_ASSERT(nullptr != pexprChild);
	GPOS_ASSERT(pexprChild->Pop()->FLogical());

	CExpression *pexprSeqPrj =
		PexprWindowWithRowNumber(mp, pexprChild, nullptr /*pdrgpcrInput*/);
	CColRef *pcrRowNumber =
		CScalarProjectElement::PopConvert((*(*pexprSeqPrj)[1])[0]->Pop())
			->Pcr();
	CExpression *pexprCmp = CUtils::PexprScalarEqCmp(
		mp, pcrRowNumber, CUtils::PexprScalarConstInt4(mp, 1 /*value*/));

	CWStringConst *pstrErrorMsg = PstrErrorMessage(
		mp, gpos::CException::ExmaSQL, gpos::CException::ExmiSQLMaxOneRow);
	CExpression *pexprAssertConstraint = GPOS_NEW(mp) CExpression(
		mp, GPOS_NEW(mp) CScalarAssertConstraint(mp, pstrErrorMsg), pexprCmp);

	CExpression *pexprAssertPredicate = GPOS_NEW(mp)
		CExpression(mp, GPOS_NEW(mp) CScalarAssertConstraintList(mp),
					pexprAssertConstraint);

	CLogicalAssert *popAssert = GPOS_NEW(mp) CLogicalAssert(
		mp, GPOS_NEW(mp) CException(gpos::CException::ExmaSQL,
									gpos::CException::ExmiSQLMaxOneRow));

	return GPOS_NEW(mp)
		CExpression(mp, popAssert, pexprSeqPrj, pexprAssertPredicate);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PcrProjectElement
//
//	@doc:
//		Return the colref of the n-th project element
//---------------------------------------------------------------------------
CColRef *
CXformUtils::PcrProjectElement(CExpression *pexpr, ULONG ulIdxProjElement)
{
	CExpression *pexprProjList = (*pexpr)[1];
	GPOS_ASSERT(COperator::EopScalarProjectList ==
				pexprProjList->Pop()->Eopid());

	CExpression *pexprProjElement = (*pexprProjList)[ulIdxProjElement];
	GPOS_ASSERT(nullptr != pexprProjElement);

	return CScalarProjectElement::PopConvert(pexprProjElement->Pop())->Pcr();
}



// Lookup join keys in scalar child group
void
CXformUtils::LookupJoinKeys(CMemoryPool *mp, CExpression *pexpr,
							CExpressionArray **ppdrgpexprOuter,
							CExpressionArray **ppdrgpexprInner,
							IMdIdArray **join_opfamilies)
{
	GPOS_ASSERT(nullptr != ppdrgpexprOuter);
	GPOS_ASSERT(nullptr != ppdrgpexprInner);

	*ppdrgpexprOuter = nullptr;
	*ppdrgpexprInner = nullptr;
	*join_opfamilies = nullptr;

	CGroupExpression *pgexprScalarOrigin = (*pexpr)[2]->Pgexpr();
	if (nullptr == pgexprScalarOrigin)
	{
		return;
	}

	CColRefSet *pcrsOuterOutput = (*pexpr)[0]->DeriveOutputColumns();
	CColRefSet *pcrsInnerOutput = (*pexpr)[1]->DeriveOutputColumns();

	CGroup *pgroupScalar = pgexprScalarOrigin->Pgroup();
	if (nullptr == pgroupScalar->PdrgpexprJoinKeysOuter())
	{
		// hash join keys not found
		return;
	}

	GPOS_ASSERT(nullptr != pgroupScalar->PdrgpexprJoinKeysInner());

	if (IMdIdArray *opfamilies = pgroupScalar->JoinOpfamilies())
	{
		opfamilies->AddRef();
		*join_opfamilies = opfamilies;
	}

	// extract used columns by hash join keys
	CColRefSet *pcrsUsedOuter =
		CUtils::PcrsExtractColumns(mp, pgroupScalar->PdrgpexprJoinKeysOuter());
	CColRefSet *pcrsUsedInner =
		CUtils::PcrsExtractColumns(mp, pgroupScalar->PdrgpexprJoinKeysInner());

	BOOL fOuterKeysUsesOuterChild = pcrsOuterOutput->ContainsAll(pcrsUsedOuter);
	BOOL fInnerKeysUsesInnerChild = pcrsInnerOutput->ContainsAll(pcrsUsedInner);
	BOOL fInnerKeysUsesOuterChild = pcrsOuterOutput->ContainsAll(pcrsUsedInner);
	BOOL fOuterKeysUsesInnerChild = pcrsInnerOutput->ContainsAll(pcrsUsedOuter);

	if ((fOuterKeysUsesOuterChild && fInnerKeysUsesInnerChild) ||
		(fInnerKeysUsesOuterChild && fOuterKeysUsesInnerChild))
	{
		CGroupProxy gp(pgroupScalar);

		pgroupScalar->PdrgpexprJoinKeysOuter()->AddRef();
		pgroupScalar->PdrgpexprJoinKeysInner()->AddRef();

		// align hash join keys with join child
		if (fOuterKeysUsesOuterChild && fInnerKeysUsesInnerChild)
		{
			*ppdrgpexprOuter = pgroupScalar->PdrgpexprJoinKeysOuter();
			*ppdrgpexprInner = pgroupScalar->PdrgpexprJoinKeysInner();
		}
		else
		{
			GPOS_ASSERT(fInnerKeysUsesOuterChild && fOuterKeysUsesInnerChild);

			*ppdrgpexprOuter = pgroupScalar->PdrgpexprJoinKeysInner();
			*ppdrgpexprInner = pgroupScalar->PdrgpexprJoinKeysOuter();
		}
	}

	pcrsUsedOuter->Release();
	pcrsUsedInner->Release();
}


// Cache join keys on scalar child group
void
CXformUtils::CacheJoinKeys(CExpression *pexpr, CExpressionArray *pdrgpexprOuter,
						   CExpressionArray *pdrgpexprInner,
						   IMdIdArray *join_opfamilies)
{
	GPOS_ASSERT(nullptr != pdrgpexprOuter);
	GPOS_ASSERT(nullptr != pdrgpexprInner);

	CGroupExpression *pgexprScalarOrigin = (*pexpr)[2]->Pgexpr();
	if (nullptr != pgexprScalarOrigin)
	{
		CGroup *pgroupScalar = pgexprScalarOrigin->Pgroup();

		{  // scope of group proxy
			CGroupProxy gp(pgroupScalar);
			gp.SetJoinKeys(pdrgpexprOuter, pdrgpexprInner, join_opfamilies);
		}
	}
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::AddCTEProducer
//
//	@doc:
//		Helper to create a CTE producer expression and add it to global
//		CTE info structure
//		Does not take ownership of pexpr
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprAddCTEProducer(CMemoryPool *mp, ULONG ulCTEId,
								 CColRefArray *colref_array, CExpression *pexpr)
{
	CColRefArray *pdrgpcrProd = CUtils::PdrgpcrCopy(mp, colref_array);
	UlongToColRefMap *colref_mapping =
		CUtils::PhmulcrMapping(mp, colref_array, pdrgpcrProd);
	CExpression *pexprRemapped = pexpr->PexprCopyWithRemappedColumns(
		mp, colref_mapping, true /*must_exist*/);
	colref_mapping->Release();

	CExpression *pexprProducer = GPOS_NEW(mp) CExpression(
		mp, GPOS_NEW(mp) CLogicalCTEProducer(mp, ulCTEId, pdrgpcrProd),
		pexprRemapped);

	CCTEInfo *pcteinfo = COptCtxt::PoctxtFromTLS()->Pcteinfo();
	pcteinfo->AddCTEProducer(pexprProducer);
	pexprProducer->Release();

	return pcteinfo->PexprCTEProducer(ulCTEId);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FProcessGPDBAntiSemiHashJoin
//
//	@doc:
//		Helper to extract equality from an expression tree of the form
//		OP
//		 |--(=)
//		 |	 |-- expr1
//		 |	 +-- expr2
//		 +--exprOther
//
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FExtractEquality(
	CExpression *pexpr,
	CExpression **
		ppexprEquality,	 // output: extracted equality expression, set to NULL if extraction failed
	CExpression **
		ppexprOther	 // output: sibling of equality expression, set to NULL if extraction failed
)
{
	GPOS_ASSERT(2 == pexpr->Arity());

	*ppexprEquality = nullptr;
	*ppexprOther = nullptr;

	CExpression *pexprLeft = (*pexpr)[0];
	CExpression *pexprRight = (*pexpr)[1];
	BOOL fEqualityOnLeft = CPredicateUtils::IsEqualityOp(pexprLeft);
	BOOL fEqualityOnRight = CPredicateUtils::IsEqualityOp(pexprRight);
	if (fEqualityOnLeft || fEqualityOnRight)
	{
		*ppexprEquality = pexprLeft;
		*ppexprOther = pexprRight;
		if (fEqualityOnRight)
		{
			*ppexprEquality = pexprRight;
			*ppexprOther = pexprLeft;
		}

		return true;
	}

	return false;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FProcessGPDBAntiSemiHashJoin
//
//	@doc:
//		GPDB hash join return no results if the inner side of anti-semi-join
//		produces null values, this allows simplifying join predicates of the
//		form (equality_expr IS DISTINCT FROM false) to (equality_expr) since
//		GPDB hash join operator guarantees no join results to be returned in
//		this case
//
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FProcessGPDBAntiSemiHashJoin(
	CMemoryPool *mp, CExpression *pexpr,
	CExpression **
		ppexprResult  // output: result expression, set to NULL if processing failed
)
{
	GPOS_ASSERT(nullptr != ppexprResult);
	GPOS_ASSERT(
		COperator::EopLogicalLeftAntiSemiJoin == pexpr->Pop()->Eopid() ||
		COperator::EopLogicalLeftAntiSemiJoinNotIn == pexpr->Pop()->Eopid());

	*ppexprResult = nullptr;
	CExpression *pexprOuter = (*pexpr)[0];
	CExpression *pexprInner = (*pexpr)[1];
	CExpression *pexprScalar = (*pexpr)[2];

	CExpressionArray *pdrgpexpr =
		CPredicateUtils::PdrgpexprConjuncts(mp, pexprScalar);
	CExpressionArray *pdrgpexprNew = GPOS_NEW(mp) CExpressionArray(mp);
	const ULONG ulPreds = pdrgpexpr->Size();
	BOOL fSimplifiedPredicate = false;
	for (ULONG ul = 0; ul < ulPreds; ul++)
	{
		CExpression *pexprPred = (*pdrgpexpr)[ul];
		if (CPredicateUtils::FIDFFalse(pexprPred))
		{
			CExpression *pexprEquality = nullptr;
			CExpression *pexprFalse = nullptr;
			if (FExtractEquality(
					pexprPred, &pexprEquality,
					&pexprFalse) &&	 // extracted equality expression
				IMDId::EmdidGPDB ==
					CScalarConst::PopConvert(pexprFalse->Pop())
						->GetDatum()
						->MDId()
						->MdidType() &&	 // underlying system is GPDB
				CPhysicalJoin::FHashJoinCompatible(
					pexprEquality, pexprOuter,
					pexprInner) &&	// equality is hash-join compatible
				!CUtils::FUsesNullableCol(
					mp, pexprEquality,
					pexprInner) &&	// equality uses an inner NOT NULL column
				!CUtils::FUsesNullableCol(
					mp, pexprEquality,
					pexprOuter))  // equality uses an outer NOT NULL column
			{
				pexprEquality->AddRef();
				pdrgpexprNew->Append(pexprEquality);
				fSimplifiedPredicate = true;
				continue;
			}
		}
		pexprPred->AddRef();
		pdrgpexprNew->Append(pexprPred);
	}

	pdrgpexpr->Release();
	if (!fSimplifiedPredicate)
	{
		pdrgpexprNew->Release();
		return false;
	}

	pexprOuter->AddRef();
	pexprInner->AddRef();
	pexpr->Pop()->AddRef();
	*ppexprResult = GPOS_NEW(mp)
		CExpression(mp, pexpr->Pop(), pexprOuter, pexprInner,
					CPredicateUtils::PexprConjunction(mp, pdrgpexprNew));

	return true;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprBuildIndexPlan
//
//	@doc:
//		Construct an expression representing a new access path using the given functors for
//		operator constructors and rewritten access path.
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprBuildBtreeIndexPlan(
	CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprGet,
	ULONG ulOriginOpId, CExpressionArray *pdrgpexprConds, CColRefSet *pcrsReqd,
	CColRefSet *pcrsScalarExpr, CColRefSet *outer_refs,
	const IMDIndex *pmdindex, const IMDRelation *pmdrel)
{
	GPOS_ASSERT(nullptr != pexprGet);
	GPOS_ASSERT(nullptr != pdrgpexprConds);
	GPOS_ASSERT(nullptr != pcrsReqd);
	GPOS_ASSERT(nullptr != pcrsScalarExpr);
	GPOS_ASSERT(nullptr != pmdindex);
	GPOS_ASSERT(nullptr != pmdrel);

	COperator::EOperatorId op_id = pexprGet->Pop()->Eopid();
	GPOS_ASSERT(CLogical::EopLogicalGet == op_id ||
				CLogical::EopLogicalDynamicGet == op_id);

	BOOL fDynamicGet = (COperator::EopLogicalDynamicGet == op_id);

	CTableDescriptor *ptabdesc = pexprGet->DeriveTableDescriptor();
	GPOS_ASSERT(nullptr != ptabdesc);
	CColRefArray *pdrgpcrOutput = nullptr;
	CWStringConst *alias = nullptr;
	ULONG ulPartIndex = gpos::ulong_max;
	CColRef2dArray *pdrgpdrgpcrPart = nullptr;
	IMdIdArray *partition_mdids = nullptr;

	if (ptabdesc->RetrieveRelStorageType() != IMDRelation::ErelstorageHeap &&
		pmdindex->IndexType() == IMDIndex::EmdindGist)
	{
		// Non-heap tables not supported for GiST
		return nullptr;
	}

	if (fDynamicGet)
	{
		CLogicalDynamicGet *popDynamicGet =
			CLogicalDynamicGet::PopConvert(pexprGet->Pop());

		ulPartIndex = popDynamicGet->ScanId();
		pdrgpcrOutput = popDynamicGet->PdrgpcrOutput();
		GPOS_ASSERT(nullptr != pdrgpcrOutput);
		alias = GPOS_NEW(mp)
			CWStringConst(mp, popDynamicGet->Name().Pstr()->GetBuffer());
		pdrgpdrgpcrPart = popDynamicGet->PdrgpdrgpcrPart();
		partition_mdids = popDynamicGet->GetPartitionMdids();
	}
	else
	{
		CLogicalGet *popGet = CLogicalGet::PopConvert(pexprGet->Pop());
		pdrgpcrOutput = popGet->PdrgpcrOutput();
		GPOS_ASSERT(nullptr != pdrgpcrOutput);
		alias =
			GPOS_NEW(mp) CWStringConst(mp, popGet->Name().Pstr()->GetBuffer());
	}

	if (!FIndexApplicable(mp, pmdindex, pmdrel, pdrgpcrOutput, pcrsReqd,
						  pcrsScalarExpr, IMDIndex::EmdindBtree))
	{
		GPOS_DELETE(alias);

		return nullptr;
	}

	CColRefArray *pdrgppcrIndexCols =
		PdrgpcrIndexKeys(mp, pdrgpcrOutput, pmdindex, pmdrel);
	CExpressionArray *pdrgpexprIndex = GPOS_NEW(mp) CExpressionArray(mp);
	CExpressionArray *pdrgpexprResidual = GPOS_NEW(mp) CExpressionArray(mp);
	CPredicateUtils::ExtractIndexPredicates(
		mp, md_accessor, pdrgpexprConds, pmdindex, pdrgppcrIndexCols,
		pdrgpexprIndex, pdrgpexprResidual, outer_refs);
	CColRefSet *outer_refs_in_index_get =
		CUtils::PcrsExtractColumns(mp, pdrgpexprIndex);
	outer_refs_in_index_get->Intersection(outer_refs);

	// exit early if:
	// (1) there are no index-able predicates or
	// (2) there are no outer references in index-able predicates
	//
	// (2) is valid only for Join2IndexApply xform wherein the index-get
	// expression must include outer references for it to be an alternative
	// worth considering. Otherwise it has the same effect as a regular NLJ
	// with an index lookup.
	if (0 == pdrgpexprIndex->Size() || outer_refs_in_index_get->Size() == 0)
	{
		// clean up
		GPOS_DELETE(alias);
		pdrgppcrIndexCols->Release();
		pdrgpexprResidual->Release();
		pdrgpexprIndex->Release();
		outer_refs_in_index_get->Release();

		return nullptr;
	}

	// most GiST indexes are lossy, so conservatively re-add all the index quals to the residual so that they can be rechecked
	if (pmdindex->IndexType() == IMDIndex::EmdindGist)
	{
		for (ULONG ul = 0; ul < pdrgpexprIndex->Size(); ul++)
		{
			CExpression *pexprPred = (*pdrgpexprIndex)[ul];
			pexprPred->AddRef();
			pdrgpexprResidual->Append(pexprPred);
		}
	}
	else
	{
		GPOS_ASSERT(pdrgpexprConds->Size() ==
					pdrgpexprResidual->Size() + pdrgpexprIndex->Size());
	}

	ptabdesc->AddRef();
	pdrgpcrOutput->AddRef();
	// create the logical (dynamic) bitmap table get operator
	CLogical *popLogicalGet = nullptr;

	if (fDynamicGet)
	{
		pdrgpdrgpcrPart->AddRef();
		partition_mdids->AddRef();
		popLogicalGet = PopDynamicBtreeIndexOpConstructor(
			mp, pmdindex, ptabdesc, ulOriginOpId,
			GPOS_NEW(mp) CName(mp, CName(alias)), ulPartIndex, pdrgpcrOutput,
			pdrgpdrgpcrPart, partition_mdids);
	}
	else
	{
		popLogicalGet = PopStaticBtreeIndexOpConstructor(
			mp, pmdindex, ptabdesc, ulOriginOpId,
			GPOS_NEW(mp) CName(mp, CName(alias)), pdrgpcrOutput);
	}

	// clean up
	GPOS_DELETE(alias);
	pdrgppcrIndexCols->Release();
	outer_refs_in_index_get->Release();

	CExpression *pexprIndexCond =
		CPredicateUtils::PexprConjunction(mp, pdrgpexprIndex);
	CExpression *pexprResidualCond =
		CPredicateUtils::PexprConjunction(mp, pdrgpexprResidual);

	return PexprRewrittenBtreeIndexPath(mp, pexprIndexCond, pexprResidualCond,
										pmdindex, ptabdesc, popLogicalGet);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprScalarBitmapBoolOp
//
//	@doc:
//		 Helper for creating BitmapBoolOp expression
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprScalarBitmapBoolOp(
	CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprOriginalPred,
	CExpressionArray *pdrgpexpr, CTableDescriptor *ptabdesc,
	const IMDRelation *pmdrel, CColRefArray *pdrgpcrOutput,
	CColRefSet *outer_refs, CColRefSet *pcrsReqd, BOOL fConjunction,
	CExpression **ppexprRecheck, CExpression **ppexprResidual,
	BOOL isAPartialPredicate)
{
	GPOS_ASSERT(nullptr != pdrgpexpr);

	const ULONG ulPredicates = pdrgpexpr->Size();

	// array of recheck predicates
	CExpressionArray *pdrgpexprRecheckNew = GPOS_NEW(mp) CExpressionArray(mp);

	// array of residual predicates
	CExpressionArray *pdrgpexprResidualNew = GPOS_NEW(mp) CExpressionArray(mp);

	// array of bitmap index probe/bitmap bool op expressions
	CExpressionArray *pdrgpexprBitmap = GPOS_NEW(mp) CExpressionArray(mp);

	CreateBitmapIndexProbeOps(
		mp, md_accessor, pexprOriginalPred, pdrgpexpr, ptabdesc, pmdrel,
		pdrgpcrOutput, outer_refs, pcrsReqd, fConjunction, pdrgpexprBitmap,
		pdrgpexprRecheckNew, pdrgpexprResidualNew, isAPartialPredicate);

	GPOS_ASSERT(pdrgpexprRecheckNew->Size() == pdrgpexprBitmap->Size());

	const ULONG ulBitmapExpr = pdrgpexprBitmap->Size();

	if (0 == ulBitmapExpr || (!fConjunction && ulBitmapExpr < ulPredicates))
	{
		// no relevant bitmap indexes found,
		// or expression is a disjunction and some disjuncts don't have applicable bitmap indexes
		pdrgpexprBitmap->Release();
		pdrgpexprRecheckNew->Release();
		pdrgpexprResidualNew->Release();
		return nullptr;
	}

	CExpression *pexprBitmapBoolOp = nullptr;
	CExpression *pexprRecheckNew = nullptr;

	JoinBitmapIndexProbes(mp, pdrgpexprBitmap, pdrgpexprRecheckNew,
						  fConjunction, &pexprBitmapBoolOp, &pexprRecheckNew);

	if (nullptr != *ppexprRecheck)
	{
		CExpression *pexprRecheckNewCombined = CPredicateUtils::PexprConjDisj(
			mp, *ppexprRecheck, pexprRecheckNew, fConjunction);
		(*ppexprRecheck)->Release();
		pexprRecheckNew->Release();
		*ppexprRecheck = pexprRecheckNewCombined;
	}
	else
	{
		*ppexprRecheck = pexprRecheckNew;
	}

	if (0 < pdrgpexprResidualNew->Size())
	{
		ComputeBitmapTableScanResidualPredicate(
			mp, fConjunction, pexprOriginalPred, ppexprResidual,
			pdrgpexprResidualNew);
	}

	// cleanup
	pdrgpexprRecheckNew->Release();
	pdrgpexprBitmap->Release();
	pdrgpexprResidualNew->Release();

	return pexprBitmapBoolOp;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::ComputeBitmapTableScanResidualPredicate
//
//	@doc:
//		Compute the residual predicate for a bitmap table scan
//
//---------------------------------------------------------------------------
void
CXformUtils::ComputeBitmapTableScanResidualPredicate(
	CMemoryPool *mp, BOOL fConjunction, CExpression *pexprOriginalPred,
	CExpression **
		ppexprResidual,	 // input-output argument: the residual predicate computed so-far, and resulting predicate
	CExpressionArray *pdrgpexprResidualNew)
{
	GPOS_ASSERT(nullptr != pexprOriginalPred);
	GPOS_ASSERT(0 < pdrgpexprResidualNew->Size());

	if (!fConjunction)
	{
		// one of the disjuncts requires a residual predicate: we need to reevaluate the original predicate
		// for example, for index keys ik1 and ik2, the following will require re-evaluating
		// the whole predicate rather than just k < 100:
		// ik1 = 1 or (ik2=2 and k<100)
		pexprOriginalPred->AddRef();
		CRefCount::SafeRelease(*ppexprResidual);
		*ppexprResidual = pexprOriginalPred;
		return;
	}

	pdrgpexprResidualNew->AddRef();
	CExpression *pexprResidualNew =
		CPredicateUtils::PexprConjDisj(mp, pdrgpexprResidualNew, fConjunction);

	if (nullptr != *ppexprResidual)
	{
		CExpression *pexprResidualNewCombined = CPredicateUtils::PexprConjDisj(
			mp, *ppexprResidual, pexprResidualNew, fConjunction);
		(*ppexprResidual)->Release();
		pexprResidualNew->Release();
		*ppexprResidual = pexprResidualNewCombined;
	}
	else
	{
		*ppexprResidual = pexprResidualNew;
	}
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprBitmapBoolOp
//
//	@doc:
//		Construct a bitmap bool op expression between the given bitmap access
// 		path expressions
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprBitmapBoolOp(CMemoryPool *mp, IMDId *pmdidBitmapType,
							   CExpression *pexprLeft, CExpression *pexprRight,
							   BOOL fConjunction)
{
	GPOS_ASSERT(nullptr != pexprLeft);
	GPOS_ASSERT(nullptr != pexprRight);

	CScalarBitmapBoolOp::EBitmapBoolOp ebitmapboolop =
		CScalarBitmapBoolOp::EbitmapboolAnd;

	if (!fConjunction)
	{
		ebitmapboolop = CScalarBitmapBoolOp::EbitmapboolOr;
	}

	return GPOS_NEW(mp) CExpression(
		mp,
		GPOS_NEW(mp) CScalarBitmapBoolOp(mp, ebitmapboolop, pmdidBitmapType),
		pexprLeft, pexprRight);
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprConditionOnBoolColumn
//
//	@doc:
//		Construct a bitmap index path expression for the given predicate
//		out of the children of the given expression.
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprBitmapLookupWithPredicateBreakDown(
	CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprOriginalPred,
	CExpression *pexprPred, CTableDescriptor *ptabdesc,
	const IMDRelation *pmdrel, CColRefArray *pdrgpcrOutput,
	CColRefSet *outer_refs, CColRefSet *pcrsReqd, CExpression **ppexprRecheck,
	CExpression **ppexprResidual)
{
	GPOS_ASSERT(nullptr == *ppexprRecheck);
	GPOS_ASSERT(nullptr == *ppexprResidual);

	CExpressionArray *pdrgpexpr = nullptr;
	BOOL fConjunction = CPredicateUtils::FAnd(pexprPred);

	if (fConjunction)
	{
		// Combine all the supported conjuncts into a single CExpression, to be able to
		// pass them as one unit to PexprScalarBitmapBoolOp and to PexprBitmapSelectBestIndex,
		// since we have optimizations that find the best multi-column index.

		CExpressionArray *temp_conjuncts =
			CPredicateUtils::PdrgpexprConjuncts(mp, pexprPred);
		CExpressionArray *supported_conjuncts =
			GPOS_NEW(mp) CExpressionArray(mp);

		pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);

		const ULONG size = temp_conjuncts->Size();
		for (ULONG ul = 0; ul < size; ul++)
		{
			CExpression *pexpr = (*temp_conjuncts)[ul];

			pexpr->AddRef();
			if (CPredicateUtils::FBitmapLookupSupportedPredicateOrConjunct(
					pexpr, outer_refs))
			{
				supported_conjuncts->Append(pexpr);
			}
			else
			{
				pdrgpexpr->Append(pexpr);
			}
		}
		temp_conjuncts->Release();

		if (0 < supported_conjuncts->Size())
		{
			CExpression *anded_expr =
				CPredicateUtils::PexprConjunction(mp, supported_conjuncts);
			pdrgpexpr->Append(anded_expr);
		}
		else
		{
			supported_conjuncts->Release();
		}
	}
	else
	{
		pdrgpexpr = CPredicateUtils::PdrgpexprDisjuncts(mp, pexprPred);
	}

	if (1 == pdrgpexpr->Size())
	{
		// unsupported predicate that cannot be split further into conjunctions and disjunctions
		pdrgpexpr->Release();
		return nullptr;
	}

	// expression is a deeper tree: recurse further in each of the components
	CExpression *pexprResult = PexprScalarBitmapBoolOp(
		mp, md_accessor, pexprOriginalPred, pdrgpexpr, ptabdesc, pmdrel,
		pdrgpcrOutput, outer_refs, pcrsReqd, fConjunction, ppexprRecheck,
		ppexprResidual,
		!fConjunction /* we are now breaking up something other than an AND
												 predicate and want to consider BTree indexes as well */
	);
	pdrgpexpr->Release();

	return pexprResult;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprBitmapSelectBestIndex
//
//	@doc:
//		Given conjuncts of supported predicates, select the best index and
//		construct a bitmap index path expression. Return unused predicates
//		as residuals.
//		Examples for conjuncts of supported predicates:
//		  col op <const>
//		  col op <outer ref>
//		  col op <const> AND col op <outer ref>
//		Casts are also acceptable here.
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprBitmapSelectBestIndex(
	CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprPred,
	CTableDescriptor *ptabdesc, const IMDRelation *pmdrel,
	CColRefArray *pdrgpcrOutput, CColRefSet *pcrsReqd,
	CColRefSet *pcrsOuterRefs, CExpression **ppexprRecheck,
	CExpression **ppexprResidual, BOOL alsoConsiderBTreeIndexes)
{
	CColRefSet *pcrsScalar = pexprPred->DeriveUsedColumns();
	ULONG ulBestIndex = 0;
	CExpression *pexprIndexFinal = nullptr;
	CDouble bestSelectivity =
		CDouble(2.0);  // selectivity can be a max value of 1
	ULONG bestNumResiduals = gpos::ulong_max;
	ULONG bestNumIndexCols = gpos::ulong_max;
	IMDIndex::EmdindexType altIndexType = IMDIndex::EmdindBitmap;

	if (alsoConsiderBTreeIndexes)
	{
		altIndexType = IMDIndex::EmdindBtree;
	}

	const ULONG ulIndexes = pmdrel->IndexCount();
	for (ULONG ul = 0; ul < ulIndexes; ul++)
	{
		const IMDIndex *pmdindex =
			md_accessor->RetrieveIndex(pmdrel->IndexMDidAt(ul));

		if (CXformUtils::FIndexApplicable(mp, pmdindex, pmdrel, pdrgpcrOutput,
										  pcrsReqd, pcrsScalar,
										  IMDIndex::EmdindBitmap, altIndexType))
		{
			// found an applicable index
			CExpressionArray *pdrgpexprScalar =
				CPredicateUtils::PdrgpexprConjuncts(mp, pexprPred);
			CColRefArray *pdrgpcrIndexCols =
				PdrgpcrIndexKeys(mp, pdrgpcrOutput, pmdindex, pmdrel);
			CExpressionArray *pdrgpexprIndex =
				GPOS_NEW(mp) CExpressionArray(mp);
			CExpressionArray *pdrgpexprResidual =
				GPOS_NEW(mp) CExpressionArray(mp);

			CPredicateUtils::ExtractIndexPredicates(
				mp, md_accessor, pdrgpexprScalar, pmdindex, pdrgpcrIndexCols,
				pdrgpexprIndex, pdrgpexprResidual, pcrsOuterRefs,
				alsoConsiderBTreeIndexes);

			pdrgpexprScalar->Release();

			if (0 == pdrgpexprIndex->Size())
			{
				// no usable predicate, clean up
				pdrgpcrIndexCols->Release();
				pdrgpexprIndex->Release();
				pdrgpexprResidual->Release();
				continue;
			}

			BOOL fCompatible = CPredicateUtils::FCompatiblePredicates(
				pdrgpexprIndex, pmdindex, pdrgpcrIndexCols, md_accessor);
			pdrgpcrIndexCols->Release();

			if (!fCompatible)
			{
				pdrgpexprIndex->Release();
				pdrgpexprResidual->Release();
				continue;
			}

			pdrgpexprIndex->AddRef();
			CExpression *pexprIndex =
				CPredicateUtils::PexprConjunction(mp, pdrgpexprIndex);

			CDouble selectivity = CFilterStatsProcessor::SelectivityOfPredicate(
				mp, pexprIndex, ptabdesc, pcrsOuterRefs);

			pexprIndex->Release();

			BOOL possible_ao_table =
				pmdrel->IsAORowOrColTable() ||
				pmdrel->RetrieveRelStorageType() ==
					IMDRelation::ErelstorageMixedPartitioned;

			// Btree indexes on AO tables are only great when the NDV is high. Do this check here
			if (selectivity > AO_TABLE_BTREE_INDEX_SELECTIVITY_THRESHOLD &&
				possible_ao_table &&
				pmdindex->IndexType() == IMDIndex::EmdindBtree)
			{
				pdrgpexprIndex->Release();
				pdrgpexprResidual->Release();
				continue;
			}

			CColRefArray *indexColumns = CXformUtils::PdrgpcrIndexKeys(
				mp, pdrgpcrOutput, pmdindex, pmdrel);

			// make sure the first key of index is included in the scalar predicate
			// (except for BRIN, which are symmetrical)
			// FIXME: Consider removing the first column check for GIN as well
			const CColRef *pcrFirstIndexKey = (*indexColumns)[0];

			if (!pcrsScalar->FMember(pcrFirstIndexKey) &&
				pmdindex->IndexType() != IMDIndex::EmdindBrin)
			{
				indexColumns->Release();
				pdrgpexprIndex->Release();
				pdrgpexprResidual->Release();
				continue;
			}

			ULONG numResiduals = pdrgpexprResidual->Size();
			ULONG numIndexCols = indexColumns->Size();
			// Score indexes by using three criteria:
			// - selectivity of the index predicate (more selective, i.e. smaller selectivity value, is better)
			// - number of residual predicates (fewer is better)
			// - number of columns in the index
			//   (with the same selectivity and # of residual preds, a smaller index is better)
			if (bestSelectivity > selectivity ||
				(bestSelectivity == selectivity &&
				 (bestNumResiduals > numResiduals ||
				  (bestNumResiduals == numResiduals &&
				   bestNumIndexCols > numIndexCols))))
			{
				CRefCount::SafeRelease((*ppexprResidual));
				pdrgpexprResidual->AddRef();
				(*ppexprResidual) = CPredicateUtils::PexprConjDisj(
					mp, pdrgpexprResidual, true /* fConjunction */);

				// if the index covers all the columns in the predicate, the residual generated is a trivial
				// constant true filter. Stop the search as this is an optimal index and discard the residual.
				if (CUtils::FScalarConstTrue((*ppexprResidual)))
				{
					(*ppexprResidual)->Release();
					(*ppexprResidual) = nullptr;
				}

				ulBestIndex = ul;
				bestSelectivity = selectivity;
				bestNumResiduals = numResiduals;
				bestNumIndexCols = numIndexCols;
				pdrgpexprIndex->AddRef();
				CRefCount::SafeRelease(pexprIndexFinal);
				pexprIndexFinal =
					CPredicateUtils::PexprConjunction(mp, pdrgpexprIndex);
			}

			pdrgpexprIndex->Release();
			pdrgpexprResidual->Release();
			indexColumns->Release();
		}
	}

	// if the final best index was found, return the correct expression
	if (nullptr != pexprIndexFinal)
	{
		const IMDIndex *pmdindex =
			md_accessor->RetrieveIndex(pmdrel->IndexMDidAt(ulBestIndex));
		CIndexDescriptor *pindexdesc =
			CIndexDescriptor::Pindexdesc(mp, ptabdesc, pmdindex);
		pmdindex->GetIndexRetItemTypeMdid()->AddRef();
		pexprIndexFinal->AddRef();
		(*ppexprRecheck) = pexprIndexFinal;

		return GPOS_NEW(mp) CExpression(
			mp,
			GPOS_NEW(mp) CScalarBitmapIndexProbe(
				mp, pindexdesc, pmdindex->GetIndexRetItemTypeMdid()),
			pexprIndexFinal);
	}

	// else the unmatched predicate becomes the residual
	pexprPred->AddRef();
	(*ppexprResidual) = pexprPred;
	return nullptr;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::CreateBitmapIndexProbeOps
//
//	@doc:
//		Given an array of predicate expressions, construct a bitmap access path
//		expression for each predicate and accumulate it in the pdrgpexprBitmap array
//
//---------------------------------------------------------------------------
void
CXformUtils::CreateBitmapIndexProbeOps(
	CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprOriginalPred,
	CExpressionArray *pdrgpexprPreds, CTableDescriptor *ptabdesc,
	const IMDRelation *pmdrel, CColRefArray *pdrgpcrOutput,
	CColRefSet *outer_refs, CColRefSet *pcrsReqd,
	BOOL,  // fConjunction
	CExpressionArray *pdrgpexprBitmap, CExpressionArray *pdrgpexprRecheck,
	CExpressionArray *pdrgpexprResidual, BOOL isAPartialPredicate)
{
	GPOS_ASSERT(nullptr != pdrgpexprPreds);

	ULONG ulPredicates = pdrgpexprPreds->Size();

	for (ULONG ul = 0; ul < ulPredicates; ul++)
	{
		CExpression *pexprPred = (*pdrgpexprPreds)[ul];
		CExpression *pexprBitmap = nullptr;
		CExpression *pexprRecheck = nullptr;

		CreateBitmapIndexProbesWithOrWithoutPredBreakdown(
			mp, md_accessor, pexprOriginalPred, pexprPred, ptabdesc, pmdrel,
			pdrgpcrOutput, outer_refs, pcrsReqd, &pexprBitmap, &pexprRecheck,
			pdrgpexprResidual, isAPartialPredicate);

		if (nullptr != pexprBitmap)
		{
			pdrgpexprBitmap->Append(pexprBitmap);
			pdrgpexprRecheck->Append(pexprRecheck);
		}
	}
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::CreateBitmapIndexProbesWithOrWithoutPredBreakdown
//
//	@doc:
//		Given a predicate pexprPred, try one of two strategies, in a loop:
//			1. if it is a supported predicate, call PexprBitmapSelectBestIndex
//			2. else, call PexprBitmapLookupWithPredicateBreakDown to find indexes
//				for parts of the predicate
//
//		This method also has one optimization:
//		- try using multiple indexes by redriving the logic with the
//		  residual predicates, finding a second-best index
//
//---------------------------------------------------------------------------
void
CXformUtils::CreateBitmapIndexProbesWithOrWithoutPredBreakdown(
	CMemoryPool *pmp, CMDAccessor *pmda, CExpression *pexprOriginalPred,
	CExpression *pexprPred, CTableDescriptor *ptabdesc,
	const IMDRelation *pmdrel, CColRefArray *pdrgpcrOutput,
	CColRefSet *pcrsOuterRefs, CColRefSet *pcrsReqd,
	CExpression **pexprBitmapResult, CExpression **pexprRecheckResult,
	CExpressionArray *pdrgpexprResidualResult, BOOL isAPartialPredicate)
{
	CExpression *pexprRecheckLocal, *pexprResidualLocal, *pexprBitmapLocal;

	BOOL retryIndexLookupWithResidual = false;

	// create temporary arrays in which we accumulate indexes and preds
	// when we try multiple indexes, these are always ANDed together
	CExpressionArray *pdrgpexprBitmapTemp = GPOS_NEW(pmp) CExpressionArray(pmp);
	CExpressionArray *pdrgpexprRecheckTemp =
		GPOS_NEW(pmp) CExpressionArray(pmp);

	pexprPred->AddRef();

	while (nullptr != pexprPred)
	{
		pexprRecheckLocal = pexprResidualLocal = pexprBitmapLocal = nullptr;

		if (CPredicateUtils::FBitmapLookupSupportedPredicateOrConjunct(
				pexprPred, pcrsOuterRefs))
		{
			// do not break the predicate down and lookup for an index covering maximum predicate columns,
			// this is done in following scenario to generate optimal index paths.
			// predicate is a conjunct tree with children of the form :
			// "ident op const" or "ident or const-array" or boolean-ident or "NOT boolean-ident",
			// where the casts are allowed on both idents and constants.

			// example, with schema: t(a, b, c , d , e , f , g , h)
			// indexes: i1 (b, c, d), i2 (g, h) and i3(d)
			// for predicate: d = 1 AND b = 2 AND c = 3 AND e = 4 AND g = 5 AND h = 6
			// the index paths chosen will be:
			// i1 covering (d = 1 AND b = 2 AND c = 3) and i2 covering (g = 5 AND h = 6)
			// with residual as e = 4.

			BOOL isAPartialPredicateOrArrayCmp = isAPartialPredicate;

			if (!isAPartialPredicateOrArrayCmp)
			{
				// consider a bitmap index scan on a btree index if we find any array comparisons,
				// since we currently don't support those for regular index scans
				CExpressionArray *conjuncts =
					CPredicateUtils::PdrgpexprConjuncts(pmp, pexprPred);
				ULONG size = conjuncts->Size();

				for (ULONG i = 0; i < size && !isAPartialPredicateOrArrayCmp;
					 i++)
				{
					isAPartialPredicateOrArrayCmp =
						CPredicateUtils::FArrayCompareIdentToConstIgnoreCast(
							(*conjuncts)[i]);
				}

				conjuncts->Release();
			}

			// this also applies for the simple predicates of the form "ident op const" or "ident op const-array"
			pexprBitmapLocal = PexprBitmapSelectBestIndex(
				pmp, pmda, pexprPred, ptabdesc, pmdrel, pdrgpcrOutput, pcrsReqd,
				pcrsOuterRefs, &pexprRecheckLocal, &pexprResidualLocal,
				isAPartialPredicateOrArrayCmp  // for partial preds or array comps
				// we want to consider btree indexes
			);

			// since we did not break the conjunct tree, the index path found may cover a part of the
			// predicate only, hence we perform the index lookup again for the residual.

			// example, with schema as t(a, b, c, f, e), indexes as i1(a, b), i2(a) and i3(e, f)
			// for predicate: (a = 40) AND (b = 3) AND (e = 11) AND (c = 34)
			// first lookup will produce index path (i1) with residual as: (e = 11) AND (c = 34)
			// retrying with this residual will produce second index path (i3) with final residual: (c = 34).
			// the two index paths will be joined at the end.
			retryIndexLookupWithResidual = true;
		}
		else
		{
			// break the predicate down and look for index paths on individual children
			pexprBitmapLocal = PexprBitmapLookupWithPredicateBreakDown(
				pmp, pmda, pexprOriginalPred, pexprPred, ptabdesc, pmdrel,
				pdrgpcrOutput, pcrsOuterRefs, pcrsReqd, &pexprRecheckLocal,
				&pexprResidualLocal);
			// if no index path was constructed for this predicate, return it as residual.
			// Example, with schema: t(a, b, c, d), index i1(a,b)
			// and predicate: (a = 3) OR (c = 4 AND d =5)
			// no index path will be found for (c = 4 AND d =5), in which case the entire
			// disjunct will become a residual.
			if (nullptr == pexprBitmapLocal)
			{
				pexprPred->AddRef();
				pexprResidualLocal = pexprPred;
			}
		}

		CRefCount::SafeRelease(pexprPred);

		if (nullptr != pexprBitmapLocal)
		{
			GPOS_ASSERT(nullptr != pexprRecheckLocal);

			pdrgpexprRecheckTemp->Append(pexprRecheckLocal);
			pdrgpexprBitmapTemp->Append(pexprBitmapLocal);
		}

		if (nullptr != pexprBitmapLocal && retryIndexLookupWithResidual)
		{
			// if an index path was found, then perform the lookup again for the residual
			pexprPred = pexprResidualLocal;
			continue;
		}

		// terminate the lookup if no index path was found or
		// if the retry was not required
		pexprPred = nullptr;
	}

	// for simple conjuncts, there may be multiple index paths generated by CreateBitmapIndexProbesWithOrWithoutPredBreakdown()
	// due to the retry, in that case we combine them with BitmapAnd expression.
	// for example, if you have index foo_a and foo_b with a predicate (a = 1 and b = 2 and c = 3)
	// pdrgpexprBitmapTemp will return [foo_a, foo_b]
	// pdrgpexprRecheckTemp will give recheck conditions [a = 1, b = 2]
	// these can then be ANDed together below before being appended as the final bitmap and recheck conditions
	// needed for the index probe.
	const ULONG ulBitmapExpr = pdrgpexprBitmapTemp->Size();
	if (0 < ulBitmapExpr)
	{
		JoinBitmapIndexProbes(pmp, pdrgpexprBitmapTemp, pdrgpexprRecheckTemp,
							  true /*fConjunction*/, pexprBitmapResult,
							  pexprRecheckResult);
	}

	if (nullptr != pexprResidualLocal)
	{
		pdrgpexprResidualResult->Append(pexprResidualLocal);
		// Note that since we dont have an fConjunction parameter,
		// adding the residuals to the list might be incorrect, if we are
		// inside an OR predicate. We do this anyway and rely on logic in
		// ComputeBitmapTableScanResidualPredicate(), called from
		// PexprScalarBitmapBoolOp() to replace these jumbled predicates
		// with the entire original predicate as a residual predicate.
	}

	// cleanup
	CRefCount::SafeRelease(pdrgpexprBitmapTemp);
	CRefCount::SafeRelease(pdrgpexprRecheckTemp);
}

// combine the individual bitmap access paths to form a bitmap bool op expression
void
CXformUtils::JoinBitmapIndexProbes(CMemoryPool *pmp,
								   CExpressionArray *pdrgpexprBitmap,
								   CExpressionArray *pdrgpexprRecheck,
								   BOOL fConjunction,
								   CExpression **ppexprBitmap,
								   CExpression **ppexprRecheck)
{
	const ULONG ulBitmapExpr = pdrgpexprBitmap->Size();
	CExpression *pexprBitmapBoolOp = (*pdrgpexprBitmap)[0];
	pexprBitmapBoolOp->AddRef();
	IMDId *pmdidBitmap =
		CScalar::PopConvert(pexprBitmapBoolOp->Pop())->MdidType();

	for (ULONG ul = 1; ul < ulBitmapExpr; ul++)
	{
		CExpression *pexprBitmap = (*pdrgpexprBitmap)[ul];
		pexprBitmap->AddRef();
		pmdidBitmap->AddRef();

		pexprBitmapBoolOp = PexprBitmapBoolOp(
			pmp, pmdidBitmap, pexprBitmapBoolOp, pexprBitmap, fConjunction);
	}

	GPOS_ASSERT(nullptr != pexprBitmapBoolOp && 0 < pdrgpexprRecheck->Size());
	(*ppexprBitmap) = pexprBitmapBoolOp;

	pdrgpexprRecheck->AddRef();
	CExpression *pexprRecheckNew =
		CPredicateUtils::PexprConjDisj(pmp, pdrgpexprRecheck, fConjunction);
	(*ppexprRecheck) = pexprRecheckNew;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FHasAmbiguousType
//
//	@doc:
//		Check if expression has a scalar node with ambiguous return type
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FHasAmbiguousType(CExpression *pexpr, CMDAccessor *md_accessor)
{
	GPOS_CHECK_STACK_SIZE;
	GPOS_ASSERT(nullptr != pexpr);
	GPOS_ASSERT(nullptr != md_accessor);

	BOOL fAmbiguous = false;
	if (pexpr->Pop()->FScalar())
	{
		CScalar *popScalar = CScalar::PopConvert(pexpr->Pop());
		switch (popScalar->Eopid())
		{
			case COperator::EopScalarAggFunc:
				fAmbiguous = CScalarAggFunc::PopConvert(popScalar)
								 ->FHasAmbiguousReturnType();
				break;

			case COperator::EopScalarProjectList:
			case COperator::EopScalarProjectElement:
			case COperator::EopScalarSwitchCase:
				break;	// these operators do not have valid return type

			default:
				IMDId *mdid = popScalar->MdidType();
				if (nullptr != mdid)
				{
					// check MD type of scalar node
					fAmbiguous = md_accessor->RetrieveType(mdid)->IsAmbiguous();
				}
		}
	}

	if (!fAmbiguous)
	{
		// recursively process children
		const ULONG arity = pexpr->Arity();
		for (ULONG ul = 0; !fAmbiguous && ul < arity; ul++)
		{
			CExpression *pexprChild = (*pexpr)[ul];
			fAmbiguous = FHasAmbiguousType(pexprChild, md_accessor);
		}
	}

	return fAmbiguous;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprSelect2BitmapBoolOp
//
//	@doc:
//		Transform a Select into a Bitmap(Dynamic)TableGet over BitmapBoolOp if
//		bitmap indexes exist
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprSelect2BitmapBoolOp(CMemoryPool *mp, CExpression *pexpr)
{
	GPOS_ASSERT(nullptr != pexpr);

	// extract components
	CExpression *pexprRelational = (*pexpr)[0];
	CExpression *pexprScalar = (*pexpr)[1];
	CLogical *popGet = CLogical::PopConvert(pexprRelational->Pop());

	CTableDescriptor *ptabdesc = pexprRelational->DeriveTableDescriptor();
	GPOS_ASSERT(nullptr != ptabdesc);
	const ULONG ulIndices = ptabdesc->IndexCount();
	if (0 == ulIndices)
	{
		return nullptr;
	}

	// derive the scalar and relational properties to build set of required columns
	CColRefSet *pcrsOutput = pexpr->DeriveOutputColumns();
	CColRefSet *pcrsScalarExpr = pexprScalar->DeriveUsedColumns();

	CColRefSet *pcrsReqd = GPOS_NEW(mp) CColRefSet(mp);
	pcrsReqd->Include(pcrsOutput);
	pcrsReqd->Include(pcrsScalarExpr);

	CExpression *pexprResult = PexprBitmapTableGet(
		mp, popGet, pexpr->Pop()->UlOpId(), ptabdesc, pexprScalar,
		nullptr,  // outer_refs
		pcrsReqd);
	pcrsReqd->Release();

	return pexprResult;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprBitmapTableGet
//
//	@doc:
//		Transform a Select into a Bitmap(Dynamic)TableGet over BitmapBoolOp if
//		bitmap indexes exist
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprBitmapTableGet(CMemoryPool *mp, CLogical *popGet,
								 ULONG ulOriginOpId, CTableDescriptor *ptabdesc,
								 CExpression *pexprScalar,
								 CColRefSet *outer_refs, CColRefSet *pcrsReqd)
{
	GPOS_ASSERT(COperator::EopLogicalGet == popGet->Eopid() ||
				COperator::EopLogicalDynamicGet == popGet->Eopid());

	BOOL fDynamicGet = (COperator::EopLogicalDynamicGet == popGet->Eopid());

	BOOL fConjunction = CPredicateUtils::FAnd(pexprScalar);

	CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
	pexprScalar->AddRef();
	pdrgpexpr->Append(pexprScalar);


	GPOS_ASSERT(0 < pdrgpexpr->Size());

	// find the indexes whose included columns meet the required columns
	CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
	const IMDRelation *pmdrel = md_accessor->RetrieveRel(ptabdesc->MDId());

	GPOS_ASSERT(0 < pdrgpexpr->Size());

	CColRefArray *pdrgpcrOutput = CLogical::PdrgpcrOutputFromLogicalGet(popGet);
	GPOS_ASSERT(nullptr != pdrgpcrOutput);

	CExpression *pexprRecheck = nullptr;
	CExpression *pexprResidual = nullptr;
	CExpression *pexprBitmap = PexprScalarBitmapBoolOp(
		mp, md_accessor, pexprScalar, pdrgpexpr, ptabdesc, pmdrel,
		pdrgpcrOutput, outer_refs, pcrsReqd, fConjunction, &pexprRecheck,
		&pexprResidual, false /*isAPartialPredicate*/
	);
	CExpression *pexprResult = nullptr;

	if (nullptr != pexprBitmap)
	{
		GPOS_ASSERT(nullptr != pexprRecheck);
		ptabdesc->AddRef();
		pdrgpcrOutput->AddRef();

		CName *pname = GPOS_NEW(mp)
			CName(mp, CName(CLogical::NameFromLogicalGet(popGet).Pstr()));

		// create a bitmap table scan on top
		CLogical *popBitmapTableGet = nullptr;

		if (fDynamicGet)
		{
			CLogicalDynamicGet *popDynamicGet =
				CLogicalDynamicGet::PopConvert(popGet);
			popDynamicGet->PdrgpdrgpcrPart()->AddRef();
			popDynamicGet->GetPartitionMdids()->AddRef();
			popBitmapTableGet = GPOS_NEW(mp) CLogicalDynamicBitmapTableGet(
				mp, ptabdesc, ulOriginOpId, pname, popDynamicGet->ScanId(),
				pdrgpcrOutput, popDynamicGet->PdrgpdrgpcrPart(),
				popDynamicGet->GetPartitionMdids());
		}
		else
		{
			popBitmapTableGet = GPOS_NEW(mp) CLogicalBitmapTableGet(
				mp, ptabdesc, ulOriginOpId, pname, pdrgpcrOutput);
		}
		pexprResult = GPOS_NEW(mp)
			CExpression(mp, popBitmapTableGet, pexprRecheck, pexprBitmap);

		if (nullptr != pexprResidual)
		{
			// add a selection on top with the residual condition
			pexprResult =
				GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CLogicalSelect(mp),
										 pexprResult, pexprResidual);
		}
	}

	// cleanup
	pdrgpexpr->Release();

	return pexprResult;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprRemapColumns
//
//	@doc:
//		Remap the expression from the old columns to the new ones
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprRemapColumns(CMemoryPool *mp, CExpression *pexpr,
							   CColRefArray *pdrgpcrA,
							   CColRefArray *pdrgpcrRemappedA,
							   CColRefArray *pdrgpcrB,
							   CColRefArray *pdrgpcrRemappedB)
{
	UlongToColRefMap *colref_mapping =
		CUtils::PhmulcrMapping(mp, pdrgpcrA, pdrgpcrRemappedA);
	GPOS_ASSERT_IMP(nullptr == pdrgpcrB, nullptr == pdrgpcrRemappedB);
	if (nullptr != pdrgpcrB)
	{
		CUtils::AddColumnMapping(mp, colref_mapping, pdrgpcrB,
								 pdrgpcrRemappedB);
	}
	CExpression *pexprRemapped = pexpr->PexprCopyWithRemappedColumns(
		mp, colref_mapping, true /*must_exist*/);
	colref_mapping->Release();

	return pexprRemapped;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::FJoinPredOnSingleChild
//
//	@doc:
//		Check if expression handle is attached to a Join expression
//		with a join predicate that uses columns from a single child
//
//---------------------------------------------------------------------------
BOOL
CXformUtils::FJoinPredOnSingleChild(CMemoryPool *mp, CExpressionHandle &exprhdl)
{
	GPOS_ASSERT(CUtils::FLogicalJoin(exprhdl.Pop()));

	const ULONG arity = exprhdl.Arity();
	if (0 == exprhdl.DeriveUsedColumns(arity - 1)->Size())
	{
		// no columns are used in join predicate
		return false;
	}

	// construct array of children output columns
	CColRefSetArray *pdrgpcrs = GPOS_NEW(mp) CColRefSetArray(mp);
	for (ULONG ul = 0; ul < arity - 1; ul++)
	{
		CColRefSet *pcrsOutput = exprhdl.DeriveOutputColumns(ul);
		pcrsOutput->AddRef();
		pdrgpcrs->Append(pcrsOutput);
	}

	CExpressionArray *pdrgpexprPreds = CPredicateUtils::PdrgpexprConjuncts(
		mp, exprhdl.PexprScalarExactChild(arity - 1,
										  true /*error_on_null_return*/));
	const ULONG ulPreds = pdrgpexprPreds->Size();
	BOOL fPredUsesSingleChild = false;
	for (ULONG ulPred = 0; !fPredUsesSingleChild && ulPred < ulPreds; ulPred++)
	{
		CExpression *pexpr = (*pdrgpexprPreds)[ulPred];
		CColRefSet *pcrsUsed = pexpr->DeriveUsedColumns();
		for (ULONG ulChild = 0; !fPredUsesSingleChild && ulChild < arity - 1;
			 ulChild++)
		{
			fPredUsesSingleChild = (*pdrgpcrs)[ulChild]->ContainsAll(pcrsUsed);
		}
	}
	pdrgpexprPreds->Release();
	pdrgpcrs->Release();

	return fPredUsesSingleChild;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprCTEConsumer
//
//	@doc:
//		Create a new CTE consumer for the given CTE id
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprCTEConsumer(CMemoryPool *mp, ULONG ulCTEId,
							  CColRefArray *colref_array)
{
	CLogicalCTEConsumer *popConsumer =
		GPOS_NEW(mp) CLogicalCTEConsumer(mp, ulCTEId, colref_array);
	COptCtxt::PoctxtFromTLS()->Pcteinfo()->IncrementConsumers(ulCTEId);

	return GPOS_NEW(mp) CExpression(mp, popConsumer);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PdrgpcrSubsequence
//
//	@doc:
//		Returns a new array containing the columns from the given column array 'colref_array'
//		at the positions indicated by the given ULONG array 'pdrgpulIndexesOfRefs'
//
//---------------------------------------------------------------------------
CColRefArray *
CXformUtils::PdrgpcrReorderedSubsequence(CMemoryPool *mp,
										 CColRefArray *colref_array,
										 ULongPtrArray *pdrgpulIndexesOfRefs)
{
	GPOS_ASSERT(nullptr != colref_array);
	GPOS_ASSERT(nullptr != pdrgpulIndexesOfRefs);

	const ULONG length = pdrgpulIndexesOfRefs->Size();
	GPOS_ASSERT(length <= colref_array->Size());

	CColRefArray *pdrgpcrNewSubsequence = GPOS_NEW(mp) CColRefArray(mp);
	for (ULONG ul = 0; ul < length; ul++)
	{
		ULONG ulPos = *(*pdrgpulIndexesOfRefs)[ul];
		GPOS_ASSERT(ulPos < colref_array->Size());
		pdrgpcrNewSubsequence->Append((*colref_array)[ulPos]);
	}

	return pdrgpcrNewSubsequence;
}

//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprWinFuncAgg2ScalarAgg
//
//	@doc:
//		Converts an Agg window function into regular Agg
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprWinFuncAgg2ScalarAgg(CMemoryPool *mp,
									   CExpression *pexprWinFunc)
{
	GPOS_ASSERT(nullptr != pexprWinFunc);
	GPOS_ASSERT(COperator::EopScalarWindowFunc == pexprWinFunc->Pop()->Eopid());

	CExpressionArray *pdrgpexprFullWinFuncArgs =
		GPOS_NEW(mp) CExpressionArray(mp);

	CExpressionArray *pdrgpexprWinFuncArgs = GPOS_NEW(mp) CExpressionArray(mp);
	const ULONG ulArgs = pexprWinFunc->Arity();
	for (ULONG ul = 0; ul < ulArgs; ul++)
	{
		CExpression *pexprArg = (*pexprWinFunc)[ul];
		pexprArg->AddRef();
		pdrgpexprWinFuncArgs->Append(pexprArg);
	}

	pdrgpexprFullWinFuncArgs->Append(GPOS_NEW(mp) CExpression(
		mp, GPOS_NEW(mp) CScalarValuesList(mp), pdrgpexprWinFuncArgs));

	pdrgpexprFullWinFuncArgs->Append(
		GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CScalarValuesList(mp),
								 GPOS_NEW(mp) CExpressionArray(mp)));
	pdrgpexprFullWinFuncArgs->Append(
		GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CScalarValuesList(mp),
								 GPOS_NEW(mp) CExpressionArray(mp)));
	pdrgpexprFullWinFuncArgs->Append(
		GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CScalarValuesList(mp),
								 GPOS_NEW(mp) CExpressionArray(mp)));

	CScalarWindowFunc *popScWinFunc =
		CScalarWindowFunc::PopConvert(pexprWinFunc->Pop());
	IMDId *mdid_func = popScWinFunc->FuncMdId();

	mdid_func->AddRef();
	return GPOS_NEW(mp) CExpression(
		mp,
		CUtils::PopAggFunc(mp, mdid_func,
						   GPOS_NEW(mp) CWStringConst(
							   mp, popScWinFunc->PstrFunc()->GetBuffer()),
						   popScWinFunc->IsDistinct(), EaggfuncstageGlobal,
						   false,	 // fSplit
						   nullptr,	 // pmdidResolvedReturnType
						   EaggfunckindNormal, GPOS_NEW(mp) ULongPtrArray(mp)),
		pdrgpexprFullWinFuncArgs);
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::MapPrjElemsWithDistinctAggs
//
//	@doc:
//		Given a project list, create a map whose key is the argument of
//		distinct Agg, and value is the set of project elements that define
//		distinct Aggs on that argument,
//		non-distinct Aggs are grouped together in one set with key 'True',
//		for example,
//
//		Input: (x : count(distinct a),
//				y : sum(distinct a),
//				z : avg(distinct b),
//				w : max(c))
//
//		Output: (a --> {x,y},
//				 b --> {z},
//				 true --> {w})
//
//---------------------------------------------------------------------------
void
CXformUtils::MapPrjElemsWithDistinctAggs(
	CMemoryPool *mp, CExpression *pexprPrjList,
	ExprToExprArrayMap **pphmexprdrgpexpr,	// output: created map
	ULONG *pulDifferentDQAs	 // output: number of DQAs with different arguments
)
{
	GPOS_ASSERT(nullptr != pexprPrjList);
	GPOS_ASSERT(nullptr != pphmexprdrgpexpr);
	GPOS_ASSERT(nullptr != pulDifferentDQAs);

	ExprToExprArrayMap *phmexprdrgpexpr = GPOS_NEW(mp) ExprToExprArrayMap(mp);
	ULONG ulDifferentDQAs = 0;
	CExpression *pexprTrue = CUtils::PexprScalarConstBool(mp, true /*value*/);
	const ULONG arity = pexprPrjList->Arity();
	for (ULONG ul = 0; ul < arity; ul++)
	{
		CExpression *pexprPrjEl = (*pexprPrjList)[ul];
		CExpression *pexprChild = (*pexprPrjEl)[0];
		COperator *popChild = pexprChild->Pop();
		COperator::EOperatorId eopidChild = popChild->Eopid();
		if (COperator::EopScalarAggFunc != eopidChild &&
			COperator::EopScalarWindowFunc != eopidChild)
		{
			continue;
		}

		BOOL is_distinct = false;
		if (COperator::EopScalarAggFunc == eopidChild)
		{
			is_distinct = CScalarAggFunc::PopConvert(popChild)->IsDistinct();
		}
		else
		{
			is_distinct = CScalarWindowFunc::PopConvert(popChild)->IsDistinct();
		}

		CExpression *pexprKey = nullptr;
		if (is_distinct && 1 == pexprChild->Arity())
		{
			// use first argument of Distinct Agg as key
			pexprKey = (*pexprChild)[0];
		}
		else
		{
			// use constant True as key
			pexprKey = pexprTrue;
		}

		CExpressionArray *pdrgpexpr = phmexprdrgpexpr->Find(pexprKey);
		BOOL fExists = (nullptr != pdrgpexpr);
		if (!fExists)
		{
			// first occurrence, create a new expression array
			pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
		}
		pexprPrjEl->AddRef();
		pdrgpexpr->Append(pexprPrjEl);

		if (!fExists)
		{
			pexprKey->AddRef();
#ifdef GPOS_DEBUG
			BOOL fSuccess =
#endif	// GPOS_DEBUG
				phmexprdrgpexpr->Insert(pexprKey, pdrgpexpr);
			GPOS_ASSERT(fSuccess);

			if (pexprKey != pexprTrue)
			{
				// first occurrence of a new DQA, increment counter
				ulDifferentDQAs++;
			}
		}
	}

	pexprTrue->Release();

	*pphmexprdrgpexpr = phmexprdrgpexpr;
	*pulDifferentDQAs = ulDifferentDQAs;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::ICmpPrjElemsArr
//
//	@doc:
//		Comparator used in sorting arrays of project elements
//		based on the column id of the first entry
//
//---------------------------------------------------------------------------
INT
CXformUtils::ICmpPrjElemsArr(const void *pvFst, const void *pvSnd)
{
	GPOS_ASSERT(nullptr != pvFst);
	GPOS_ASSERT(nullptr != pvSnd);

	const CExpressionArray *pdrgpexprFst = *(const CExpressionArray **) (pvFst);
	const CExpressionArray *pdrgpexprSnd = *(const CExpressionArray **) (pvSnd);

	CExpression *pexprPrjElemFst = (*pdrgpexprFst)[0];
	CExpression *pexprPrjElemSnd = (*pdrgpexprSnd)[0];
	ULONG ulIdFst =
		CScalarProjectElement::PopConvert(pexprPrjElemFst->Pop())->Pcr()->Id();
	ULONG ulIdSnd =
		CScalarProjectElement::PopConvert(pexprPrjElemSnd->Pop())->Pcr()->Id();

	if (ulIdFst < ulIdSnd)
	{
		return -1;
	}

	if (ulIdFst > ulIdSnd)
	{
		return 1;
	}

	return 0;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PdrgpdrgpexprSortedPrjElemsArray
//
//	@doc:
//		Iterate over given hash map and return array of arrays of project
//		elements sorted by the column id of the first entries
//
//---------------------------------------------------------------------------
CExpressionArrays *
CXformUtils::PdrgpdrgpexprSortedPrjElemsArray(
	CMemoryPool *mp, ExprToExprArrayMap *phmexprdrgpexpr)
{
	GPOS_ASSERT(nullptr != phmexprdrgpexpr);

	CExpressionArrays *pdrgpdrgpexprPrjElems =
		GPOS_NEW(mp) CExpressionArrays(mp);
	ExprToExprArrayMapIter hmexprdrgpexpriter(phmexprdrgpexpr);
	while (hmexprdrgpexpriter.Advance())
	{
		CExpressionArray *pdrgpexprPrjElems =
			const_cast<CExpressionArray *>(hmexprdrgpexpriter.Value());
		pdrgpexprPrjElems->AddRef();
		pdrgpdrgpexprPrjElems->Append(pdrgpexprPrjElems);
	}
	pdrgpdrgpexprPrjElems->Sort(ICmpPrjElemsArr);

	return pdrgpdrgpexprPrjElems;
}


//---------------------------------------------------------------------------
//	@function:
//		CXformUtils::PexprGbAggOnCTEConsumer2Join
//
//	@doc:
//		Convert GbAgg with distinct aggregates to a join expression
//
//		each leaf node of the resulting join expression is a GbAgg on a single
//		distinct aggs, we also create a GbAgg leaf for all remaining (non-distinct)
//		aggregates, for example
//
//		Input:
//			GbAgg_(count(distinct a), max(distinct a), sum(distinct b), avg(a))
//				+---CTEConsumer
//
//		Output:
//			InnerJoin
//				|--InnerJoin
//				|		|--GbAgg_(count(distinct a), max(distinct a))
//				|		|		+---CTEConsumer
//				|		+--GbAgg_(sum(distinct b))
//				|				+---CTEConsumer
//				+--GbAgg_(avg(a))
//						+---CTEConsumer
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::PexprGbAggOnCTEConsumer2Join(CMemoryPool *mp,
										  CExpression *pexprGbAgg)
{
	GPOS_ASSERT(nullptr != pexprGbAgg);
	GPOS_ASSERT(COperator::EopLogicalGbAgg == pexprGbAgg->Pop()->Eopid());

	CLogicalGbAgg *popGbAgg = CLogicalGbAgg::PopConvert(pexprGbAgg->Pop());
	CColRefArray *pdrgpcrGrpCols = popGbAgg->Pdrgpcr();

	GPOS_ASSERT(popGbAgg->FGlobal());

	if (COperator::EopLogicalCTEConsumer != (*pexprGbAgg)[0]->Pop()->Eopid())
	{
		// child of GbAgg must be a CTE consumer

		return nullptr;
	}

	CExpression *pexprPrjList = (*pexprGbAgg)[1];
	ULONG ulDistinctAggs = pexprPrjList->DeriveTotalDistinctAggs();

	if (1 == ulDistinctAggs)
	{
		// if only one distinct agg is used, return input expression
		pexprGbAgg->AddRef();
		return pexprGbAgg;
	}

	ExprToExprArrayMap *phmexprdrgpexpr = nullptr;
	ULONG ulDifferentDQAs = 0;
	MapPrjElemsWithDistinctAggs(mp, pexprPrjList, &phmexprdrgpexpr,
								&ulDifferentDQAs);
	if (1 == phmexprdrgpexpr->Size())
	{
		// if all distinct aggs use the same argument, return input expression
		phmexprdrgpexpr->Release();

		pexprGbAgg->AddRef();
		return pexprGbAgg;
	}

	CExpression *pexprCTEConsumer = (*pexprGbAgg)[0];
	CLogicalCTEConsumer *popConsumer =
		CLogicalCTEConsumer::PopConvert(pexprCTEConsumer->Pop());
	const ULONG ulCTEId = popConsumer->UlCTEId();
	CColRefArray *pdrgpcrConsumerOutput = popConsumer->Pdrgpcr();
	CCTEInfo *pcteinfo = COptCtxt::PoctxtFromTLS()->Pcteinfo();

	CExpression *pexprLastGbAgg = nullptr;
	CColRefArray *pdrgpcrLastGrpCols = nullptr;
	CExpression *pexprJoin = nullptr;
	CExpression *pexprTrue = CUtils::PexprScalarConstBool(mp, true /*value*/);

	// iterate over map to extract sorted array of array of project elements,
	// we need to sort arrays here since hash map iteration is non-deterministic,
	// which may create non-deterministic ordering of join children leading to
	// changing the plan of the same query when run multiple times
	CExpressionArrays *pdrgpdrgpexprPrjElems =
		PdrgpdrgpexprSortedPrjElemsArray(mp, phmexprdrgpexpr);

	// counter of consumers
	ULONG ulConsumers = 0;

	const ULONG size = pdrgpdrgpexprPrjElems->Size();
	for (ULONG ulPrjElemsArr = 0; ulPrjElemsArr < size; ulPrjElemsArr++)
	{
		CExpressionArray *pdrgpexprPrjElems =
			(*pdrgpdrgpexprPrjElems)[ulPrjElemsArr];

		CExpression *pexprNewGbAgg = nullptr;
		if (0 == ulConsumers)
		{
			// reuse input consumer
			pdrgpcrGrpCols->AddRef();
			pexprCTEConsumer->AddRef();
			pdrgpexprPrjElems->AddRef();
			pexprNewGbAgg = GPOS_NEW(mp) CExpression(
				mp,
				GPOS_NEW(mp) CLogicalGbAgg(mp, pdrgpcrGrpCols,
										   COperator::EgbaggtypeGlobal),
				pexprCTEConsumer,
				GPOS_NEW(mp)
					CExpression(mp, GPOS_NEW(mp) CScalarProjectList(mp),
								pdrgpexprPrjElems));
		}
		else
		{
			// create a new consumer
			CColRefArray *pdrgpcrNewConsumerOutput =
				CUtils::PdrgpcrCopy(mp, pdrgpcrConsumerOutput);
			CExpression *pexprNewConsumer = GPOS_NEW(mp)
				CExpression(mp, GPOS_NEW(mp) CLogicalCTEConsumer(
									mp, ulCTEId, pdrgpcrNewConsumerOutput));
			pcteinfo->IncrementConsumers(ulCTEId);

			// fix Aggs arguments to use new consumer output column
			UlongToColRefMap *colref_mapping = CUtils::PhmulcrMapping(
				mp, pdrgpcrConsumerOutput, pdrgpcrNewConsumerOutput);
			CExpressionArray *pdrgpexprNewPrjElems =
				GPOS_NEW(mp) CExpressionArray(mp);
			const ULONG ulPrjElems = pdrgpexprPrjElems->Size();
			for (ULONG ul = 0; ul < ulPrjElems; ul++)
			{
				CExpression *pexprPrjEl = (*pdrgpexprPrjElems)[ul];

				// to match requested columns upstream, we have to re-use the same computed
				// columns that define the aggregates, we avoid creating new columns during
				// expression copy by passing must_exist as false
				CExpression *pexprNewPrjEl =
					pexprPrjEl->PexprCopyWithRemappedColumns(
						mp, colref_mapping, false /*must_exist*/);
				pdrgpexprNewPrjElems->Append(pexprNewPrjEl);
			}

			// re-map grouping columns
			CColRefArray *pdrgpcrNewGrpCols = CUtils::PdrgpcrRemap(
				mp, pdrgpcrGrpCols, colref_mapping, true /*must_exist*/);

			// create new GbAgg expression
			pexprNewGbAgg = GPOS_NEW(mp) CExpression(
				mp,
				GPOS_NEW(mp) CLogicalGbAgg(mp, pdrgpcrNewGrpCols,
										   COperator::EgbaggtypeGlobal),
				pexprNewConsumer,
				GPOS_NEW(mp)
					CExpression(mp, GPOS_NEW(mp) CScalarProjectList(mp),
								pdrgpexprNewPrjElems));

			colref_mapping->Release();
		}

		ulConsumers++;

		CColRefArray *pdrgpcrNewGrpCols =
			CLogicalGbAgg::PopConvert(pexprNewGbAgg->Pop())->Pdrgpcr();
		if (nullptr != pexprLastGbAgg)
		{
			CExpression *pexprJoinCondition = nullptr;
			if (0 == pdrgpcrLastGrpCols->Size())
			{
				GPOS_ASSERT(0 == pdrgpcrNewGrpCols->Size());

				pexprTrue->AddRef();
				pexprJoinCondition = pexprTrue;
			}
			else
			{
				GPOS_ASSERT(pdrgpcrLastGrpCols->Size() ==
							pdrgpcrNewGrpCols->Size());

				pexprJoinCondition = CPredicateUtils::PexprINDFConjunction(
					mp, pdrgpcrLastGrpCols, pdrgpcrNewGrpCols);
			}

			if (nullptr == pexprJoin)
			{
				// create first join
				pexprJoin = CUtils::PexprLogicalJoin<CLogicalInnerJoin>(
					mp, pexprLastGbAgg, pexprNewGbAgg, pexprJoinCondition);
			}
			else
			{
				// cascade joins
				pexprJoin = CUtils::PexprLogicalJoin<CLogicalInnerJoin>(
					mp, pexprJoin, pexprNewGbAgg, pexprJoinCondition);
			}
		}

		pexprLastGbAgg = pexprNewGbAgg;
		pdrgpcrLastGrpCols = pdrgpcrNewGrpCols;
	}

	pdrgpdrgpexprPrjElems->Release();
	phmexprdrgpexpr->Release();
	pexprTrue->Release();

	return pexprJoin;
}

//---------------------------------------------------------------------------
// CXformUtils::AddALinearStackOfUnaryExpressions
//
// Given two CExpressions, a "lower part" and a "stack", consisting of
// zero or more CExpressions with a single logical child and optional scalar
// children, make a copy of the stack, with the lower (excluded) end of the
// stack ("exclusiveBottomOfStack") replaced by "lowerPartOfExpr".
//
// Input:
//
//      lowerPartOfExpr             topOfStack
//          / | | \                     |
//    (optional children)              ...
//                                      |
//                               lastIncludedNode
//                                      |
//                             exclusiveBottomOfStack
//
// Result:
//
//                topOfStack
//                    |
//                   ...
//                    |
//             lastIncludedNode
//                    |
//              lowerPartOfExpr
//                  / | | \ .
//            (optional children)
//
//---------------------------------------------------------------------------
CExpression *
CXformUtils::AddALinearStackOfUnaryExpressions(
	CMemoryPool *mp, CExpression *lowerPartOfExpr, CExpression *topOfStack,
	CExpression *exclusiveBottomOfStack)
{
	if (nullptr == topOfStack || topOfStack == exclusiveBottomOfStack)
	{
		// nothing to add on top of lowerPartOfExpr
		return lowerPartOfExpr;
	}

	ULONG arity = topOfStack->Arity();

	// a stack must consist of logical nodes
	GPOS_ASSERT(topOfStack->Pop()->FLogical());
	GPOS_CHECK_STACK_SIZE;

	// Recursively process the node just below topOfStack first, to build the new stack bottom-up.
	// Note that if the stack ends here, the recursive call will return lowerPartOfExpr.
	CExpression *processedRestOfStack = AddALinearStackOfUnaryExpressions(
		mp, lowerPartOfExpr, (*topOfStack)[0], exclusiveBottomOfStack);

	// now add a copy of node topOfStack
	CExpressionArray *childrenArray = GPOS_NEW(mp) CExpressionArray(mp);
	COperator *pop = topOfStack->Pop();

	// the first, logical child becomes the copied rest of the stack
	childrenArray->Append(processedRestOfStack);

	// then copy the remaining (scalar) children, unmodified
	for (ULONG ul = 1; ul < arity; ul++)
	{
		CExpression *scalarChild = (*topOfStack)[ul];

		GPOS_ASSERT(scalarChild->Pop()->FScalar());
		scalarChild->AddRef();
		childrenArray->Append(scalarChild);
	}

	pop->AddRef();

	return GPOS_NEW(mp) CExpression(mp, pop, childrenArray);
}

// EOF

相关信息

greenplumn 源码目录

相关文章

greenplumn CDecorrelator 源码

greenplumn CJoinOrder 源码

greenplumn CJoinOrderDP 源码

greenplumn CJoinOrderDPv2 源码

greenplumn CJoinOrderGreedy 源码

greenplumn CJoinOrderMinCard 源码

greenplumn CSubqueryHandler 源码

greenplumn CXform 源码

greenplumn CXformCTEAnchor2Sequence 源码

greenplumn CXformCTEAnchor2TrivialSelect 源码

0  赞