greenplum CLeftOuterJoinStatsProcessor 源码
greenplum CLeftOuterJoinStatsProcessor 代码
文件路径:/src/backend/gporca/libnaucrates/src/statistics/CLeftOuterJoinStatsProcessor.cpp
//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright 2018 VMware, Inc. or its affiliates.
//
//	@filename:
//		CLeftOuterJoinStatsProcessor.cpp
//
//	@doc:
//		Statistics helper routines for processing Left Outer Joins
//---------------------------------------------------------------------------
#include "naucrates/statistics/CLeftOuterJoinStatsProcessor.h"
#include "naucrates/statistics/CStatisticsUtils.h"
using namespace gpmd;
//---------------------------------------------------------------------------
//	@function:
//		CLeftOuterJoinStatsProcessor::CalcLOJoinStatsStatic
//
//	@doc:
//		Derive the statistics of a left outer join (LOJ) between the outer
//		and the inner input statistics under the given join predicates.
//		The result combines the inner join statistics with the outer-side
//		buckets that do not contribute to the inner join (left anti semi
//		join, LASJ). A newly allocated statistics object is returned.
//---------------------------------------------------------------------------
CStatistics *
CLeftOuterJoinStatsProcessor::CalcLOJoinStatsStatic(
	CMemoryPool *mp, const IStatistics *outer_side_stats,
	const IStatistics *inner_side_stats, CStatsPredJoinArray *join_preds_stats)
{
	GPOS_ASSERT(nullptr != outer_side_stats);
	GPOS_ASSERT(nullptr != inner_side_stats);
	GPOS_ASSERT(nullptr != join_preds_stats);

	// the histogram helpers below take the concrete statistics class
	const CStatistics *outer_stats =
		dynamic_cast<const CStatistics *>(outer_side_stats);
	const CStatistics *inner_stats =
		dynamic_cast<const CStatistics *>(inner_side_stats);

	// start from the inner join of the two sides
	CStatistics *inner_join_stats = CStatistics::CastStats(
		outer_stats->CalcInnerJoinStats(mp, inner_side_stats,
										join_preds_stats));
	CDouble inner_join_rows = inner_join_stats->Rows();

	// overwritten by MakeLOJHistogram with the row count of the LASJ
	CDouble LASJ_rows(1.0);

	// build the histogram map of the LOJ: every outer column additionally
	// receives the buckets that do not contribute to the inner join
	UlongToHistogramMap *histograms =
		CLeftOuterJoinStatsProcessor::MakeLOJHistogram(
			mp, outer_stats, inner_stats, inner_join_stats, join_preds_stats,
			inner_join_rows, &LASJ_rows);

	// the LOJ never produces fewer rows than its outer child
	CDouble outer_rows = outer_side_stats->Rows();
	CDouble joined_rows = inner_join_rows + LASJ_rows;
	CDouble LOJ_rows = std::max(outer_rows, joined_rows);

	// assemble the output statistics; column widths are copied from the
	// inner join statistics, which are no longer needed afterwards
	CStatistics *LOJ_stats = GPOS_NEW(mp) CStatistics(
		mp, histograms, inner_join_stats->CopyWidths(mp), LOJ_rows,
		outer_side_stats->IsEmpty(), outer_side_stats->GetNumberOfPredicates());
	inner_join_stats->Release();

	// The upper bound source cardinality of a join column in the output
	// cannot exceed the upper bound recorded in the input statistics.
	// CStatistics::EcbmMin therefore bounds each source by the minimum of
	// its input upper bound and the estimated join cardinality. This is
	// applied for the sources of the outer side first, then the inner side.
	CStatisticsUtils::ComputeCardUpperBounds(
		mp, outer_stats, LOJ_stats, LOJ_rows,
		CStatistics::EcbmMin /* card_bounding_method */);
	CStatisticsUtils::ComputeCardUpperBounds(
		mp, inner_stats, LOJ_stats, LOJ_rows,
		CStatistics::EcbmMin /* card_bounding_method */);

	return LOJ_stats;
}
//---------------------------------------------------------------------------
//	@function:
//		CLeftOuterJoinStatsProcessor::MakeLOJHistogram
//
//	@doc:
//		Build the column id -> histogram map of a left outer join from the
//		histograms of the outer child and of the inner join. The row count
//		of the left anti semi join (LASJ) computed along the way is written
//		to result_rows_LASJ.
//---------------------------------------------------------------------------
UlongToHistogramMap *
CLeftOuterJoinStatsProcessor::MakeLOJHistogram(
	CMemoryPool *mp, const CStatistics *outer_side_stats,
	const CStatistics *inner_side_stats, CStatistics *inner_join_stats,
	CStatsPredJoinArray *join_preds_stats, CDouble num_rows_inner_join,
	CDouble *result_rows_LASJ)
{
	GPOS_ASSERT(nullptr != outer_side_stats);
	GPOS_ASSERT(nullptr != inner_side_stats);
	GPOS_ASSERT(nullptr != join_preds_stats);
	GPOS_ASSERT(nullptr != inner_join_stats);

	// collect the outer child columns that take part in a join predicate
	CBitSet *join_colids_outer = GPOS_NEW(mp) CBitSet(mp);
	const ULONG num_join_preds = join_preds_stats->Size();
	for (ULONG ul = 0; ul < num_join_preds; ul++)
	{
		CStatsPredJoin *pred = (*join_preds_stats)[ul];
		if (!pred->HasValidColIdOuter())
		{
			continue;
		}
		(void) join_colids_outer->ExchangeSet(pred->ColIdOuter());
	}

	// LASJ of outer against inner: the outer-side buckets that do not
	// contribute to the inner join
	CStatistics *anti_join_stats =
		CStatistics::CastStats(outer_side_stats->CalcLASJoinStats(
			mp, inner_side_stats, join_preds_stats,
			false /* DoIgnoreLASJHistComputation */
			));

	// empty LASJ statistics count as zero rows
	CDouble anti_join_rows =
		anti_join_stats->IsEmpty() ? CDouble(0.0) : anti_join_stats->Rows();

	UlongToHistogramMap *result_histograms =
		GPOS_NEW(mp) UlongToHistogramMap(mp);

	ULongPtrArray *outer_colids = outer_side_stats->GetColIdsWithStats(mp);
	const ULONG outer_colid_count = outer_colids->Size();
	for (ULONG ul = 0; ul < outer_colid_count; ul++)
	{
		ULONG colid = *(*outer_colids)[ul];
		const CHistogram *hist_inner_join =
			inner_join_stats->GetHistogram(colid);
		GPOS_ASSERT(nullptr != hist_inner_join);

		if (!join_colids_outer->Get(colid))
		{
			// outer column that is not a join column: take the inner join
			// histogram unchanged
			CStatisticsUtils::AddHistogram(mp, colid, hist_inner_join,
										   result_histograms);
			continue;
		}

		const CHistogram *hist_anti_join = anti_join_stats->GetHistogram(colid);
		GPOS_ASSERT(nullptr != hist_anti_join);

		if (!hist_anti_join->IsWellDefined() || hist_anti_join->IsEmpty())
		{
			// the LASJ histogram has nothing usable to add for this column
			CStatisticsUtils::AddHistogram(mp, colid, hist_inner_join,
										   result_histograms);
			continue;
		}

		// LOJ buckets = union of the LASJ buckets and the inner join buckets
		CHistogram *hist_union =
			hist_anti_join->MakeUnionAllHistogramNormalize(
				anti_join_rows, hist_inner_join, num_rows_inner_join);
		CStatisticsUtils::AddHistogram(mp, colid, hist_union,
									   result_histograms);
		GPOS_DELETE(hist_union);
	}

	// the outer-side helpers are not needed past this point
	anti_join_stats->Release();
	outer_colids->Release();
	join_colids_outer->Release();

	// add the statistics of every column of the inner child of the join
	ULongPtrArray *inner_colids = inner_side_stats->GetColIdsWithStats(mp);
	AddHistogramsLOJInner(mp, inner_join_stats, inner_colids, anti_join_rows,
						  num_rows_inner_join, result_histograms);
	inner_colids->Release();

	// hand the LASJ cardinality back to the caller
	*result_rows_LASJ = anti_join_rows;

	return result_histograms;
}
//---------------------------------------------------------------------------
//	@function:
//		CLeftOuterJoinStatsProcessor::AddHistogramsLOJInner
//
//	@doc:
//		Add the LOJ histograms of the inner-side columns to LOJ_histograms.
//		For each column, the inner join histogram is unioned with a
//		histogram that has no buckets and a NULL frequency of 1.0, weighted
//		by the number of LASJ rows.
//---------------------------------------------------------------------------
void
CLeftOuterJoinStatsProcessor::AddHistogramsLOJInner(
	CMemoryPool *mp, const CStatistics *inner_join_stats,
	ULongPtrArray *inner_colids_with_stats, CDouble num_rows_LASJ,
	CDouble num_rows_inner_join, UlongToHistogramMap *LOJ_histograms)
{
	GPOS_ASSERT(nullptr != inner_join_stats);
	GPOS_ASSERT(nullptr != inner_colids_with_stats);
	GPOS_ASSERT(nullptr != LOJ_histograms);

	const ULONG colid_count = inner_colids_with_stats->Size();
	for (ULONG idx = 0; idx < colid_count; idx++)
	{
		ULONG colid = *(*inner_colids_with_stats)[idx];

		const CHistogram *hist_inner_join =
			inner_join_stats->GetHistogram(colid);
		GPOS_ASSERT(nullptr != hist_inner_join);

		// the number of NULLs added to the inner side should equal the
		// number of rows of the LASJ on the outer side: model them with a
		// bucket-less histogram whose NULL frequency is 1.0
		CBucketArray *no_buckets = GPOS_NEW(mp) CBucketArray(mp);
		CHistogram *hist_all_nulls = GPOS_NEW(mp)
			CHistogram(mp, no_buckets, true /*is_well_defined*/,
					   1.0 /*null_freq*/, CHistogram::DefaultNDVRemain,
					   CHistogram::DefaultNDVFreqRemain);

		// combine the inner join rows with the LASJ rows
		CHistogram *hist_union =
			hist_inner_join->MakeUnionAllHistogramNormalize(
				num_rows_inner_join, hist_all_nulls, num_rows_LASJ);
		CStatisticsUtils::AddHistogram(mp, colid, hist_union, LOJ_histograms);

		// both temporaries are discarded once the result has been added
		GPOS_DELETE(hist_union);
		GPOS_DELETE(hist_all_nulls);
	}
}
// EOF
相关信息
相关文章
greenplum CFilterStatsProcessor 源码
greenplum CGroupByStatsProcessor 源码
greenplum CInnerJoinStatsProcessor 源码
greenplum CJoinStatsProcessor 源码
greenplum CLeftAntiSemiJoinStatsProcessor 源码
greenplum CLeftSemiJoinStatsProcessor 源码
                        
                            0
                        
                        
                             赞
                        
                    
                    
                热门推荐
- 
                        2、 - 优质文章
- 
                        3、 gate.io
- 
                        8、 openharmony
- 
                        9、 golang