greenplum CLeftAntiSemiJoinStatsProcessor 源码

  • 2022-08-18
  • 浏览 (61)

greenplum CLeftAntiSemiJoinStatsProcessor 代码

文件路径:/src/backend/gporca/libnaucrates/src/statistics/CLeftAntiSemiJoinStatsProcessor.cpp

//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright 2018 VMware, Inc. or its affiliates.
//
//	@filename:
//		CLeftAntiSemiJoinStatsProcessor.cpp
//
//	@doc:
//		Statistics helper routines for processing Left Anti-Semi Join
//---------------------------------------------------------------------------

#include "naucrates/statistics/CLeftAntiSemiJoinStatsProcessor.h"

#include "naucrates/statistics/CScaleFactorUtils.h"
#include "naucrates/statistics/CStatisticsUtils.h"

using namespace gpmd;

// helper for LAS-joining histograms
void
CLeftAntiSemiJoinStatsProcessor::JoinHistogramsLASJ(
	const CHistogram *histogram1, const CHistogram *histogram2,
	CStatsPredJoin *join_stats, CDouble num_rows1,
	CDouble,					//num_rows2,
	CHistogram **result_hist1,	// output: histogram 1 after join
	CHistogram **result_hist2,	// output: histogram 2 after join
	CDouble *scale_factor,		// output: scale factor based on the join
	BOOL is_input_empty, IStatistics::EStatsJoinType,
	BOOL DoIgnoreLASJHistComputation)
{
	GPOS_ASSERT(nullptr != histogram1);
	GPOS_ASSERT(nullptr != histogram2);
	GPOS_ASSERT(nullptr != join_stats);
	GPOS_ASSERT(nullptr != result_hist1);
	GPOS_ASSERT(nullptr != result_hist2);
	GPOS_ASSERT(nullptr != scale_factor);

	// anti-semi join should give the full outer side.
	// use 1.0 as scale factor if anti semi join
	*scale_factor = 1.0;

	CStatsPred::EStatsCmpType stats_cmp_type = join_stats->GetCmpType();

	if (is_input_empty)
	{
		*result_hist1 = histogram1->CopyHistogram();
		*result_hist2 = nullptr;

		return;
	}

	BOOL empty_histograms = histogram1->IsEmpty() || histogram2->IsEmpty();
	if (!empty_histograms &&
		CHistogram::JoinPredCmpTypeIsSupported(stats_cmp_type))
	{
		*result_hist1 = histogram1->MakeLASJHistogramNormalize(
			stats_cmp_type, num_rows1, histogram2, scale_factor,
			DoIgnoreLASJHistComputation);
		*result_hist2 = nullptr;

		if ((*result_hist1)->IsEmpty())
		{
			// if the LASJ histograms is empty then all tuples of the outer join column
			// joined with those on the inner side. That is, LASJ will produce no rows
			*scale_factor = num_rows1;
		}

		return;
	}

	// for an unsupported join predicate operator or in the case of missing stats,
	// copy input histograms and use default scale factor
	*scale_factor = CDouble(CScaleFactorUtils::DefaultJoinPredScaleFactor);
	*result_hist1 = histogram1->CopyHistogram();
	*result_hist2 = nullptr;
}

//	Return statistics object after performing LASJ.
//
//	Delegates to CJoinStatsProcessor::SetResultingJoinStats with the
//	EsjtLeftAntiSemiJoin join type. The statistics configuration passed along
//	is read from the outer input, which must therefore be a CStatistics.
CStatistics *
CLeftAntiSemiJoinStatsProcessor::CalcLASJoinStatsStatic(
	CMemoryPool *mp, const IStatistics *outer_stats_input,
	const IStatistics *inner_stats_input, CStatsPredJoinArray *join_preds_stats,
	BOOL DoIgnoreLASJHistComputation)
{
	GPOS_ASSERT(nullptr != inner_stats_input);
	GPOS_ASSERT(nullptr != outer_stats_input);
	GPOS_ASSERT(nullptr != join_preds_stats);

	const CStatistics *outer_stats =
		dynamic_cast<const CStatistics *>(outer_stats_input);
	// dynamic_cast yields nullptr when the input is not a CStatistics;
	// check it here because outer_stats is dereferenced right below
	GPOS_ASSERT(nullptr != outer_stats);

	return CJoinStatsProcessor::SetResultingJoinStats(
		mp, outer_stats->GetStatsConfig(), outer_stats_input, inner_stats_input,
		join_preds_stats, IStatistics::EsjtLeftAntiSemiJoin /* esjt */,
		DoIgnoreLASJHistComputation);
}

// Compute the null frequency for LASJ.
//
// Returns 0.0 when the predicate is INDF and the inner histogram's null
// frequency exceeds CStatistics::Epsilon; in every other case the outer
// histogram's null frequency is returned unchanged.
CDouble
CLeftAntiSemiJoinStatsProcessor::NullFreqLASJ(
	CStatsPred::EStatsCmpType stats_cmp_type, const CHistogram *outer_histogram,
	const CHistogram *inner_histogram)
{
	GPOS_ASSERT(nullptr != outer_histogram);
	GPOS_ASSERT(nullptr != inner_histogram);

	// Only an INDF predicate lets NULLs on the outer side join with NULLs
	// on the inner side, and only if the inner side actually has NULLs.
	// The inner histogram is consulted only for INDF (short-circuit).
	const BOOL outer_nulls_are_matched =
		(CStatsPred::EstatscmptINDF == stats_cmp_type) &&
		(CStatistics::Epsilon < inner_histogram->GetNullFreq());

	if (outer_nulls_are_matched)
	{
		// matched NULLs are filtered out by the anti-semi join
		return CDouble(0.0);
	}

	// otherwise NULLs on the outer side do not join with the inner side
	return outer_histogram->GetNullFreq();
}


// EOF

相关信息

greenplum 源码目录

相关文章

greenplum CBucket 源码

greenplum CFilterStatsProcessor 源码

greenplum CGroupByStatsProcessor 源码

greenplum CHistogram 源码

greenplum CInnerJoinStatsProcessor 源码

greenplum CJoinStatsProcessor 源码

greenplum CLeftOuterJoinStatsProcessor 源码

greenplum CLeftSemiJoinStatsProcessor 源码

greenplum CLimitStatsProcessor 源码

greenplum CPoint 源码

0  赞