greenplum CGroupByStatsProcessor 源码
greenplum CGroupByStatsProcessor 代码
文件路径:/src/backend/gporca/libnaucrates/src/statistics/CGroupByStatsProcessor.cpp
//---------------------------------------------------------------------------
// Greenplum Database
// Copyright 2018 VMware, Inc. or its affiliates.
//
// @filename:
// CGroupByStatsProcessor.cpp
//
// @doc:
// Statistics helper routines for processing group by operations
//---------------------------------------------------------------------------
#include "naucrates/statistics/CGroupByStatsProcessor.h"
#include "gpopt/base/COptCtxt.h"
#include "gpopt/optimizer/COptimizerConfig.h"
#include "naucrates/statistics/CStatistics.h"
#include "naucrates/statistics/CStatisticsUtils.h"
using namespace gpopt;
// Derive the statistics object that results from applying a group-by to the
// given input statistics. Column ids in 'aggs' (computed aggregates) and in
// 'GCs' (grouping columns) get entries in the output histogram/width maps;
// 'keys' is forwarded to the NDV extraction for the grouping columns.
// Returns a newly allocated CStatistics object.
CStatistics *
CGroupByStatsProcessor::CalcGroupByStats(CMemoryPool *mp,
										 const CStatistics *input_stats,
										 ULongPtrArray *GCs,
										 ULongPtrArray *aggs, CBitSet *keys)
{
	// maps backing the output statistics object:
	// colid -> histogram and colid -> width
	UlongToHistogramMap *histograms = GPOS_NEW(mp) UlongToHistogramMap(mp);
	UlongToDoubleMap *widths = GPOS_NEW(mp) UlongToDoubleMap(mp);

	CColumnFactory *col_factory = COptCtxt::PoctxtFromTLS()->Pcf();

	// row estimate of the output; stays at the minimum for empty input
	CDouble agg_rows = CStatistics::MinRows;
	const bool input_is_empty = input_stats->IsEmpty();

	// computed aggregates always receive dummy histograms; nothing smarter is
	// attempted for them, only the emptiness flag follows the input
	CHistogram::AddDummyHistogramAndWidthInfo(mp, col_factory, histograms,
											  widths, aggs, input_is_empty);

	if (input_is_empty)
	{
		// empty input: the grouping columns get dummy histograms as well
		CHistogram::AddDummyHistogramAndWidthInfo(mp, col_factory, histograms,
												  widths, GCs,
												  true /* is_empty */);
	}
	else
	{
		CColRefSet *computed_cols = GPOS_NEW(mp) CColRefSet(mp);
		CColRefSet *stats_cols = CStatisticsUtils::MakeGroupByColsForStats(
			mp, GCs, computed_cols);

		// carry over the statistics of (1) the columns used to compute the
		// cardinality of the aggregate and (2) the computed grouping columns
		CStatisticsUtils::AddGrpColStats(mp, input_stats, stats_cols,
										 histograms, widths);
		CStatisticsUtils::AddGrpColStats(mp, input_stats, computed_cols,
										 histograms, widths);

		// number of groups = cumulative NDV over the grouping columns
		const CStatisticsConfig *config = input_stats->GetStatsConfig();
		CDoubleArray *ndvs = CStatisticsUtils::ExtractNDVForGrpCols(
			mp, config, input_stats, stats_cols, keys);
		CDouble num_groups = CStatisticsUtils::GetCumulativeNDVs(config, ndvs);

		// the helper objects are not needed past this point
		stats_cols->Release();
		computed_cols->Release();
		ndvs->Release();

		// bound the estimate: at least CStatistics::MinRows, and never more
		// than the number of input rows
		const auto floor_rows =
			std::max(CStatistics::MinRows.Get(), num_groups.Get());
		agg_rows = std::min(floor_rows, input_stats->Rows().Get());
	}

	// build the output statistics object from the collected maps
	CStatistics *agg_stats = GPOS_NEW(mp)
		CStatistics(mp, histograms, widths, agg_rows, input_is_empty);

	// The upper bound source cardinality of a grouping column in the output
	// cannot exceed the upper bound recorded for it in the input statistics.
	// CStatistics::EcbmMin is therefore used as the bounding method: it takes
	// the minimum of the source column's cardinality upper bound (from the
	// input) and the estimated group-by cardinality.
	CStatisticsUtils::ComputeCardUpperBounds(
		mp, input_stats, agg_stats, agg_rows,
		CStatistics::EcbmMin /* card_bounding_method */);

	return agg_stats;
}
// EOF
相关信息
相关文章
greenplum CFilterStatsProcessor 源码
greenplum CInnerJoinStatsProcessor 源码
greenplum CJoinStatsProcessor 源码
greenplum CLeftAntiSemiJoinStatsProcessor 源码
greenplum CLeftOuterJoinStatsProcessor 源码
greenplum CLeftSemiJoinStatsProcessor 源码
0
赞