greenplumn CHistogramTest 源码

  • 2022-08-18
  • 浏览 (116)

greenplumn CHistogramTest 代码

文件路径:/src/backend/gporca/server/src/unittest/dxl/statistics/CHistogramTest.cpp

//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright (C) 2018 VMware, Inc. or its affiliates.
//
//	@filename:
//		CHistogramTest.cpp
//
//	@doc:
//		Testing operations on histogram objects
//---------------------------------------------------------------------------

#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif

#include "unittest/dxl/statistics/CHistogramTest.h"

#include <stdint.h>

#include "gpos/error/CAutoTrace.h"
#include "gpos/io/COstreamString.h"
#include "gpos/string/CWStringDynamic.h"

#include "naucrates/statistics/CHistogram.h"
#include "naucrates/statistics/CPoint.h"

#include "unittest/base.h"
#include "unittest/dxl/statistics/CCardinalityTestUtils.h"
#include "unittest/gpopt/CTestUtils.h"

using namespace gpopt;

// unittest for statistics objects
GPOS_RESULT
CHistogramTest::EresUnittest()
{
	// tests that use shared optimization context
	CUnittest rgutSharedOptCtxt[] = {
		GPOS_UNITTEST_FUNC(CHistogramTest::EresUnittest_CHistogramInt4),
		GPOS_UNITTEST_FUNC(CHistogramTest::EresUnittest_CHistogramBool),
		GPOS_UNITTEST_FUNC(CHistogramTest::EresUnittest_Skew),
		GPOS_UNITTEST_FUNC(CHistogramTest::EresUnittest_CHistogramValid),
		GPOS_UNITTEST_FUNC(CHistogramTest::EresUnittest_MergeUnion),
		GPOS_UNITTEST_FUNC(
			CHistogramTest::EresUnittest_MergeUnionDoubleLessThanEpsilon)};


	CAutoMemoryPool amp;
	CMemoryPool *mp = amp.Pmp();

	// setup a file-based provider
	CMDProviderMemory *pmdp = CTestUtils::m_pmdpf;
	pmdp->AddRef();
	CMDAccessor mda(mp, CMDCache::Pcache(), CTestUtils::m_sysidDefault, pmdp);

	// install opt context in TLS
	CAutoOptCtxt aoc(mp, &mda, nullptr /* pceeval */,
					 CTestUtils::GetCostModel(mp));

	return CUnittest::EresExecute(rgutSharedOptCtxt,
								  GPOS_ARRAY_SIZE(rgutSharedOptCtxt));
}

// histogram of int4
GPOS_RESULT
CHistogramTest::EresUnittest_CHistogramInt4()
{
	// create memory pool
	CAutoMemoryPool amp;
	CMemoryPool *mp = amp.Pmp();

	// original histogram
	CHistogram *histogram = CCardinalityTestUtils::PhistExampleInt4(mp);
	CCardinalityTestUtils::PrintHist(mp, "histogram", histogram);

	// test edge case of MakeBucketGreaterThan
	CPoint *ppoint0 = CTestUtils::PpointInt4(mp, 9);
	CHistogram *phist0 =
		histogram->MakeHistogramFilter(CStatsPred::EstatscmptG, ppoint0);
	CCardinalityTestUtils::PrintHist(mp, "phist0", phist0);
	GPOS_RTL_ASSERT(phist0->GetNumBuckets() == 9);

	CPoint *point1 = CTestUtils::PpointInt4(mp, 35);
	CHistogram *histogram1 =
		histogram->MakeHistogramFilter(CStatsPred::EstatscmptL, point1);
	CCardinalityTestUtils::PrintHist(mp, "histogram1", histogram1);
	GPOS_RTL_ASSERT(histogram1->GetNumBuckets() == 4);

	// edge case where point is equal to upper bound
	CPoint *point2 = CTestUtils::PpointInt4(mp, 50);
	CHistogram *histogram2 =
		histogram->MakeHistogramFilter(CStatsPred::EstatscmptL, point2);
	CCardinalityTestUtils::PrintHist(mp, "histogram2", histogram2);
	GPOS_RTL_ASSERT(histogram2->GetNumBuckets() == 5);

	// equality check
	CPoint *point3 = CTestUtils::PpointInt4(mp, 100);
	CHistogram *phist3 =
		histogram->MakeHistogramFilter(CStatsPred::EstatscmptEq, point3);
	CCardinalityTestUtils::PrintHist(mp, "phist3", phist3);
	GPOS_RTL_ASSERT(phist3->GetNumBuckets() == 1);

	// normalized output after filter
	CPoint *ppoint4 = CTestUtils::PpointInt4(mp, 100);
	CDouble scale_factor(0.0);
	CHistogram *phist4 = histogram->MakeHistogramFilterNormalize(
		CStatsPred::EstatscmptEq, ppoint4, &scale_factor);
	CCardinalityTestUtils::PrintHist(mp, "phist4", phist4);
	GPOS_RTL_ASSERT(phist4->IsValid());

	// lasj
	CHistogram *phist5 =
		histogram->MakeLASJHistogram(CStatsPred::EstatscmptEq, histogram2);
	CCardinalityTestUtils::PrintHist(mp, "phist5", phist5);
	GPOS_RTL_ASSERT(phist5->GetNumBuckets() == 5);

	// inequality check
	CHistogram *phist6 =
		histogram->MakeHistogramFilter(CStatsPred::EstatscmptNEq, point2);
	CCardinalityTestUtils::PrintHist(mp, "phist6", phist6);
	GPOS_RTL_ASSERT(phist6->GetNumBuckets() == 10);

	// histogram with null fraction and remaining tuples
	CHistogram *phist7 = PhistExampleInt4Remain(mp);
	CCardinalityTestUtils::PrintHist(mp, "phist7", phist7);
	CPoint *ppoint5 = CTestUtils::PpointInt4(mp, 20);

	// equality check, hitting remaining tuples
	CHistogram *phist8 =
		phist7->MakeHistogramFilter(CStatsPred::EstatscmptEq, point3);
	GPOS_RTL_ASSERT(fabs((phist8->GetFrequency() - 0.2).Get()) <
					CStatistics::Epsilon);
	GPOS_RTL_ASSERT(fabs((phist8->GetNumDistinct() - 1.0).Get()) <
					CStatistics::Epsilon);

	// greater than, hitting remaining tuples
	CHistogram *phist9 =
		phist7->MakeHistogramFilter(CStatsPred::EstatscmptG, point1);
	CCardinalityTestUtils::PrintHist(mp, "phist9", phist9);
	GPOS_RTL_ASSERT(fabs((phist9->GetFrequency() - 0.26).Get()) <
					CStatistics::Epsilon);
	GPOS_RTL_ASSERT(fabs((phist9->GetNumDistinct() - 1.8).Get()) <
					CStatistics::Epsilon);

	// equality join, hitting remaining tuples
	CHistogram *phist10 =
		phist7->MakeJoinHistogram(CStatsPred::EstatscmptEq, phist7);
	GPOS_RTL_ASSERT(phist10->GetNumBuckets() == 5);
	GPOS_RTL_ASSERT(fabs((phist10->GetDistinctRemain() - 2.0).Get()) <
					CStatistics::Epsilon);
	GPOS_RTL_ASSERT(fabs((phist10->GetFreqRemain() - 0.08).Get()) <
					CStatistics::Epsilon);

	// clean up
	ppoint0->Release();
	point1->Release();
	point2->Release();
	point3->Release();
	ppoint4->Release();
	ppoint5->Release();
	GPOS_DELETE(histogram);
	GPOS_DELETE(phist0);
	GPOS_DELETE(histogram1);
	GPOS_DELETE(histogram2);
	GPOS_DELETE(phist3);
	GPOS_DELETE(phist4);
	GPOS_DELETE(phist5);
	GPOS_DELETE(phist6);
	GPOS_DELETE(phist7);
	GPOS_DELETE(phist8);
	GPOS_DELETE(phist9);
	GPOS_DELETE(phist10);

	return GPOS_OK;
}

// histogram on bool
GPOS_RESULT
CHistogramTest::EresUnittest_CHistogramBool()
{
	// create memory pool
	CAutoMemoryPool amp;
	CMemoryPool *mp = amp.Pmp();

	// generate histogram of the form [false, false), [true,true)
	CBucketArray *histogram_buckets = GPOS_NEW(mp) CBucketArray(mp);
	CBucket *pbucketFalse =
		CCardinalityTestUtils::PbucketSingletonBoolVal(mp, false, 0.1);
	CBucket *pbucketTrue =
		CCardinalityTestUtils::PbucketSingletonBoolVal(mp, false, 0.9);
	histogram_buckets->Append(pbucketFalse);
	histogram_buckets->Append(pbucketTrue);
	CHistogram *histogram = GPOS_NEW(mp) CHistogram(mp, histogram_buckets);

	// equality check
	CPoint *point1 = CTestUtils::PpointBool(mp, false);
	CDouble scale_factor(0.0);
	CHistogram *histogram1 = histogram->MakeHistogramFilterNormalize(
		CStatsPred::EstatscmptEq, point1, &scale_factor);
	CCardinalityTestUtils::PrintHist(mp, "histogram1", histogram1);
	GPOS_RTL_ASSERT(histogram1->GetNumBuckets() == 1);

	// clean up
	point1->Release();
	GPOS_DELETE(histogram);
	GPOS_DELETE(histogram1);

	return GPOS_OK;
}


// check for well-formed histogram. Expected to fail
GPOS_RESULT
CHistogramTest::EresUnittest_CHistogramValid()
{
	// create memory pool
	CAutoMemoryPool amp;
	CMemoryPool *mp = amp.Pmp();

	CBucketArray *histogram_buckets = GPOS_NEW(mp) CBucketArray(mp);

	// generate histogram of the form [0, 10), [9, 20)
	CBucket *bucket1 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 0, 10, 0.1, 2.0);
	histogram_buckets->Append(bucket1);
	CBucket *bucket2 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 9, 20, 0.1, 2.0);
	histogram_buckets->Append(bucket2);

	// original histogram
	CHistogram *histogram = GPOS_NEW(mp) CHistogram(mp, histogram_buckets);

	// create an auto object
	CAutoP<CHistogram> ahist;
	ahist = histogram;

	{
		CAutoTrace at(mp);
		at.Os() << std::endl << "Invalid Histogram" << std::endl;
		histogram->OsPrint(at.Os());
	}

	if (histogram->IsValid())
	{
		return GPOS_FAILED;
	}

	return GPOS_OK;
}

// generates example int histogram having tuples not covered by buckets,
// including null fraction and nDistinctRemain
CHistogram *
CHistogramTest::PhistExampleInt4Remain(CMemoryPool *mp)
{
	// generate histogram of the form [0, 0], [10, 10], [20, 20] ...
	CBucketArray *histogram_buckets = GPOS_NEW(mp) CBucketArray(mp);
	for (ULONG idx = 0; idx < 5; idx++)
	{
		INT iLower = INT(idx * 10);
		INT iUpper = iLower;
		CDouble frequency(0.1);
		CDouble distinct(1.0);
		CBucket *bucket = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
			mp, iLower, iUpper, frequency, distinct);
		histogram_buckets->Append(bucket);
	}

	return GPOS_NEW(mp)
		CHistogram(mp, histogram_buckets, true, 0.1 /*null_freq*/,
				   2.0 /*distinct_remaining*/, 0.4 /*freq_remaining*/);
}

// basis skew test
GPOS_RESULT
CHistogramTest::EresUnittest_Skew()
{
	// create memory pool
	CAutoMemoryPool amp;
	CMemoryPool *mp = amp.Pmp();

	CBucket *bucket1 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 1, 100, CDouble(0.6), CDouble(100.0));
	CBucket *bucket2 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 101, 200, CDouble(0.2), CDouble(100.0));
	CBucket *pbucket3 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 201, 300, CDouble(0.2), CDouble(100.0));
	CBucket *pbucket4 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 301, 400, CDouble(0.2), CDouble(100.0));
	CBucket *pbucket5 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 401, 500, CDouble(0.2), CDouble(100.0));
	CBucket *pbucket6 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 501, 600, CDouble(0.2), CDouble(100.0));
	CBucket *pbucket7 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 601, 700, CDouble(0.2), CDouble(100.0));
	CBucket *pbucket8 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 701, 800, CDouble(0.2), CDouble(100.0));

	CBucketArray *pdrgppbucket1 = GPOS_NEW(mp) CBucketArray(mp);
	pdrgppbucket1->Append(bucket1);
	pdrgppbucket1->Append(bucket2);
	pdrgppbucket1->Append(pbucket3);
	CHistogram *histogram1 = GPOS_NEW(mp) CHistogram(mp, pdrgppbucket1);

	CBucketArray *pdrgppbucket2 = GPOS_NEW(mp) CBucketArray(mp);
	pdrgppbucket2->Append(pbucket4);
	pdrgppbucket2->Append(pbucket5);
	pdrgppbucket2->Append(pbucket6);
	pdrgppbucket2->Append(pbucket7);
	pdrgppbucket2->Append(pbucket8);
	CHistogram *histogram2 = GPOS_NEW(mp) CHistogram(mp, pdrgppbucket2);
	GPOS_ASSERT(histogram1->GetSkew() > histogram2->GetSkew());

	{
		CAutoTrace at(mp);
		histogram1->OsPrint(at.Os());
		histogram2->OsPrint(at.Os());
	}

	GPOS_DELETE(histogram1);
	GPOS_DELETE(histogram2);

	return GPOS_OK;
}

// basic merge commutativity test
GPOS_RESULT
CHistogramTest::EresUnittest_MergeUnion()
{
	// create memory pool
	CAutoMemoryPool amp;
	CMemoryPool *mp = amp.Pmp();

	// 1000 rows
	CBucket *bucket1 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 1, 100, CDouble(0.6), CDouble(100.0));
	CBucket *bucket2 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 150, 200, CDouble(0.4), CDouble(100.0));

	// 600 rows
	CBucket *pbucket3 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 50, 150, CDouble(0.3), CDouble(100.0));
	CBucket *pbucket4 = CCardinalityTestUtils::PbucketIntegerClosedLowerBound(
		mp, 200, 400, CDouble(0.7), CDouble(100.0));

	CBucketArray *pdrgppbucket1 = GPOS_NEW(mp) CBucketArray(mp);
	pdrgppbucket1->Append(bucket1);
	pdrgppbucket1->Append(bucket2);
	CHistogram *histogram1 = GPOS_NEW(mp) CHistogram(mp, pdrgppbucket1);

	CBucketArray *pdrgppbucket2 = GPOS_NEW(mp) CBucketArray(mp);
	pdrgppbucket2->Append(pbucket3);
	pdrgppbucket2->Append(pbucket4);
	CHistogram *histogram2 = GPOS_NEW(mp) CHistogram(mp, pdrgppbucket2);

	CDouble output_rows1(0.0);
	CHistogram *result1 = histogram1->MakeUnionHistogramNormalize(
		1000, histogram2, 600, &output_rows1);


	CDouble output_rows2(0.0);
	CHistogram *result2 = histogram2->MakeUnionHistogramNormalize(
		600, histogram1, 1000, &output_rows2);

	{
		CAutoTrace at(mp);
		result1->OsPrint(at.Os());
		result2->OsPrint(at.Os());
		at.Os() << "Result 1: " << output_rows1 << std::endl;
		at.Os() << "Result 2: " << output_rows2 << std::endl;
	}

	GPOS_ASSERT(output_rows1 == output_rows2);
	GPOS_DELETE(histogram1);
	GPOS_DELETE(histogram2);
	GPOS_DELETE(result1);
	GPOS_DELETE(result2);

	return GPOS_OK;
}

// merge union test with double values differing by less than epsilon
GPOS_RESULT
CHistogramTest::EresUnittest_MergeUnionDoubleLessThanEpsilon()
{
	// create memory pool
	CAutoMemoryPool amp;
	CMemoryPool *mp = amp.Pmp();

	// [631.82140700000002, 631.82140700000002]
	CPoint *ppLower1 = CCardinalityTestUtils::PpointDouble(
		mp, GPDB_FLOAT8, CDouble(631.82140700000002));
	CPoint *ppUpper1 = CCardinalityTestUtils::PpointDouble(
		mp, GPDB_FLOAT8, CDouble(631.82140700000002));

	CBucket *bucket1 = GPOS_NEW(mp)
		CBucket(ppLower1, ppUpper1, true /* is_lower_closed */,
				true /*is_upper_closed*/, CDouble(0.2), CDouble(50));

	// (631.82140500000003, 645.05197699999997)
	CPoint *ppLower2 = CCardinalityTestUtils::PpointDouble(
		mp, GPDB_FLOAT8, CDouble(631.82140500000003));
	CPoint *ppUpper2 = CCardinalityTestUtils::PpointDouble(
		mp, GPDB_FLOAT8, CDouble(645.05197699999997));
	CBucket *bucket2 = GPOS_NEW(mp)
		CBucket(ppLower2, ppUpper2, false /* is_lower_closed */,
				false /*is_upper_closed*/, CDouble(0.2), CDouble(50));

	CBucketArray *pdrgppbucket1 = GPOS_NEW(mp) CBucketArray(mp);
	pdrgppbucket1->Append(bucket1);
	CHistogram *histogram1 = GPOS_NEW(mp) CHistogram(mp, pdrgppbucket1);

	CBucketArray *pdrgppbucket2 = GPOS_NEW(mp) CBucketArray(mp);
	pdrgppbucket2->Append(bucket2);
	CHistogram *histogram2 = GPOS_NEW(mp) CHistogram(mp, pdrgppbucket2);

	CDouble output_rows1(0.0);
	CHistogram *result1 = histogram1->MakeUnionHistogramNormalize(
		1000, histogram2, 600, &output_rows1);


	{
		CAutoTrace at(mp);
		result1->OsPrint(at.Os());
		at.Os() << "Result 1: " << output_rows1 << std::endl;
	}

	GPOS_DELETE(histogram1);
	GPOS_DELETE(histogram2);
	GPOS_DELETE(result1);

	return GPOS_OK;
}
// EOF

相关信息

greenplumn 源码目录

相关文章

greenplumn CBucketTest 源码

greenplumn CCardinalityTestUtils 源码

greenplumn CFilterCardinalityTest 源码

greenplumn CJoinCardinalityTest 源码

greenplumn CMCVTest 源码

greenplumn CPointTest 源码

greenplumn CStatisticsTest 源码

0  赞