greenplumn CDXLUtils 源码

  • 2022-08-18
  • 浏览 (181)

greenplumn CDXLUtils 代码

文件路径:/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp

//---------------------------------------------------------------------------
//	Greenplum Database
//	Copyright (C) 2010 Greenplum, Inc.
//
//	@filename:
//		CDXLUtils.cpp
//
//	@doc:
//		Implementation of the utility methods for parsing and searializing DXL.
//---------------------------------------------------------------------------

#include "naucrates/dxl/CDXLUtils.h"

#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/util/Base64.hpp>
#include <xercesc/util/XMLString.hpp>

#include "gpos/base.h"
#include "gpos/common/CAutoRef.h"
#include "gpos/common/CAutoRg.h"
#include "gpos/common/CAutoTimer.h"
#include "gpos/io/CFileReader.h"
#include "gpos/io/COstreamString.h"
#include "gpos/io/ioutils.h"
#include "gpos/task/CAutoTraceFlag.h"
#include "gpos/task/CTraceFlagIter.h"
#include "gpos/task/CWorker.h"

#include "gpopt/base/COptCtxt.h"
#include "gpopt/engine/CEnumeratorConfig.h"
#include "gpopt/engine/CStatisticsConfig.h"
#include "gpopt/mdcache/CMDAccessor.h"
#include "gpopt/optimizer/COptimizerConfig.h"
#include "naucrates/base/CQueryToDXLResult.h"
#include "naucrates/dxl/parser/CParseHandlerDXL.h"
#include "naucrates/dxl/parser/CParseHandlerDummy.h"
#include "naucrates/dxl/parser/CParseHandlerFactory.h"
#include "naucrates/dxl/parser/CParseHandlerManager.h"
#include "naucrates/dxl/parser/CParseHandlerPlan.h"
#include "naucrates/dxl/xml/CDXLMemoryManager.h"
#include "naucrates/dxl/xml/CXMLSerializer.h"
#include "naucrates/md/CDXLStatsDerivedRelation.h"
#include "naucrates/md/CMDRequest.h"
#include "naucrates/traceflags/traceflags.h"

using namespace gpdxl;
using namespace gpmd;
using namespace gpos;
using namespace gpopt;

XERCES_CPP_NAMESPACE_USE



//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::GetParseHandlerForDXLString
//
//	@doc:
//		Start the parsing of the given DXL string and return the top-level parser.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CParseHandlerDXL *
CDXLUtils::GetParseHandlerForDXLString(CMemoryPool *mp, const CHAR *dxl_string,
									   const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// setup own memory manager
	CDXLMemoryManager *memory_manager = GPOS_NEW(mp) CDXLMemoryManager(mp);
	SAX2XMLReader *sax_2_xml_reader =
		XMLReaderFactory::createXMLReader(memory_manager);

	XMLCh *xsd_path = nullptr;

	if (nullptr != xsd_file_path)
	{
		// setup XSD validation
		sax_2_xml_reader->setFeature(XMLUni::fgSAX2CoreValidation, true);
		sax_2_xml_reader->setFeature(XMLUni::fgXercesDynamic, false);
		sax_2_xml_reader->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
		sax_2_xml_reader->setFeature(XMLUni::fgXercesSchema, true);

		sax_2_xml_reader->setFeature(XMLUni::fgXercesSchemaFullChecking, true);
		sax_2_xml_reader->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);

		xsd_path = XMLString::transcode(xsd_file_path, memory_manager);
		sax_2_xml_reader->setProperty(
			XMLUni::fgXercesSchemaExternalSchemaLocation, (void *) xsd_path);
	}

	CParseHandlerManager *parse_handler_mgr =
		GPOS_NEW(mp) CParseHandlerManager(memory_manager, sax_2_xml_reader);

	CParseHandlerDXL *parse_handler_dxl =
		CParseHandlerFactory::GetParseHandlerDXL(mp, parse_handler_mgr);

	parse_handler_mgr->ActivateParseHandler(parse_handler_dxl);

	MemBufInputSource *input_src_memory_buffer = new (memory_manager)
		MemBufInputSource((const XMLByte *) dxl_string, strlen(dxl_string),
						  "dxl test", false, memory_manager);

	try
	{
		sax_2_xml_reader->parse(*input_src_memory_buffer);
	}
	catch (const XMLException &)
	{
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLXercesParseError);
		return nullptr;
	}
	catch (const SAXParseException &)
	{
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLXercesParseError);
		return nullptr;
	}
	catch (const SAXException &)
	{
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLXercesParseError);
		return nullptr;
	}


	GPOS_CHECK_ABORT;

	// cleanup
	delete sax_2_xml_reader;
	delete input_src_memory_buffer;
	GPOS_DELETE(parse_handler_mgr);
	GPOS_DELETE(memory_manager);
	delete xsd_path;

	GPOS_CHECK_ABORT;

	return parse_handler_dxl;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::GetParseHandlerForDXLFile
//
//	@doc:
//		Start the parsing of the given DXL string and return the top-level parser.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CParseHandlerDXL *
CDXLUtils::GetParseHandlerForDXLFile(CMemoryPool *mp, const CHAR *dxl_filename,
									 const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// setup own memory manager
	CDXLMemoryManager mm(mp);
	SAX2XMLReader *sax_2_xml_reader = nullptr;

	sax_2_xml_reader = XMLReaderFactory::createXMLReader(&mm);

	XMLCh *xsd_path = nullptr;

	if (nullptr != xsd_file_path)
	{
		// setup XSD validation
		sax_2_xml_reader->setFeature(XMLUni::fgSAX2CoreValidation, true);
		sax_2_xml_reader->setFeature(XMLUni::fgXercesDynamic, false);
		sax_2_xml_reader->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
		sax_2_xml_reader->setFeature(XMLUni::fgXercesSchema, true);

		sax_2_xml_reader->setFeature(XMLUni::fgXercesSchemaFullChecking, true);
		sax_2_xml_reader->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);

		xsd_path = XMLString::transcode(xsd_file_path, &mm);
		sax_2_xml_reader->setProperty(
			XMLUni::fgXercesSchemaExternalSchemaLocation, (void *) xsd_path);
	}

	CParseHandlerManager parse_handler_mgr(&mm, sax_2_xml_reader);
	CParseHandlerDXL *parse_handler_dxl =
		CParseHandlerFactory::GetParseHandlerDXL(mp, &parse_handler_mgr);
	parse_handler_mgr.ActivateParseHandler(parse_handler_dxl);
	GPOS_CHECK_ABORT;

	try
	{
		GPOS_CHECK_ABORT;

		sax_2_xml_reader->parse(dxl_filename);
	}
	catch (const XMLException &)
	{
		GPOS_DELETE(parse_handler_dxl);
		delete sax_2_xml_reader;
		delete[] xsd_path;
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLXercesParseError);

		return nullptr;
	}
	catch (const SAXParseException &ex)
	{
		GPOS_DELETE(parse_handler_dxl);
		delete sax_2_xml_reader;
		delete[] xsd_path;

		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLXercesParseError);

		return nullptr;
	}
	catch (const SAXException &)
	{
		GPOS_DELETE(parse_handler_dxl);
		delete sax_2_xml_reader;
		delete[] xsd_path;
		GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLXercesParseError);

		return nullptr;
	}

	GPOS_CHECK_ABORT;

	// cleanup
	delete sax_2_xml_reader;

	delete[] xsd_path;

	return parse_handler_dxl;
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::GetParseHandlerForDXLString
//
//	@doc:
//		Start the parsing of the given DXL string and return the top-level parser.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CParseHandlerDXL *
CDXLUtils::GetParseHandlerForDXLString(CMemoryPool *mp,
									   const CWStringBase *dxl_string,
									   const CHAR *xsd_file_path)
{
	CAutoRg<CHAR> multi_byte_char_string;
	multi_byte_char_string =
		CreateMultiByteCharStringFromWCString(mp, dxl_string->GetBuffer());
	CParseHandlerDXL *parse_handler_dxl = GetParseHandlerForDXLString(
		mp, multi_byte_char_string.Rgt(), xsd_file_path);
	return parse_handler_dxl;
}



//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::GetPlanDXLNode
//
//	@doc:
//		Parse DXL string into a DXL plan tree.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CDXLNode *
CDXLUtils::GetPlanDXLNode(CMemoryPool *mp, const CHAR *dxl_string,
						  const CHAR *xsd_file_path, ULLONG *plan_id,
						  ULLONG *plan_space_size)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != plan_id);
	GPOS_ASSERT(nullptr != plan_space_size);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	GPOS_ASSERT(nullptr != parse_handler_dxl_wrapper.Value());

	// collect plan info from dxl parse handler
	CDXLNode *root_dxl_node = parse_handler_dxl_wrapper->PdxlnPlan();
	*plan_id = parse_handler_dxl_wrapper->GetPlanId();
	*plan_space_size = parse_handler_dxl_wrapper->GetPlanSpaceSize();

	GPOS_ASSERT(nullptr != root_dxl_node);

#ifdef GPOS_DEBUG
	root_dxl_node->GetOperator()->AssertValid(root_dxl_node,
											  true /* validate_children */);
#endif

	root_dxl_node->AddRef();

	return root_dxl_node;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseQueryToQueryDXLTree
//
//	@doc:
//		Parse DXL string representing the query into
//		1. a DXL tree representing the query
//		2. a DXL tree representing the query output
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CQueryToDXLResult *
CDXLUtils::ParseQueryToQueryDXLTree(CMemoryPool *mp, const CHAR *dxl_string,
									const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect dxl tree of the query from dxl parse handler
	CDXLNode *root_dxl_node = parse_handler_dxl->GetQueryDXLRoot();
	GPOS_ASSERT(nullptr != root_dxl_node);

#ifdef GPOS_DEBUG
	root_dxl_node->GetOperator()->AssertValid(root_dxl_node,
											  true /* validate_children */);
#endif

	root_dxl_node->AddRef();

	// collect the list of query output columns from the dxl parse handler
	GPOS_ASSERT(nullptr != parse_handler_dxl->GetOutputColumnsDXLArray());
	CDXLNodeArray *query_output_cols_dxlnode_array =
		parse_handler_dxl->GetOutputColumnsDXLArray();
	query_output_cols_dxlnode_array->AddRef();

	// collect the list of CTEs
	CDXLNodeArray *cte_producers = parse_handler_dxl->GetCTEProducerDXLArray();
	GPOS_ASSERT(nullptr != cte_producers);
	cte_producers->AddRef();

	CQueryToDXLResult *ptrOutput = GPOS_NEW(mp) CQueryToDXLResult(
		root_dxl_node, query_output_cols_dxlnode_array, cte_producers);

	return ptrOutput;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToScalarExprDXLNode
//
//	@doc:
//		Parse a scalar expression as a top level node in a "ScalarExpr" tag.
//---------------------------------------------------------------------------
CDXLNode *
CDXLUtils::ParseDXLToScalarExprDXLNode(CMemoryPool *mp, const CHAR *dxl_string,
									   const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path));

	// collect dxl tree of the query from dxl parse handler
	CDXLNode *root_dxl_node = parse_handler_dxl_wrapper->GetScalarExprDXLRoot();
	GPOS_ASSERT(nullptr != root_dxl_node);
	root_dxl_node->AddRef();

	return root_dxl_node;
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToIMDObjectArray
//
//	@doc:
//		Parse a list of metadata objects from the given DXL string.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
IMDCacheObjectArray *
CDXLUtils::ParseDXLToIMDObjectArray(CMemoryPool *mp, const CHAR *dxl_string,
									const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect metadata objects from dxl parse handler
	IMDCacheObjectArray *imd_obj_array =
		parse_handler_dxl->GetMdIdCachedObjArray();
	imd_obj_array->AddRef();

	return imd_obj_array;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToMDId
//
//	@doc:
//		Parse an mdid from a DXL metadata document
//
//---------------------------------------------------------------------------
IMDId *
CDXLUtils::ParseDXLToMDId(CMemoryPool *mp, const CWStringBase *dxl_string,
						  const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect metadata objects from dxl parse handler
	IMdIdArray *mdid_array = parse_handler_dxl->GetMdIdArray();

	GPOS_ASSERT(1 == mdid_array->Size());

	IMDId *mdid = (*mdid_array)[0];
	mdid->AddRef();

	return mdid;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToMDRequest
//
//	@doc:
//		Parse a metadata request.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CMDRequest *
CDXLUtils::ParseDXLToMDRequest(CMemoryPool *mp, const CHAR *dxl_string,
							   const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect metadata ids from dxl parse handler
	CMDRequest *md_request = parse_handler_dxl->GetMiniDumper();
	GPOS_ASSERT(nullptr != md_request);
	md_request->AddRef();

	return md_request;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToMDRequest
//
//	@doc:
//		Parse an MD request from the given DXL string.
//		Same as above but with a wide-character input
//
//---------------------------------------------------------------------------
CMDRequest *
CDXLUtils::ParseDXLToMDRequest(CMemoryPool *mp, const WCHAR *dxl_string,
							   const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	CAutoRg<CHAR> multi_byte_char_string_wrapper(
		CDXLUtils::CreateMultiByteCharStringFromWCString(mp, dxl_string));

	// create and install a parse handler for the DXL document
	CMDRequest *md_request = ParseDXLToMDRequest(
		mp, multi_byte_char_string_wrapper.Rgt(), xsd_file_path);

	return md_request;
}

// parse optimizer config DXL
COptimizerConfig *
CDXLUtils::ParseDXLToOptimizerConfig(CMemoryPool *mp, const CHAR *dxl_string,
									 const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	// though we could access the traceflags member of the CParseHandlerDXL
	// here we don't access them or store them anywhere,
	// so, if using this function, note that any traceflags present
	// in the DXL being parsed will be discarded
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect optimizer conf from dxl parse handler
	COptimizerConfig *optimizer_config =
		parse_handler_dxl->GetOptimizerConfig();
	GPOS_ASSERT(nullptr != optimizer_config);
	optimizer_config->AddRef();

	return optimizer_config;
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToStatsDerivedRelArray
//
//	@doc:
//		Parse a list of statistics objects from the given DXL string.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CDXLStatsDerivedRelationArray *
CDXLUtils::ParseDXLToStatsDerivedRelArray(CMemoryPool *mp,
										  const CWStringBase *dxl_string,
										  const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect statistics objects from dxl parse handler
	CDXLStatsDerivedRelationArray *dxl_derived_rel_stats_array =
		parse_handler_dxl->GetStatsDerivedRelDXLArray();
	dxl_derived_rel_stats_array->AddRef();

	return dxl_derived_rel_stats_array;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToStatsDerivedRelArray
//
//	@doc:
//		Parse a list of statistics objects from the given DXL string.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
CDXLStatsDerivedRelationArray *
CDXLUtils::ParseDXLToStatsDerivedRelArray(CMemoryPool *mp,
										  const CHAR *dxl_string,
										  const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect statistics objects from dxl parse handler
	CDXLStatsDerivedRelationArray *dxl_derived_rel_stats_array =
		parse_handler_dxl->GetStatsDerivedRelDXLArray();
	dxl_derived_rel_stats_array->AddRef();

	return dxl_derived_rel_stats_array;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToOptimizerStatisticObjArray
//
//	@doc:
//		Translate the array of dxl statistics objects to an array of
//		optimizer statistics object.
//---------------------------------------------------------------------------
CStatisticsArray *
CDXLUtils::ParseDXLToOptimizerStatisticObjArray(
	CMemoryPool *mp, CMDAccessor *md_accessor,
	CDXLStatsDerivedRelationArray *dxl_derived_rel_stats_array)
{
	GPOS_ASSERT(nullptr != dxl_derived_rel_stats_array);

	CStatisticsArray *statistics_array = GPOS_NEW(mp) CStatisticsArray(mp);
	const ULONG ulRelStat = dxl_derived_rel_stats_array->Size();
	for (ULONG ulIdxRelStat = 0; ulIdxRelStat < ulRelStat; ulIdxRelStat++)
	{
		// create hash map from colid -> histogram
		UlongToHistogramMap *column_id_histogram_map =
			GPOS_NEW(mp) UlongToHistogramMap(mp);

		// width hash map
		UlongToDoubleMap *column_id_width_map =
			GPOS_NEW(mp) UlongToDoubleMap(mp);

		CDXLStatsDerivedRelation *stats_derived_relation_dxl =
			(*dxl_derived_rel_stats_array)[ulIdxRelStat];
		const CDXLStatsDerivedColumnArray *derived_column_stats_array =
			stats_derived_relation_dxl->GetDXLStatsDerivedColArray();

		const ULONG num_of_columns = derived_column_stats_array->Size();
		for (ULONG column_id_idx = 0; column_id_idx < num_of_columns;
			 column_id_idx++)
		{
			CDXLStatsDerivedColumn *dxl_derived_col_stats =
				(*derived_column_stats_array)[column_id_idx];

			ULONG column_id = dxl_derived_col_stats->GetColId();
			CDouble width = dxl_derived_col_stats->Width();
			CDouble null_freq = dxl_derived_col_stats->GetNullFreq();
			CDouble distinct_remaining =
				dxl_derived_col_stats->GetDistinctRemain();
			CDouble freq_remaining = dxl_derived_col_stats->GetFreqRemain();

			CBucketArray *stats_buckets_array =
				CDXLUtils::ParseDXLToBucketsArray(mp, md_accessor,
												  dxl_derived_col_stats);
			CHistogram *histogram = GPOS_NEW(mp)
				CHistogram(mp, stats_buckets_array, true /*is_well_defined*/,
						   null_freq, distinct_remaining, freq_remaining);

			column_id_histogram_map->Insert(GPOS_NEW(mp) ULONG(column_id),
											histogram);
			column_id_width_map->Insert(GPOS_NEW(mp) ULONG(column_id),
										GPOS_NEW(mp) CDouble(width));
		}

		CDouble rows = stats_derived_relation_dxl->Rows();
		CStatistics *stats = GPOS_NEW(mp)
			CStatistics(mp, column_id_histogram_map, column_id_width_map, rows,
						false /* is_empty */
			);
		//stats->AddCardUpperBound(mp, ulIdxRelStat, rows);

		statistics_array->Append(stats);
	}

	return statistics_array;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToBucketsArray
//
//	@doc:
//		Extract the array of optimizer buckets from the dxl representation of
//		dxl buckets in the dxl derived column statistics object.
//---------------------------------------------------------------------------
CBucketArray *
CDXLUtils::ParseDXLToBucketsArray(CMemoryPool *mp, CMDAccessor *md_accessor,
								  CDXLStatsDerivedColumn *dxl_derived_col_stats)
{
	CBucketArray *stats_buckets_array = GPOS_NEW(mp) CBucketArray(mp);

	const CDXLBucketArray *dxl_bucket_array =
		dxl_derived_col_stats->TransformHistogramToDXLBucketArray();
	const ULONG num_of_buckets = dxl_bucket_array->Size();
	for (ULONG ul = 0; ul < num_of_buckets; ul++)
	{
		CDXLBucket *dxl_bucket = (*dxl_bucket_array)[ul];

		// translate the lower and upper bounds of the bucket
		IDatum *datum_lower_bound =
			GetDatum(mp, md_accessor, dxl_bucket->GetDXLDatumLower());
		CPoint *point_lower_bound = GPOS_NEW(mp) CPoint(datum_lower_bound);

		IDatum *datum_upper_bound =
			GetDatum(mp, md_accessor, dxl_bucket->GetDXLDatumUpper());
		CPoint *point_upper_bound = GPOS_NEW(mp) CPoint(datum_upper_bound);

		CBucket *bucket = GPOS_NEW(mp)
			CBucket(point_lower_bound, point_upper_bound,
					dxl_bucket->IsLowerClosed(), dxl_bucket->IsUpperClosed(),
					dxl_bucket->GetFrequency(), dxl_bucket->GetNumDistinct());

		stats_buckets_array->Append(bucket);
	}

	return stats_buckets_array;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::GetDatum
//
//	@doc:
//		Translate the optimizer datum from dxl datum object
//
//---------------------------------------------------------------------------
IDatum *
CDXLUtils::GetDatum(CMemoryPool *mp, CMDAccessor *md_accessor,
					const CDXLDatum *dxl_datum)
{
	IMDId *mdid = dxl_datum->MDId();
	return md_accessor->RetrieveType(mdid)->GetDatumForDXLDatum(mp, dxl_datum);
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToIMDObjectArray
//
//	@doc:
//		Parse a list of metadata objects from the given DXL string.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
IMDCacheObjectArray *
CDXLUtils::ParseDXLToIMDObjectArray(CMemoryPool *mp,
									const CWStringBase *dxl_string,
									const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CParseHandlerDXL *parse_handler_dxl =
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path);
	CAutoP<CParseHandlerDXL> parse_handler_dxl_wrapper(parse_handler_dxl);

	// collect metadata objects from dxl parse handler
	IMDCacheObjectArray *imd_obj_array =
		parse_handler_dxl->GetMdIdCachedObjArray();
	imd_obj_array->AddRef();

	return imd_obj_array;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::ParseDXLToIMDIdCacheObj
//
//	@doc:
//		Parse a single metadata object given its DXL representation.
// 		Returns NULL if the DXL represents no metadata objects, or the first parsed
//		object if it does.
//		If a non-empty XSD schema location is provided, the DXL is validated against
//		that schema, and an exception is thrown if the DXL does not conform.
//
//---------------------------------------------------------------------------
IMDCacheObject *
CDXLUtils::ParseDXLToIMDIdCacheObj(CMemoryPool *mp,
								   const CWStringBase *dxl_string,
								   const CHAR *xsd_file_path)
{
	GPOS_ASSERT(nullptr != mp);

	// create and install a parse handler for the DXL document
	CAutoP<CParseHandlerDXL> parse_handler_dxl_array(
		GetParseHandlerForDXLString(mp, dxl_string, xsd_file_path));

	// collect metadata objects from dxl parse handler
	IMDCacheObjectArray *imd_obj_array =
		parse_handler_dxl_array->GetMdIdCachedObjArray();

	if (0 == imd_obj_array->Size())
	{
		// no metadata objects found
		return nullptr;
	}

	IMDCacheObject *imd_cached_obj = (*imd_obj_array)[0];
	imd_cached_obj->AddRef();

	return imd_cached_obj;
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeQuery
//
//	@doc:
//		Serialize a DXL Query tree into a DXL document
//
//---------------------------------------------------------------------------
void
CDXLUtils::SerializeQuery(CMemoryPool *mp, IOstream &os,
						  const CDXLNode *dxl_query_node,
						  const CDXLNodeArray *query_output_dxlnode_array,
						  const CDXLNodeArray *cte_producers,
						  BOOL serialize_header_footer, BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != dxl_query_node &&
				nullptr != query_output_dxlnode_array);

	CAutoTimer at("\n[OPT]: DXL Query Serialization Time",
				  GPOS_FTRACE(EopttracePrintOptimizationStatistics));

	CXMLSerializer xml_serializer(mp, os, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
	}

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenQuery));

	// serialize the query output columns
	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenQueryOutput));
	for (ULONG ul = 0; ul < query_output_dxlnode_array->Size(); ++ul)
	{
		CDXLNode *scalar_ident = (*query_output_dxlnode_array)[ul];
		scalar_ident->SerializeToDXL(&xml_serializer);
	}
	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenQueryOutput));

	// serialize the CTE list
	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenCTEList));
	const ULONG ulCTEs = cte_producers->Size();
	for (ULONG ul = 0; ul < ulCTEs; ++ul)
	{
		CDXLNode *cte = (*cte_producers)[ul];
		cte->SerializeToDXL(&xml_serializer);
	}
	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenCTEList));


	dxl_query_node->SerializeToDXL(&xml_serializer);

	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenQuery));

	if (serialize_header_footer)
	{
		SerializeFooter(&xml_serializer);
	}
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeULLONG
//
//	@doc:
//		Serialize a ULLONG value
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeULLONG(CMemoryPool *mp, ULLONG value)
{
	GPOS_ASSERT(nullptr != mp);

	CAutoP<CWStringDynamic> string_var(GPOS_NEW(mp) CWStringDynamic(mp));

	// create a string stream to hold the result of serialization
	COstreamString oss(string_var.Value());
	oss << value;

	return string_var.Reset();
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializePlan
//
//	@doc:
//		Serialize a DXL tree into a DXL document
//
//---------------------------------------------------------------------------
void
CDXLUtils::SerializePlan(CMemoryPool *mp, IOstream &os, const CDXLNode *node,
						 ULLONG plan_id, ULLONG plan_space_size,
						 BOOL serialize_header_footer, BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != node);

	CAutoTimer at("\n[OPT]: DXL Plan Serialization Time",
				  GPOS_FTRACE(EopttracePrintOptimizationStatistics));

	CXMLSerializer xml_serializer(mp, os, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
	}

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenPlan));

	// serialize plan id and space size attributes

	xml_serializer.AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenPlanId),
								plan_id);
	xml_serializer.AddAttribute(
		CDXLTokens::GetDXLTokenStr(EdxltokenPlanSpaceSize), plan_space_size);

	node->SerializeToDXL(&xml_serializer);

	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenPlan));

	if (serialize_header_footer)
	{
		SerializeFooter(&xml_serializer);
	}
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeMetadata
//
//	@doc:
//		Serialize a list of MD objects into a DXL document and write to
//		to the provided output stream
//
//---------------------------------------------------------------------------
void
CDXLUtils::SerializeMetadata(CMemoryPool *mp,
							 const IMDCacheObjectArray *imd_obj_array,
							 IOstream &os, BOOL serialize_header_footer,
							 BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != imd_obj_array);

	CXMLSerializer xml_serializer(mp, os, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
	}

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenMetadata));


	for (ULONG ul = 0; ul < imd_obj_array->Size(); ul++)
	{
		IMDCacheObject *imd_cache_obj = (*imd_obj_array)[ul];
		imd_cache_obj->Serialize(&xml_serializer);
	}

	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenMetadata));

	if (serialize_header_footer)
	{
		SerializeFooter(&xml_serializer);
	}

	return;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeMetadata
//
//	@doc:
//		Serialize a list of MD objects into a DXL document and write to
//		to the provided output stream
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeMetadata(CMemoryPool *mp, const IMDId *mdid,
							 BOOL serialize_header_footer, BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(mdid->IsValid());

	CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);

	// create a string stream to hold the result of serialization
	COstreamString oss(dxl_string);

	CXMLSerializer xml_serializer(mp, oss, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
	}

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenMetadata));

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenMdid));
	mdid->Serialize(&xml_serializer,
					CDXLTokens::GetDXLTokenStr(EdxltokenValue));
	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenMdid));

	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenMetadata));

	if (serialize_header_footer)
	{
		SerializeFooter(&xml_serializer);
	}

	return dxl_string;
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeSamplePlans
//
//	@doc:
//		Serialize a list of sample plans in the given enumerator config
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeSamplePlans(CMemoryPool *mp,
								CEnumeratorConfig *enumerator_cfg,
								BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);

	CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);

	// create a string stream to hold the result of serialization
	COstreamString oss(dxl_string);

	CXMLSerializer xml_serializer(mp, oss, indentation);
	SerializeHeader(mp, &xml_serializer);

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenSamplePlans));

	const ULONG size = enumerator_cfg->UlCreatedSamples();
	for (ULONG ul = 0; ul < size; ul++)
	{
		xml_serializer.OpenElement(
			CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
			CDXLTokens::GetDXLTokenStr(EdxltokenSamplePlan));
		// we add 1 to plan id since id's are zero-based internally, and we reserve 0 for best plan
		xml_serializer.AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenPlanId),
									enumerator_cfg->UllPlanSample(ul) + 1);
		xml_serializer.AddAttribute(
			CDXLTokens::GetDXLTokenStr(EdxltokenRelativeCost),
			enumerator_cfg->CostPlanSample(ul) / enumerator_cfg->CostBest());
		xml_serializer.CloseElement(
			CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
			CDXLTokens::GetDXLTokenStr(EdxltokenSamplePlan));
	}

	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenSamplePlans));

	SerializeFooter(&xml_serializer);

	return dxl_string;
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeCostDistr
//
//	@doc:
//		Serialize cost distribution
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeCostDistr(CMemoryPool *mp,
							  CEnumeratorConfig *enumerator_cfg,
							  BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);

	CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);

	// create a string stream to hold the result of serialization
	COstreamString oss(dxl_string);

	CXMLSerializer xml_serializer(mp, oss, indentation);
	SerializeHeader(mp, &xml_serializer);

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenCostDistr));

	const ULONG size = enumerator_cfg->UlCostDistrSize();
	for (ULLONG ul = 0; ul < size; ul++)
	{
		xml_serializer.OpenElement(
			CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
			CDXLTokens::GetDXLTokenStr(EdxltokenValue));
		xml_serializer.AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenX),
									enumerator_cfg->DCostDistrX(ul));
		xml_serializer.AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenY),
									enumerator_cfg->DCostDistrY(ul));
		xml_serializer.CloseElement(
			CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
			CDXLTokens::GetDXLTokenStr(EdxltokenValue));
	}

	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenCostDistr));

	SerializeFooter(&xml_serializer);

	return dxl_string;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeMDRequest
//
//	@doc:
//		Serialize a list of mdids into a DXL MD Request document and write to
//		to the provided output stream
//
//---------------------------------------------------------------------------
void
CDXLUtils::SerializeMDRequest(CMemoryPool *mp, CMDRequest *md_request,
							  IOstream &os, BOOL serialize_header_footer,
							  BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != md_request);

	CXMLSerializer xml_serializer(mp, os, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
	}

	md_request->Serialize(&xml_serializer);

	if (serialize_header_footer)
	{
		SerializeFooter(&xml_serializer);
	}

	return;
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeStatistics
//
//	@doc:
//		Serialize a list of statistics objects into a DXL document
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeStatistics(CMemoryPool *mp, CMDAccessor *md_accessor,
							   const CStatisticsArray *statistics_array,
							   BOOL serialize_header_footer, BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != statistics_array);
	CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);

	// create a string stream to hold the result of serialization
	COstreamString oss(dxl_string);

	CDXLUtils::SerializeStatistics(mp, md_accessor, statistics_array, oss,
								   serialize_header_footer, indentation);

	return dxl_string;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeStatistics
//
//	@doc:
//		Serialize a list of statistics objects into a DXL document and write to
//		to the provided output stream
//
//---------------------------------------------------------------------------
void
CDXLUtils::SerializeStatistics(CMemoryPool *mp, CMDAccessor *md_accessor,
							   const CStatisticsArray *statistics_array,
							   IOstream &os, BOOL serialize_header_footer,
							   BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != statistics_array);

	CXMLSerializer xml_serializer(mp, os, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
	}

	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenStatistics));

	GPOS_ASSERT(nullptr != statistics_array);

	for (ULONG ul = 0; ul < statistics_array->Size(); ul++)
	{
		CStatistics *stats = (*statistics_array)[ul];
		CDXLStatsDerivedRelation *stats_derived_relation_dxl =
			stats->GetDxlStatsDrvdRelation(mp, md_accessor);
		stats_derived_relation_dxl->Serialize(&xml_serializer);
		stats_derived_relation_dxl->Release();
	}

	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenStatistics));

	if (serialize_header_footer)
	{
		SerializeFooter(&xml_serializer);
	}

	return;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeMetadata
//
//	@doc:
//		Serialize a list of MD objects into a DXL document
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeMetadata(CMemoryPool *mp,
							 const IMDCacheObjectArray *imd_obj_array,
							 BOOL serialize_header_footer, BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != imd_obj_array);
	CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);

	// create a string stream to hold the result of serialization
	COstreamString oss(dxl_string);

	CDXLUtils::SerializeMetadata(mp, imd_obj_array, oss,
								 serialize_header_footer, indentation);

	return dxl_string;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeMDObj
//
//	@doc:
//		Serialize an MD object into a DXL document
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeMDObj(CMemoryPool *mp, const IMDCacheObject *imd_cache_obj,
						  BOOL serialize_header_footer, BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != imd_cache_obj);

	CAutoP<CWStringDynamic> string_var(GPOS_NEW(mp) CWStringDynamic(mp));

	// create a string stream to hold the result of serialization
	COstreamString oss(string_var.Value());

	CXMLSerializer xml_serializer(mp, oss, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
		xml_serializer.OpenElement(
			CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
			CDXLTokens::GetDXLTokenStr(EdxltokenMetadata));
	}
	GPOS_CHECK_ABORT;

	imd_cache_obj->Serialize(&xml_serializer);
	GPOS_CHECK_ABORT;

	if (serialize_header_footer)
	{
		xml_serializer.CloseElement(
			CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
			CDXLTokens::GetDXLTokenStr(EdxltokenMetadata));
		SerializeFooter(&xml_serializer);
	}

	GPOS_CHECK_ABORT;
	return string_var.Reset();
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeScalarExpr
//
//	@doc:
//		Serialize a DXL tree representing a ScalarExpr into a DXL document
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::SerializeScalarExpr(CMemoryPool *mp, const CDXLNode *node,
							   BOOL serialize_header_footer, BOOL indentation)
{
	GPOS_ASSERT(nullptr != mp);
	GPOS_ASSERT(nullptr != node);
	CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);

	// create a string stream to hold the result of serialization
	COstreamString oss(dxl_string);
	CXMLSerializer xml_serializer(mp, oss, indentation);

	if (serialize_header_footer)
	{
		SerializeHeader(mp, &xml_serializer);
	}
	xml_serializer.OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenScalarExpr));

	// serialize the content of the scalar expression
	node->SerializeToDXL(&xml_serializer);
	xml_serializer.CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenScalarExpr));
	if (serialize_header_footer)
	{
		SerializeFooter(&xml_serializer);
	}
	return dxl_string;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeHeader
//
//	@doc:
//		Serialize the DXL document header
//
//---------------------------------------------------------------------------
void
CDXLUtils::SerializeHeader(CMemoryPool *mp, CXMLSerializer *xml_serializer)
{
	GPOS_ASSERT(nullptr != xml_serializer);

	xml_serializer->StartDocument();

	xml_serializer->OpenElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenDXLMessage));

	// add namespace specification xmlns:dxl="...."
	CWStringDynamic namespace_specification_string(mp);
	namespace_specification_string.AppendFormat(
		GPOS_WSZ_LIT("%ls%ls%ls"),
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespaceAttr)->GetBuffer(),
		CDXLTokens::GetDXLTokenStr(EdxltokenColon)->GetBuffer(),
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix)->GetBuffer());

	xml_serializer->AddAttribute(
		&namespace_specification_string,
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespaceURI));
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::SerializeFooter
//
//	@doc:
//		Serialize the DXL document footer
//
//---------------------------------------------------------------------------
void
CDXLUtils::SerializeFooter(CXMLSerializer *xml_serializer)
{
	GPOS_ASSERT(nullptr != xml_serializer);

	xml_serializer->CloseElement(
		CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix),
		CDXLTokens::GetDXLTokenStr(EdxltokenDXLMessage));
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::CreateDynamicStringFromXMLChArray
//
//	@doc:
//		Create a GPOS string object from a Xerces XMLCh* string.
//		The function allocates the returned string in the provided memory pool
//		and it is the responsibility of the caller to release it.
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::CreateDynamicStringFromXMLChArray(CDXLMemoryManager *memory_manager,
											 const XMLCh *xml_string)
{
	GPOS_ASSERT(nullptr != memory_manager);
	GPOS_ASSERT(nullptr != xml_string);

	CMemoryPool *mp = memory_manager->Pmp();

	{
		CHAR *sz = XMLString::transcode(xml_string, memory_manager);

		CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);
		dxl_string->AppendFormat(GPOS_WSZ_LIT("%s"), sz);

		// cleanup temporary buffer
		XMLString::release(&sz, memory_manager);

		return dxl_string;
	}
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::PstrFromBase64XMLStr
//
//	@doc:
//		Create a decoded byte array from a base 64 encoded XML string.
//
//---------------------------------------------------------------------------
BYTE *
CDXLUtils::CreateStringFrom64XMLStr(
	CDXLMemoryManager *memory_manager, const XMLCh *xml_string,
	ULONG *length  // output: length of constructed byte array
)
{
	GPOS_ASSERT(nullptr != memory_manager);
	GPOS_ASSERT(nullptr != xml_string);

	CMemoryPool *mp = memory_manager->Pmp();

	// find out xml string length
	ULONG len = XMLString::stringLen(xml_string);

	// convert XML string into array of XMLByte
	CAutoRg<XMLByte> data_in_byte;
	data_in_byte = (XMLByte *) GPOS_NEW_ARRAY(mp, XMLByte, len + 1);
	for (ULONG ul = 0; ul < len; ul++)
	{
		GPOS_ASSERT(xml_string[ul] <= 256 &&
					"XML string not in Base64 encoding");

		data_in_byte[ul] = (XMLByte) xml_string[ul];
	}
	data_in_byte[len] = 0;

	// decode string
	XMLSize_t xml_size = 0;
	XMLByte *xml_byte_out =
		Base64::decode(data_in_byte.Rgt(), &xml_size, memory_manager);
	*length = static_cast<ULONG>(xml_size);

	return static_cast<BYTE *>(xml_byte_out);
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::CreateDynamicStringFromCharArray
//
//	@doc:
//		Create a GPOS string object from a character array.
//		The function allocates the returned string in the provided memory pool
//		and it is the responsibility of the caller to release it.
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::CreateDynamicStringFromCharArray(CMemoryPool *mp, const CHAR *c)
{
	GPOS_ASSERT(nullptr != c);

	CAutoP<CWStringDynamic> string_var(GPOS_NEW(mp) CWStringDynamic(mp));
	string_var->AppendFormat(GPOS_WSZ_LIT("%s"), c);
	return string_var.Reset();
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::CreateMDNameFromCharArray
//
//	@doc:
//		Create a GPOS string object from a character array.
//		The function allocates the returned string in the provided memory pool
//		and it is the responsibility of the caller to release it.
//
//---------------------------------------------------------------------------
CMDName *
CDXLUtils::CreateMDNameFromCharArray(CMemoryPool *mp, const CHAR *c)
{
	GPOS_ASSERT(nullptr != c);

	CWStringDynamic *dxl_string =
		CDXLUtils::CreateDynamicStringFromCharArray(mp, c);
	CMDName *md_name = GPOS_NEW(mp) CMDName(mp, dxl_string);

	// CMDName ctor created a copy of the string
	GPOS_DELETE(dxl_string);

	return md_name;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::CreateMDNameFromXMLChar
//
//	@doc:
//		Create a GPOS string object from a Xerces character array.
//		The function allocates the returned string in the provided memory pool
//		and it is the responsibility of the caller to release it.
//
//---------------------------------------------------------------------------
CMDName *
CDXLUtils::CreateMDNameFromXMLChar(CDXLMemoryManager *memory_manager,
								   const XMLCh *xml_string)
{
	GPOS_ASSERT(nullptr != xml_string);

	CHAR *transcode_string = XMLString::transcode(xml_string, memory_manager);
	CMDName *md_name =
		CreateMDNameFromCharArray(memory_manager->Pmp(), transcode_string);

	// cleanup temporary buffer
	XMLString::release(&transcode_string, memory_manager);

	return md_name;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::EncodeByteArrayToString
//
//	@doc:
//		Use Base64 encoding to convert bytearray to a string.
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::EncodeByteArrayToString(CMemoryPool *mp, const BYTE *byte,
								   ULONG length)
{
	CAutoP<CDXLMemoryManager> a_pmm(GPOS_NEW(mp) CDXLMemoryManager(mp));
	CAutoP<CWStringDynamic> string_var(GPOS_NEW(mp) CWStringDynamic(mp));

	GPOS_ASSERT(length > 0);

	XMLSize_t output_length = 0;
	const XMLByte *input = (const XMLByte *) byte;

	XMLSize_t input_length = (XMLSize_t) length;

	CAutoRg<XMLByte> xml_byte_buffer;

	xml_byte_buffer =
		Base64::encode(input, input_length, &output_length, a_pmm.Value());

	GPOS_ASSERT(nullptr != xml_byte_buffer.Rgt());

	// assert that last byte is 0
	GPOS_ASSERT(0 == xml_byte_buffer[output_length]);

	// there may be padded bytes. We don't need them. We zero out there bytes.
#ifdef GPOS_DEBUG
	ULONG new_length = output_length;
#endif	// GPOS_DEBUG
	while (('\n' == xml_byte_buffer[output_length] ||
			0 == xml_byte_buffer[output_length]) &&
		   0 < output_length)
	{
		xml_byte_buffer[output_length] = 0;
#ifdef GPOS_DEBUG
		new_length = output_length;
#endif	// GPOS_DEBUG
		output_length--;
	}
	GPOS_ASSERT(0 == xml_byte_buffer[new_length]);

	CHAR *return_buffer = (CHAR *) (xml_byte_buffer.Rgt());
	string_var->AppendCharArray(return_buffer);

	return string_var.Reset();
}


//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::DecodeByteArrayFromString
//
//	@doc:
//		Decode byte array from Base64 encoded string.
//
//---------------------------------------------------------------------------
BYTE *
CDXLUtils::DecodeByteArrayFromString(CMemoryPool *mp,
									 const CWStringDynamic *dxl_string,
									 ULONG *length)
{
	CAutoP<CDXLMemoryManager> a_pmm(GPOS_NEW(mp) CDXLMemoryManager(mp));

	XMLSize_t xml_size = 0;

	const WCHAR *wc = dxl_string->GetBuffer();

	// We know that the input is encoded using Base64.
	XMLSize_t input_length = dxl_string->Length();

	CAutoRg<XMLByte> data_in_byte;

	data_in_byte = (XMLByte *) GPOS_NEW_ARRAY(mp, XMLByte, input_length + 1);

	for (XMLSize_t i = 0; i < input_length; i++)
	{
		GPOS_ASSERT(wc[i] <= 256);
		data_in_byte[i] = (XMLByte) wc[i];
	}

	data_in_byte[input_length] = 0;

	XMLByte *xml_byte = nullptr;

	xml_byte = Base64::decode(data_in_byte.Rgt(), &xml_size, a_pmm.Value());

	(*length) = static_cast<ULONG>(xml_size);

	return static_cast<BYTE *>(xml_byte);
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::Serialize
//
//	@doc:
//		Serialize a list of unsigned integers into a comma-separated string
//
//---------------------------------------------------------------------------
CWStringDynamic *
CDXLUtils::Serialize(CMemoryPool *mp, const ULongPtr2dArray *array_2D)
{
	const ULONG len = array_2D->Size();
	CWStringDynamic *keys_buffer = GPOS_NEW(mp) CWStringDynamic(mp);
	for (ULONG ul = 0; ul < len; ul++)
	{
		ULongPtrArray *pdrgpul = (*array_2D)[ul];
		CWStringDynamic *key_set_string = CDXLUtils::Serialize(mp, pdrgpul);

		keys_buffer->Append(key_set_string);

		if (ul < len - 1)
		{
			keys_buffer->AppendFormat(GPOS_WSZ_LIT("%ls"), GPOS_WSZ_LIT(";"));
		}

		GPOS_DELETE(key_set_string);
	}

	return keys_buffer;
}

// Serialize a list of chars into a comma-separated string
CWStringDynamic *
CDXLUtils::SerializeToCommaSeparatedString(CMemoryPool *mp,
										   const CharPtrArray *char_ptr_array)
{
	CWStringDynamic *dxl_string = GPOS_NEW(mp) CWStringDynamic(mp);

	ULONG length = char_ptr_array->Size();
	for (ULONG ul = 0; ul < length; ul++)
	{
		CHAR value = *((*char_ptr_array)[ul]);
		if (ul == length - 1)
		{
			// last element: do not print a comma
			dxl_string->AppendFormat(GPOS_WSZ_LIT("%c"), value);
		}
		else
		{
			dxl_string->AppendFormat(
				GPOS_WSZ_LIT("%c%ls"), value,
				CDXLTokens::GetDXLTokenStr(EdxltokenComma)->GetBuffer());
		}
	}

	return dxl_string;
}

//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::CreateMultiByteCharStringFromWCString
//
//	@doc:
//		Converts a wide character string into a character array in the provided memory pool
//
//---------------------------------------------------------------------------
CHAR *
CDXLUtils::CreateMultiByteCharStringFromWCString(CMemoryPool *mp,
												 const WCHAR *wc)
{
	GPOS_ASSERT(nullptr != wc);

	ULONG max_length = GPOS_WSZ_LENGTH(wc) * GPOS_SIZEOF(WCHAR) + 1;
	CHAR *c = GPOS_NEW_ARRAY(mp, CHAR, max_length);
	CAutoRg<CHAR> char_wrapper(c);

#ifdef GPOS_DEBUG
	INT i = (INT)
#endif
		wcstombs(c, wc, max_length);
	GPOS_ASSERT(0 <= i);

	char_wrapper[max_length - 1] = '\0';

	return char_wrapper.RgtReset();
}

//---------------------------------------------------------------------------
//		CDXLUtils::Read
//
//	@doc:
//		Read a given text file in a character buffer.
//		The function allocates memory from the provided memory pool, and it is
//		the responsibility of the caller to deallocate it.
//
//---------------------------------------------------------------------------
CHAR *
CDXLUtils::Read(CMemoryPool *mp, const CHAR *filename)
{
	GPOS_TRACE_FORMAT("opening file %s", filename);

	CFileReader fr;
	fr.Open(filename);

	ULONG_PTR file_size = (ULONG_PTR) fr.FileSize();
	CAutoRg<CHAR> read_buffer(GPOS_NEW_ARRAY(mp, CHAR, file_size + 1));

	ULONG_PTR read_bytes =
		fr.ReadBytesToBuffer((BYTE *) read_buffer.Rgt(), file_size);
	fr.Close();

	GPOS_ASSERT(read_bytes == file_size);

	read_buffer[read_bytes] = '\0';

	return read_buffer.RgtReset();
}

#ifdef GPOS_DEBUG
//---------------------------------------------------------------------------
//	@function:
//		CDXLUtils::DebugPrintMDIdArray
//
//	@doc:
//		Print an array of mdids
//
//---------------------------------------------------------------------------
void
CDXLUtils::DebugPrintMDIdArray(IOstream &os, IMdIdArray *mdid_array)
{
	ULONG len = mdid_array->Size();
	for (ULONG ul = 0; ul < len; ul++)
	{
		const IMDId *mdid = (*mdid_array)[ul];
		mdid->OsPrint(os);
		os << " ";
	}

	os << std::endl;
}
#endif

// EOF

相关信息

greenplumn 源码目录

相关文章

greenplumn CCostModelConfigSerializer 源码

greenplumn CIdGenerator 源码

greenplumn exception 源码

greenplumn init 源码

greenplumn traceflags 源码

0  赞