greenplumn cdbgroup 源码

2022-08-18
浏览 (831)

greenplumn cdbgroup 代码

文件路径：/src/backend/cdb/cdbgroup.c

/*-------------------------------------------------------------------------
 *
 * cdbgroup.c
 *	  Some helper routines for planning.
 *
 * This used to contain the code to implement MPP-aware GROUP BY planning,
 * hence the filename. But that's now handled in cdbgroupingpaths.c, only
 * a few functions remain here that are used elsewhere in the planner.
 *
 * Portions Copyright (c) 2005-2008, Greenplum inc
 * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates.
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	    src/backend/cdb/cdbgroup.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "cdb/cdbgroup.h"
#include "optimizer/tlist.h"

/*
 * Function: cdbpathlocus_collocates_pathkeys
 *
 * Is a relation with the given locus guaranteed to collocate tuples with
 * non-distinct values of the key.  The key is a list of PathKeys.
 *
 * Collocation is guaranteed if the locus specifies a single process or
 * if the result is partitioned on a subset of the keys that must be
 * collocated.
 *
 * We ignore other sorts of collocation, e.g., replication or partitioning
 * on a range since these cannot occur at the moment (MPP 2.3).
 */
bool
cdbpathlocus_collocates_pathkeys(PlannerInfo *root, CdbPathLocus locus, List *pathkeys,
								 bool exact_match)
{
	ListCell   *i;
	List	   *pk_eclasses;

	if (CdbPathLocus_IsBottleneck(locus))
		return true;

	if (!CdbPathLocus_IsHashed(locus))
	{
		/*
		 * Note: HashedOJ can *not* be used for grouping. In HashedOJ, NULL
		 * values can be located on any segment, so we would end up with
		 * multiple NULL groups.
		 */
		return false;
	}

	if (exact_match && list_length(pathkeys) != list_length(locus.distkey))
		return false;

	/*
	 * Extract a list of eclasses from the pathkeys.
	 */
	pk_eclasses = NIL;
	foreach(i, pathkeys)
	{
		PathKey    *pathkey = (PathKey *) lfirst(i);
		EquivalenceClass *ec;

		ec = pathkey->pk_eclass;
		while (ec->ec_merged != NULL)
			ec = ec->ec_merged;

		pk_eclasses = lappend(pk_eclasses, ec);
	}

	/*
	 * Check for containment of locus in pk_eclasses.
	 *
	 * We ignore constants in the locus hash key. A constant has the same
	 * value everywhere, so it doesn't affect collocation.
	 */
	return cdbpathlocus_is_hashed_on_eclasses(locus, pk_eclasses, true);
}

/*
 * Function: cdbpathlocus_collocates_tlist
 *
 * Like cdbpathlocus_collocates_pathkeys, but the key list is given
 * as a target list, instead of PathKeys.
 */
bool
cdbpathlocus_collocates_tlist(PlannerInfo *root, CdbPathLocus locus, List *tlist)
{
	if (CdbPathLocus_IsBottleneck(locus))
		return true;

	/*
	 * If there are no GROUP BY columns, the aggregation needs all rows in a
	 * single node.
	 */
	if (tlist == NIL)
		return false;

	if (!CdbPathLocus_IsHashed(locus))
	{
		/*
		 * Note: HashedOJ can *not* be used for grouping. In HashedOJ, NULL
		 * values can be located on any segment, so we would end up with
		 * multiple NULL groups.
		 */
		return false;
	}

	/*
	 * Check for containment of locus in pk_eclasses.
	 *
	 * We ignore constants in the locus hash key. A constant has the same
	 * value everywhere, so it doesn't affect collocation.
	 */
	return cdbpathlocus_is_hashed_on_tlist(locus, tlist, true);
}

/**
 * Update the scatter clause before a query tree's targetlist is about to
 * be modified. The scatter clause of a query tree will correspond to
 * old targetlist entries. If the query tree is modified and the targetlist
 * is to be modified, we must call this method to ensure that the scatter clause
 * is kept in sync with the new targetlist.
 */
void
UpdateScatterClause(Query *query, List *newtlist)
{
	Assert(query);
	Assert(query->targetList);
	Assert(newtlist);

	if (query->scatterClause
		&& list_nth(query->scatterClause, 0) != NULL	/* scattered randomly */
		)
	{
		Assert(list_length(query->targetList) == list_length(newtlist));
		List	   *scatterClause = NIL;
		ListCell   *lc = NULL;

		foreach(lc, query->scatterClause)
		{
			Expr	   *o = (Expr *) lfirst(lc);

			Assert(o);
			TargetEntry *tle = tlist_member(o, query->targetList);

			Assert(tle);
			TargetEntry *ntle = list_nth(newtlist, tle->resno - 1);

			scatterClause = lappend(scatterClause, copyObject(ntle->expr));
		}
		query->scatterClause = scatterClause;
	}
}

相关信息

greenplumn 源码目录

相关文章

greenplumn cdbappendonlystorageformat 源码

greenplumn cdbappendonlystorageread 源码

greenplumn cdbappendonlystoragewrite 源码

greenplumn cdbappendonlyxlog 源码

greenplumn cdbbufferedappend 源码

greenplumn cdbbufferedread 源码

greenplumn cdbcat 源码

greenplumn cdbcopy 源码

greenplumn cdbdistributedsnapshot 源码

greenplumn cdbdistributedxacts 源码

0 赞

所属分类： 大数据
本文标签： greenplum
版权声明： 原创文章如转载，请注明本文链接: https://www.seaxiang.com/blog/c7b07a2541664a4aa5eca36a303963a0

热门推荐

1、直接访问google.com
2、 - 优质文章
3、 gate.io
4、 harmony 鸿蒙hdc使用指导
5、 harmony 鸿蒙ArkUI组件（ArkTS）开发常见问题
6、 openharmony
7、 harmony 鸿蒙初识ArkTS语言
8、 flink kafka connector scan.startup.mode 的几个选项

Loading...