greenplumn regis 源码

  • 2022-08-18
  • 浏览 (319)

greenplumn regis 代码

文件路径:/src/backend/tsearch/regis.c

/*-------------------------------------------------------------------------
 *
 * regis.c
 *		Fast regex subset
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/regis.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "tsearch/dicts/regis.h"
#include "tsearch/ts_locale.h"

#define RS_IN_ONEOF 1
#define RS_IN_ONEOF_IN	2
#define RS_IN_NONEOF	3
#define RS_IN_WAIT	4


/*
 * Test whether a regex is of the subset supported here.
 * Keep this in sync with RS_compile!
 */
bool
RS_isRegis(const char *str)
{
	int			state = RS_IN_WAIT;
	const char *c = str;

	while (*c)
	{
		if (state == RS_IN_WAIT)
		{
			if (t_isalpha(c))
				 /* okay */ ;
			else if (t_iseq(c, '['))
				state = RS_IN_ONEOF;
			else
				return false;
		}
		else if (state == RS_IN_ONEOF)
		{
			if (t_iseq(c, '^'))
				state = RS_IN_NONEOF;
			else if (t_isalpha(c))
				state = RS_IN_ONEOF_IN;
			else
				return false;
		}
		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
		{
			if (t_isalpha(c))
				 /* okay */ ;
			else if (t_iseq(c, ']'))
				state = RS_IN_WAIT;
			else
				return false;
		}
		else
			elog(ERROR, "internal error in RS_isRegis: state %d", state);
		c += pg_mblen(c);
	}

	return (state == RS_IN_WAIT);
}

static RegisNode *
newRegisNode(RegisNode *prev, int len)
{
	RegisNode  *ptr;

	ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
	if (prev)
		prev->next = ptr;
	return ptr;
}

void
RS_compile(Regis *r, bool issuffix, const char *str)
{
	int			len = strlen(str);
	int			state = RS_IN_WAIT;
	const char *c = str;
	RegisNode  *ptr = NULL;

	memset(r, 0, sizeof(Regis));
	r->issuffix = (issuffix) ? 1 : 0;

	while (*c)
	{
		if (state == RS_IN_WAIT)
		{
			if (t_isalpha(c))
			{
				if (ptr)
					ptr = newRegisNode(ptr, len);
				else
					ptr = r->node = newRegisNode(NULL, len);
				COPYCHAR(ptr->data, c);
				ptr->type = RSF_ONEOF;
				ptr->len = pg_mblen(c);
			}
			else if (t_iseq(c, '['))
			{
				if (ptr)
					ptr = newRegisNode(ptr, len);
				else
					ptr = r->node = newRegisNode(NULL, len);
				ptr->type = RSF_ONEOF;
				state = RS_IN_ONEOF;
			}
			else				/* shouldn't get here */
				elog(ERROR, "invalid regis pattern: \"%s\"", str);
		}
		else if (state == RS_IN_ONEOF)
		{
			if (t_iseq(c, '^'))
			{
				ptr->type = RSF_NONEOF;
				state = RS_IN_NONEOF;
			}
			else if (t_isalpha(c))
			{
				COPYCHAR(ptr->data, c);
				ptr->len = pg_mblen(c);
				state = RS_IN_ONEOF_IN;
			}
			else				/* shouldn't get here */
				elog(ERROR, "invalid regis pattern: \"%s\"", str);
		}
		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
		{
			if (t_isalpha(c))
			{
				COPYCHAR(ptr->data + ptr->len, c);
				ptr->len += pg_mblen(c);
			}
			else if (t_iseq(c, ']'))
				state = RS_IN_WAIT;
			else				/* shouldn't get here */
				elog(ERROR, "invalid regis pattern: \"%s\"", str);
		}
		else
			elog(ERROR, "internal error in RS_compile: state %d", state);
		c += pg_mblen(c);
	}

	if (state != RS_IN_WAIT)	/* shouldn't get here */
		elog(ERROR, "invalid regis pattern: \"%s\"", str);

	ptr = r->node;
	while (ptr)
	{
		r->nchar++;
		ptr = ptr->next;
	}
}

void
RS_free(Regis *r)
{
	RegisNode  *ptr = r->node,
			   *tmp;

	while (ptr)
	{
		tmp = ptr->next;
		pfree(ptr);
		ptr = tmp;
	}

	r->node = NULL;
}

static bool
mb_strchr(char *str, char *c)
{
	int			clen,
				plen,
				i;
	char	   *ptr = str;
	bool		res = false;

	clen = pg_mblen(c);
	while (*ptr && !res)
	{
		plen = pg_mblen(ptr);
		if (plen == clen)
		{
			i = plen;
			res = true;
			while (i--)
				if (*(ptr + i) != *(c + i))
				{
					res = false;
					break;
				}
		}

		ptr += plen;
	}

	return res;
}

bool
RS_execute(Regis *r, char *str)
{
	RegisNode  *ptr = r->node;
	char	   *c = str;
	int			len = 0;

	while (*c)
	{
		len++;
		c += pg_mblen(c);
	}

	if (len < r->nchar)
		return 0;

	c = str;
	if (r->issuffix)
	{
		len -= r->nchar;
		while (len-- > 0)
			c += pg_mblen(c);
	}


	while (ptr)
	{
		switch (ptr->type)
		{
			case RSF_ONEOF:
				if (!mb_strchr((char *) ptr->data, c))
					return false;
				break;
			case RSF_NONEOF:
				if (mb_strchr((char *) ptr->data, c))
					return false;
				break;
			default:
				elog(ERROR, "unrecognized regis node type: %d", ptr->type);
		}
		ptr = ptr->next;
		c += pg_mblen(c);
	}

	return true;
}

相关信息

greenplumn 源码目录

相关文章

greenplumn dict 源码

greenplumn dict_ispell 源码

greenplumn dict_simple 源码

greenplumn dict_synonym 源码

greenplumn dict_thesaurus 源码

greenplumn spell 源码

greenplumn to_tsany 源码

greenplumn ts_locale 源码

greenplumn ts_parse 源码

greenplumn ts_selfuncs 源码

0  赞