greenplumn euc2004_sjis2004 源码

  • 2022-08-18
  • 浏览 (395)

greenplumn euc2004_sjis2004 代码

文件路径:/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c

/*-------------------------------------------------------------------------
 *
 *	  EUC_JIS_2004, SHIFT_JIS_2004
 *
 * Copyright (c) 2007-2019, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);

static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);

/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns VOID;
 * ----------
 */

Datum
euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);

	euc_jis_20042shift_jis_2004(src, dest, len);

	PG_RETURN_VOID();
}

Datum
shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	int			len = PG_GETARG_INT32(4);

	CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);

	shift_jis_20042euc_jis_2004(src, dest, len);

	PG_RETURN_VOID();
}

/*
 * EUC_JIS_2004 -> SHIFT_JIS_2004
 */
static void
euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
{
	int			c1,
				ku,
				ten;
	int			l;

	while (len > 0)
	{
		c1 = *euc;
		if (!IS_HIGHBIT_SET(c1))
		{
			/* ASCII */
			if (c1 == 0)
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);
			*p++ = c1;
			euc++;
			len--;
			continue;
		}

		l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);

		if (l < 0)
			report_invalid_encoding(PG_EUC_JIS_2004,
									(const char *) euc, len);

		if (c1 == SS2 && l == 2)	/* JIS X 0201 kana? */
		{
			*p++ = euc[1];
		}
		else if (c1 == SS3 && l == 3)	/* JIS X 0213 plane 2? */
		{
			ku = euc[1] - 0xa0;
			ten = euc[2] - 0xa0;

			switch (ku)
			{
				case 1:
				case 3:
				case 4:
				case 5:
				case 8:
				case 12:
				case 13:
				case 14:
				case 15:
					*p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
					break;
				default:
					if (ku >= 78 && ku <= 94)
					{
						*p++ = (ku + 0x19b) >> 1;
					}
					else
						report_invalid_encoding(PG_EUC_JIS_2004,
												(const char *) euc, len);
			}

			if (ku % 2)
			{
				if (ten >= 1 && ten <= 63)
					*p++ = ten + 0x3f;
				else if (ten >= 64 && ten <= 94)
					*p++ = ten + 0x40;
				else
					report_invalid_encoding(PG_EUC_JIS_2004,
											(const char *) euc, len);
			}
			else
				*p++ = ten + 0x9e;
		}

		else if (l == 2)		/* JIS X 0213 plane 1? */
		{
			ku = c1 - 0xa0;
			ten = euc[1] - 0xa0;

			if (ku >= 1 && ku <= 62)
				*p++ = (ku + 0x101) >> 1;
			else if (ku >= 63 && ku <= 94)
				*p++ = (ku + 0x181) >> 1;
			else
				report_invalid_encoding(PG_EUC_JIS_2004,
										(const char *) euc, len);

			if (ku % 2)
			{
				if (ten >= 1 && ten <= 63)
					*p++ = ten + 0x3f;
				else if (ten >= 64 && ten <= 94)
					*p++ = ten + 0x40;
				else
					report_invalid_encoding(PG_EUC_JIS_2004,
											(const char *) euc, len);
			}
			else
				*p++ = ten + 0x9e;
		}
		else
			report_invalid_encoding(PG_EUC_JIS_2004,
									(const char *) euc, len);

		euc += l;
		len -= l;
	}
	*p = '\0';
}

/*
 * returns SHIFT_JIS_2004 "ku" code indicated by second byte
 * *ku = 0: "ku" = even
 * *ku = 1: "ku" = odd
 */
static int
get_ten(int b, int *ku)
{
	int			ten;

	if (b >= 0x40 && b <= 0x7e)
	{
		ten = b - 0x3f;
		*ku = 1;
	}
	else if (b >= 0x80 && b <= 0x9e)
	{
		ten = b - 0x40;
		*ku = 1;
	}
	else if (b >= 0x9f && b <= 0xfc)
	{
		ten = b - 0x9e;
		*ku = 0;
	}
	else
	{
		ten = -1;				/* error */
		*ku = 0;				/* keep compiler quiet */
	}
	return ten;
}

/*
 * SHIFT_JIS_2004 ---> EUC_JIS_2004
 */

static void
shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
{
	int			c1;
	int			ku,
				ten,
				kubun;
	int			plane;
	int			l;

	while (len > 0)
	{
		c1 = *sjis;

		if (!IS_HIGHBIT_SET(c1))
		{
			/* ASCII */
			if (c1 == 0)
				report_invalid_encoding(PG_SHIFT_JIS_2004,
										(const char *) sjis, len);
			*p++ = c1;
			sjis++;
			len--;
			continue;
		}

		l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);

		if (l < 0 || l > len)
			report_invalid_encoding(PG_SHIFT_JIS_2004,
									(const char *) sjis, len);

		if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
		{
			/* JIS X0201 (1 byte kana) */
			*p++ = SS2;
			*p++ = c1;
		}
		else if (l == 2)
		{
			int			c2 = sjis[1];

			plane = 1;
			ku = 1;
			ten = 1;

			/*
			 * JIS X 0213
			 */
			if (c1 >= 0x81 && c1 <= 0x9f)	/* plane 1 1ku-62ku */
			{
				ku = (c1 << 1) - 0x100;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,
											(const char *) sjis, len);
				ku -= kubun;
			}
			else if (c1 >= 0xe0 && c1 <= 0xef)	/* plane 1 62ku-94ku */
			{
				ku = (c1 << 1) - 0x180;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,

											(const char *) sjis, len);
				ku -= kubun;
			}
			else if (c1 >= 0xf0 && c1 <= 0xf3)	/* plane 2
												 * 1,3,4,5,8,12,13,14,15 ku */
			{
				plane = 2;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,
											(const char *) sjis, len);
				switch (c1)
				{
					case 0xf0:
						ku = kubun == 0 ? 8 : 1;
						break;
					case 0xf1:
						ku = kubun == 0 ? 4 : 3;
						break;
					case 0xf2:
						ku = kubun == 0 ? 12 : 5;
						break;
					default:
						ku = kubun == 0 ? 14 : 13;
						break;
				}
			}
			else if (c1 >= 0xf4 && c1 <= 0xfc)	/* plane 2 78-94ku */
			{
				plane = 2;
				ten = get_ten(c2, &kubun);
				if (ten < 0)
					report_invalid_encoding(PG_SHIFT_JIS_2004,
											(const char *) sjis, len);
				if (c1 == 0xf4 && kubun == 1)
					ku = 15;
				else
					ku = (c1 << 1) - 0x19a - kubun;
			}
			else
				report_invalid_encoding(PG_SHIFT_JIS_2004,
										(const char *) sjis, len);

			if (plane == 2)
				*p++ = SS3;

			*p++ = ku + 0xa0;
			*p++ = ten + 0xa0;
		}
		sjis += l;
		len -= l;
	}
	*p = '\0';
}

相关信息

greenplumn 源码目录

相关文章

greenplumn adminpack 源码

greenplumn verify_nbtree 源码

greenplumn auth_delay 源码

greenplumn auto_explain 源码

greenplumn blcost 源码

greenplumn blinsert 源码

greenplumn bloom 源码

greenplumn blscan 源码

greenplumn blutils 源码

greenplumn blvacuum 源码

0  赞