/*-
 * Copyright (c) 2010 Alistair Crooks <agc@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/wait.h>

#include <arpa/inet.h>

#include <ctype.h>
#include <inttypes.h>
#include <md5.h>
#include <netdb.h>
#include <regex.h>
#include <rmd160.h>
#include <sha1.h>
#include <sha2.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <vis.h>
#include <zlib.h>

#include "b64.h"
#include "hashes.h"
#include "codecs.h"

/*
 * Mark a function argument as deliberately unused: the expansion takes the
 * argument's address and discards it.  A definition supplied before this
 * point (e.g. on the compiler command line) takes precedence.
 */
#ifndef USE_ARG
#define USE_ARG(x)    /*LINTED*/(void)&x
#endif

/* return the value of the hexadecimal digit 'ch', or -1 if it is not one */
static int
uri_hexval(int ch)
{
	if (ch >= '0' && ch <= '9') {
		return ch - '0';
	}
	if (ch >= 'a' && ch <= 'f') {
		return ch - 'a' + 10;
	}
	if (ch >= 'A' && ch <= 'F') {
		return ch - 'A' + 10;
	}
	return -1;
}

/*
 * convert from percent-encoded URI to "ASCII"
 *
 * Copies at most 'len' bytes of 'in' (stopping early at a NUL) to 'vp',
 * replacing each "%XX" (upper or lower case hex) by the byte it encodes.
 * A '%' which is not followed by two hex digits inside the input is
 * copied unchanged.  At most 'size' bytes are written; the output is
 * NUL-terminated only when there is room for the terminator.
 * Returns the number of bytes produced (excluding any terminator).
 */
static int
uri2ascii(const char *in, const size_t len, const char *op, void *vp, size_t size)
{
	const char	*inp;
	char		*out = vp;
	char		*outp;
	int		 hi;
	int		 lo;

	(void)op;
	/* check the length before dereferencing, so in[len] is never read */
	for (inp = in, outp = out ; (size_t)(inp - in) < len && *inp && (size_t)(outp - out) < size; ) {
		if (*inp == '%' && (size_t)(inp - in) + 2 < len &&
		    (hi = uri_hexval((unsigned char)inp[1])) >= 0 &&
		    (lo = uri_hexval((unsigned char)inp[2])) >= 0) {
			*outp++ = (char)((hi << 4) | lo);
			inp += 3;
		} else {
			*outp++ = *inp++;
		}
	}
	if ((size_t)(outp - out) < size) {
		*outp = 0x0;
	}
	return (int)(outp - out);
}

/* characters which are percent-encoded by ascii2uri() */
static const char percents[] = "!*\"'();:@&=+$,/?%#[]";

#define HEX_CHAR_SET	"0123456789abcdef"

/*
 * convert from "ASCII" to percent-encoded URI
 *
 * Copies at most 'len' bytes of 'in' (stopping early at a NUL) to 'vp',
 * replacing every character listed in 'percents' by '%' and two lower
 * case hex digits.  At most 'size' bytes are written: an escape is only
 * emitted when all three of its bytes fit, otherwise conversion stops.
 * The output is NUL-terminated only when there is room for the
 * terminator.  Returns the number of bytes produced.
 */
static int
ascii2uri(const char *in, const size_t len, const char *op, void *vp, size_t size)
{
	const char	*inp;
	char		*out = vp;
	char		*outp;

	(void)op;
	for (inp = in, outp = out ; (size_t)(inp - in) < len && *inp && (size_t)(outp - out) < size; ) {
		if (strchr(percents, *inp) == NULL) {
			*outp++ = *inp++;
			continue;
		}
		if (size - (size_t)(outp - out) < 3) {
			/* no room for a complete "%XX" escape */
			break;
		}
		*outp++ = '%';
		*outp++ = HEX_CHAR_SET[((unsigned)(*inp & 0xf0) >> 4)];
		*outp++ = HEX_CHAR_SET[*inp & 0xf];
		inp += 1;
	}
	if ((size_t)(outp - out) < size) {
		*outp = 0x0;
	}
	return (int)(outp - out);
}

/*
 * Parallel alphabet tables: the letter at index n of one table is the
 * other-case form of the letter at index n of the other table.
 */
static const char	lowers[] = "abcdefghijklmnopqrstuvwxyz";
static const char	uppers[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/*
 * convert to lower from mixed case
 *
 * Copies at most 'len' bytes of 'in' (stopping early at a NUL) to 'vp',
 * mapping 'A'..'Z' to 'a'..'z' and leaving every other byte untouched.
 * At most 'size' bytes are written; the output is NUL-terminated only
 * when there is room for the terminator.  Returns the number of bytes
 * produced.
 */
static int
mixed2lower(const char *in, const size_t len, const char *op, void *vp, size_t size)
{
	const char	*inp;
	char		*out = vp;
	char		*outp;
	char		 ch;

	(void)op;
	/* check the length before dereferencing, so in[len] is never read */
	for (inp = in, outp = out ; (size_t)(inp - in) < len && *inp && (size_t)(outp - out) < size; inp++) {
		ch = *inp;
		if (ch >= 'A' && ch <= 'Z') {
			ch = (char)(ch - 'A' + 'a');
		}
		*outp++ = ch;
	}
	if ((size_t)(outp - out) < size) {
		*outp = 0x0;
	}
	return (int)(outp - out);
}

/*
 * convert to upper from mixed case
 *
 * Copies at most 'len' bytes of 'in' (stopping early at a NUL) to 'vp',
 * mapping 'a'..'z' to 'A'..'Z' and leaving every other byte untouched.
 * At most 'size' bytes are written; the output is NUL-terminated only
 * when there is room for the terminator.  Returns the number of bytes
 * produced.
 */
static int
mixed2upper(const char *in, const size_t len, const char *op, void *vp, size_t size)
{
	const char	*inp;
	char		*out = vp;
	char		*outp;
	char		 ch;

	(void)op;
	/* check the length before dereferencing, so in[len] is never read */
	for (inp = in, outp = out ; (size_t)(inp - in) < len && *inp && (size_t)(outp - out) < size; inp++) {
		ch = *inp;
		if (ch >= 'a' && ch <= 'z') {
			ch = (char)(ch - 'a' + 'A');
		}
		*outp++ = ch;
	}
	if ((size_t)(outp - out) < size) {
		*outp = 0x0;
	}
	return (int)(outp - out);
}

/*
 * convert to unix line endings
 *
 * Copies at most 'len' bytes of 'in' (stopping early at a NUL) to 'vp',
 * dropping the '\r' of every "\r\n" pair that lies entirely inside the
 * input.  A lone '\r' is copied unchanged.  At most 'size' bytes are
 * written; the output is NUL-terminated only when there is room for the
 * terminator.  Returns the number of bytes produced.
 */
static int
dos2unix(const char *in, const size_t len, const char *op, void *vp, size_t size)
{
	const char	*inp;
	char		*out = vp;
	char		*outp;

	(void)op;
	for (inp = in, outp = out ; (size_t)(inp - in) < len && *inp && (size_t)(outp - out) < size; ) {
		/* only peek at the next byte when it is part of the input */
		if (*inp == '\r' && (size_t)(inp - in) + 1 < len && inp[1] == '\n') {
			inp++;
		}
		*outp++ = *inp++;
	}
	if ((size_t)(outp - out) < size) {
		*outp = 0x0;
	}
	return (int)(outp - out);
}

/*
 * convert to dos line endings
 *
 * Copies at most 'len' bytes of 'in' (stopping early at a NUL) to 'vp',
 * inserting a '\r' in front of every '\n'.  At most 'size' bytes are
 * written: a line ending is only emitted when both of its bytes fit,
 * otherwise conversion stops.  The output is NUL-terminated only when
 * there is room for the terminator.  Returns the number of bytes
 * produced.
 */
static int
unix2dos(const char *in, const size_t len, const char *op, void *vp, size_t size)
{
	const char	*inp;
	char		*out = vp;
	char		*outp;

	(void)op;
	for (inp = in, outp = out ; (size_t)(inp - in) < len && *inp && (size_t)(outp - out) < size; ) {
		if (*inp == '\n') {
			if (size - (size_t)(outp - out) < 2) {
				/* no room for the complete "\r\n" */
				break;
			}
			*outp++ = '\r';
		}
		*outp++ = *inp++;
	}
	if ((size_t)(outp - out) < size) {
		*outp = 0x0;
	}
	return (int)(outp - out);
}

#define HEXDUMP_LINELEN	16

#ifndef PRIsize
#define PRIsize	"z"
#endif

/*
 * Append formatted text at out[*used], never writing beyond out[size - 1].
 * '*used' is advanced by the number of characters actually stored, so a
 * truncated write can never push it past the end of the buffer.
 */
static void
hexdump_append(char *out, size_t size, size_t *used, const char *fmt, ...)
{
	va_list	args;
	size_t	avail;
	int	cc;

	if (*used >= size) {
		return;
	}
	avail = size - *used;
	va_start(args, fmt);
	cc = vsnprintf(&out[*used], avail, fmt, args);
	va_end(args);
	if (cc < 0) {
		return;
	}
	/* vsnprintf reports the untruncated length; count what was stored */
	*used += ((size_t)cc < avail) ? (size_t)cc : avail - 1;
}

/*
 * show hexadecimal/ascii dump
 *
 * Formats 'len' bytes of 'in' as lines of HEXDUMP_LINELEN bytes: a
 * decimal offset, the bytes in hex, and the printable characters (others
 * shown as '.').  A short last line is padded with spaces.  Output is
 * truncated to fit in 'size' bytes including the terminating NUL.
 * Returns the number of characters stored (excluding the NUL).
 */
static int 
hexdump(const char *in, const size_t len, const char *op, void *vp, size_t size)
{
	size_t	 i;
	size_t	 o;
	char	 line[HEXDUMP_LINELEN + 1];
	char	*out = (char *)vp;

	(void)op;
	for (i = 0, o = 0 ; i < len ; i++) {
		if (i % HEXDUMP_LINELEN == 0) {
			hexdump_append(out, size, &o, "%.5" PRIsize "u | ", i);
		}
		hexdump_append(out, size, &o, "%.02x ", (uint8_t)in[i]);
		line[i % HEXDUMP_LINELEN] = (isprint((uint8_t)in[i])) ? in[i] : '.';
		if (i % HEXDUMP_LINELEN == HEXDUMP_LINELEN - 1) {
			line[HEXDUMP_LINELEN] = 0x0;
			hexdump_append(out, size, &o, " | %s\n", line);
		}
	}
	if (i % HEXDUMP_LINELEN != 0) {
		/* pad the hex and text columns of a short final line */
		for ( ; i % HEXDUMP_LINELEN != 0 ; i++) {
			hexdump_append(out, size, &o, "   ");
			line[i % HEXDUMP_LINELEN] = ' ';
		}
		line[HEXDUMP_LINELEN] = 0x0;
		hexdump_append(out, size, &o, " | %s\n", line);
	}
	return (int)o;
}

/*
 * convert to visible string format
 *
 * Encodes 'len' bytes of 'in' with strvisx() using the default style.
 * strvisx() can emit up to four bytes per input byte plus a terminating
 * NUL, so -1 is returned unless 'size' is at least 4 * len + 1.
 * Otherwise returns the strvisx() result.
 */
static int
xlate_strvis_def(const char *in, const size_t len, const char *op, void *out, size_t size)
{
	USE_ARG(op);
	/* written as a division so that 4 * len cannot overflow */
	if (size == 0 || len > (size - 1) / 4) {
		return -1;
	}
	return strvisx(out, in, len, 0);
}

/*
 * convert from visible string format
 *
 * Decodes 'in' with strunvisx().  Decoding never expands the text, but
 * unencoded characters pass through one-for-one, so the output may need
 * as many bytes as the input plus a terminating NUL: -1 is returned
 * unless 'size' is larger than 'len'.  Otherwise returns the strunvisx()
 * result.
 * NOTE(review): strunvisx() takes no length, so 'in' has to be
 * NUL-terminated and 'len' is assumed to be its length - confirm callers.
 */
static int
xlate_strunvis_def(const char *in, const size_t len, const char *op, void *out, size_t size)
{
	USE_ARG(op);
	if (size <= len) {
		return -1;
	}
	return strunvisx(out, in, 0);
}

/*
 * convert to visible string format C format
 *
 * Encodes 'len' bytes of 'in' with strvisx() using VIS_CSTYLE.
 * strvisx() can emit up to four bytes per input byte plus a terminating
 * NUL, so -1 is returned unless 'size' is at least 4 * len + 1.
 * Otherwise returns the strvisx() result.
 */
static int
xlate_strvis_c(const char *in, const size_t len, const char *op, void *out, size_t size)
{
	USE_ARG(op);
	/* written as a division so that 4 * len cannot overflow */
	if (size == 0 || len > (size - 1) / 4) {
		return -1;
	}
	return strvisx(out, in, len, VIS_CSTYLE);
}

/*
 * convert from visible string format C format
 *
 * Decodes 'in' with strunvisx() (called with flags 0, exactly as the
 * default-style decoder does).  Decoding never expands the text, but
 * unencoded characters pass through one-for-one, so the output may need
 * as many bytes as the input plus a terminating NUL: -1 is returned
 * unless 'size' is larger than 'len'.  Otherwise returns the strunvisx()
 * result.
 * NOTE(review): strunvisx() takes no length, so 'in' has to be
 * NUL-terminated and 'len' is assumed to be its length - confirm callers.
 */
static int
xlate_strunvis_c(const char *in, const size_t len, const char *op, void *out, size_t size)
{
	USE_ARG(op);
	if (size <= len) {
		return -1;
	}
	return strunvisx(out, in, 0);
}

#define LINELEN	80

/*
 * base64 encoding: hand the input and output buffers straight to
 * b64encode(), asking for lines of LINELEN - 8 characters, and pass its
 * result back to the caller
 */
static int
xlate_b64encode(const char *in, const size_t insize, const char *op, void *out, size_t size)
{
	int	cc;

	USE_ARG(op);
	cc = b64encode(in, insize, out, size, LINELEN - 8);
	return cc;
}

/*
 * base64 decoding: hand the input and output buffers straight to
 * b64decode() and pass its result back to the caller
 */
static int
xlate_b64decode(const char *in, const size_t insize, const char *op, void *out, size_t size)
{
	int	cc;

	USE_ARG(op);
	cc = b64decode(in, insize, out, size);
	return cc;
}

/* uuencode/uudecode char set: the character at index n stands for value n */
static const char uuchars[] = "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";

/* uuencoded line size - # of input (binary) characters */
#define UU_LINE_SIZE	45

/* map the low six bits of 'ch' (0 to 0x3f) to the matching uu character */
static uint8_t
uuenc_char(char ch)
{
	const unsigned	sextet = (unsigned)ch & 0x3f;

	return uuchars[sextet];
}

/*
 * given a uu character, return a number in the range 0 to 0x3f
 *
 * The characters '!' to '_' stand for 1 to 63.  Everything else decodes
 * to 0: that covers '`' (the usual zero character), a space (the
 * traditional alternative for zero) and any byte outside the alphabet,
 * including NUL.
 */
static int
uudec_char(uint8_t uch)
{
	if (uch > ' ' && uch <= '_') {
		return uch - ' ';
	}
	return 0;
}

/*
 * uuencoding - probably redundant now we have base64
 *
 * Encodes 'insize' bytes of 'in' as uuencoded lines of up to
 * UU_LINE_SIZE input bytes each: a length character, four characters
 * for every (zero padded) group of three bytes, and a newline.  A line
 * is only produced when all of it fits in the remaining output space, so
 * encoding stops early rather than writing past 'outsize'.  No
 * terminating NUL is written.  Returns the number of bytes produced.
 */
static int
uuencode(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	uint32_t	 tmp;
	uint8_t		*out = vp;
	size_t		 done;	/* input bytes consumed so far */
	size_t		 used;	/* output bytes produced so far */
	size_t		 need;
	size_t		 n;
	size_t		 i;

	USE_ARG(op);
	for (done = 0, used = 0 ; done < insize ; done += n) {
		/* 1 (up to) UU_LINE_SIZE (45) characters per line */
		n = insize - done;
		if (n > UU_LINE_SIZE) {
			n = UU_LINE_SIZE;
		}
		/* length character + 4 per group of 3 + newline */
		need = 1 + (((n + 2) / 3) * 4) + 1;
		if (need > outsize - used) {
			break;
		}
		out[used++] = uuenc_char((char)n);
		for (i = 0 ; i < n ; i += 3) {
			tmp = (uint32_t)(in[done + i] & 0xff) << 16;
			if (i + 1 < n) {
				tmp |= (uint32_t)(in[done + i + 1] & 0xff) << 8;
			}
			if (i + 2 < n) {
				tmp |= (uint32_t)(in[done + i + 2] & 0xff);
			}
			out[used++] = uuenc_char((char)((tmp >> 18) & 0x3f));
			out[used++] = uuenc_char((char)((tmp >> 12) & 0x3f));
			out[used++] = uuenc_char((char)((tmp >> 6) & 0x3f));
			out[used++] = uuenc_char((char)((tmp) & 0x3f));
		}
		out[used++] = '\n';
	}
	return (int)used;
}

#define UU_HEADER	"begin "
#define UU_TRAILER	"`\nend\n"

/*
 * full uuencoding - add the file header and tail
 *
 * Writes the "begin 644 uuencoded" line, the uuencode() body and the
 * UU_TRAILER (followed by a NUL) to 'vp'.  Returns the total number of
 * bytes produced (excluding the NUL), a negative uuencode() result
 * unchanged, or -1 when the header or the trailer does not fit in
 * 'outsize'.
 */
static int
full_uuencode(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	size_t	 used;
	char	*out = vp;
	int	 headc;
	int	 bodyc;
	int	 tailc;

	headc = snprintf(out, outsize, "%s644 uuencoded\n", UU_HEADER);
	if (headc < 0 || (size_t)headc >= outsize) {
		/* header truncated: outsize - headc would wrap around */
		return -1;
	}
	if ((bodyc = uuencode(in, insize, op, &out[headc], outsize - (size_t)headc)) < 0) {
		return bodyc;
	}
	used = (size_t)headc + (size_t)bodyc;
	if (used > outsize) {
		return -1;
	}
	tailc = snprintf(&out[used], outsize - used, "%s", UU_TRAILER);
	if (tailc < 0 || (size_t)tailc >= outsize - used) {
		return -1;
	}
	return (int)(used + (size_t)tailc);
}

/*
 * uudecoding - probably redundant now we have base64
 *
 * Decodes uuencoded lines from 'in' into 'vp'.  If the input starts with
 * UU_HEADER the whole first line is skipped, and a UU_TRAILER at the
 * very end of the input is ignored.  Each line holds a length character
 * followed by groups of four characters giving three bytes each.
 * Decoding stops at the end of the input, when a group is cut short, or
 * when 'outsize' bytes have been produced.  No terminating NUL is
 * written.  Returns the number of bytes produced.
 */
static int
uudecode(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	const size_t	 hdrlen = strlen(UU_HEADER);
	const size_t	 trllen = strlen(UU_TRAILER);
	uint32_t	 tmp;
	size_t		 used;
	size_t		 pos;
	size_t		 end;
	char		*out = vp;
	int		 i;
	int		 n;

	USE_ARG(op);
	pos = 0;
	end = insize;
	if (insize >= hdrlen && memcmp(in, UU_HEADER, hdrlen) == 0) {
		/* we have a uuencode header - skip the "begin" line */
		for ( ; pos < insize && in[pos] != '\n' ; pos++) {
		}
		if (pos < insize) {
			pos += 1;
		}
		/* and stop in front of the trailer, if there is one */
		if (end - pos >= trllen &&
		    memcmp(&in[end - trllen], UU_TRAILER, trllen) == 0) {
			end -= trllen;
		}
	}
	for (used = 0 ; pos < end && used < outsize ; ) {
		/* masking keeps the count sane whatever uudec_char returns */
		n = uudec_char((uint8_t)in[pos++]) & 0x3f;
		for (i = 0; i < n ; i += 3, pos += 4) {
			if (end - pos < 4) {
				/* truncated group: nothing more to decode */
				return (int)used;
			}
			tmp = ((uint32_t)(uudec_char((uint8_t)in[pos]) & 0x3f) << 18) |
				((uint32_t)(uudec_char((uint8_t)in[pos + 1]) & 0x3f) << 12) |
				((uint32_t)(uudec_char((uint8_t)in[pos + 2]) & 0x3f) << 6) |
				(uint32_t)(uudec_char((uint8_t)in[pos + 3]) & 0x3f);
			if (used < outsize) {
				out[used++] = (char)((tmp >> 16) & 0xff);
			}
			if (i + 1 < n && used < outsize) {
				out[used++] = (char)((tmp >> 8) & 0xff);
			}
			if (i + 2 < n && used < outsize) {
				out[used++] = (char)(tmp & 0xff);
			}
		}
		if (pos < end && in[pos] == '\n') {
			pos += 1;
		}
	}
	return (int)used;
}

#define RUNELEN(r) (((r) > 0 && (r) <= 0x007f) ? 1 : ((r) >= 0x0800) ? 3 : 2)

/*
 * translate to unicode-16 from utf8
 *
 * Decodes one, two and three byte UTF-8 sequences from the 'insize'
 * bytes of 'in' into 16-bit values (host byte order) stored at 'vp'.
 * Bytes that do not start a complete, well-formed sequence of those
 * lengths (stray continuation bytes, four byte sequences, sequences cut
 * off by the end of the input) are skipped one at a time.  'outsize' is
 * the size of the output buffer in bytes.  Returns the number of BYTES
 * stored, like the other translations in this file.
 */
static int
utf8_to_unicode16(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	const uint8_t	*inp = (const uint8_t *)(const void *)in;
	uint16_t	*out = vp;
	size_t		 maxout;
	size_t		 left;
	size_t		 i;
	size_t		 o;

	(void)op;
	maxout = outsize / sizeof(*out);
	for (i = 0, o = 0 ; i < insize && o < maxout ; ) {
		left = insize - i;
		if ((inp[i] & 0x80) == 0) {
			out[o++] = (uint16_t)(inp[i] & 0x7f);
			i += 1;
		} else if ((inp[i] & 0xe0) == 0xc0 && left >= 2 &&
			   (inp[i + 1] & 0xc0) == 0x80) {
			out[o++] = (uint16_t)(((inp[i] & 0x1f) << 6) | (inp[i + 1] & 0x3f));
			i += 2;
		} else if ((inp[i] & 0xf0) == 0xe0 && left >= 3 &&
			   (inp[i + 1] & 0xc0) == 0x80 &&
			   (inp[i + 2] & 0xc0) == 0x80) {
			out[o++] = (uint16_t)(((inp[i] & 0x0f) << 12) |
					((inp[i + 1] & 0x3f) << 6) |
					(inp[i + 2] & 0x3f));
			i += 3;
		} else {
			/* not decodable: step over it so the loop always advances */
			i += 1;
		}
	}
	return (int)(o * sizeof(*out));
}

/*
 * translate to utf8 from unicode-16
 *
 * Encodes the 16-bit values (host byte order) held in the 'insize'
 * BYTES of 'in' as UTF-8 in 'vp'.  Values 0x0001-0x007f take one byte,
 * values from 0x0800 take three, everything else - including 0x0000,
 * which is thereby never written as a NUL byte - takes two.  A trailing
 * odd byte of input is ignored.  A value is only encoded when all of its
 * bytes fit in the remaining output space.  Returns the number of bytes
 * produced.
 */
static int
unicode16_to_utf8(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	uint16_t	 rune;
	size_t		 need;
	size_t		 i;
	size_t		 o;
	char		*out = vp;

	(void)op;
	for (i = 0, o = 0 ; insize - i >= sizeof(rune) ; i += sizeof(rune)) {
		/* 'in' need not be aligned for 16-bit access */
		(void) memcpy(&rune, &in[i], sizeof(rune));
		need = (rune > 0 && rune <= 0x007f) ? 1 : (rune >= 0x0800) ? 3 : 2;
		if (need > outsize - o) {
			break;
		}
		switch (need) {
		case 1:
			out[o++] = (char)(rune & 0x7f);
			break;
		case 2:
			out[o++] = (char)(0xc0 | ((unsigned)(rune & 0x07c0) >> 6));
			out[o++] = (char)(0x80 | (rune & 0x003f));
			break;
		default:
			out[o++] = (char)(0xe0 | ((unsigned)(rune & 0xf000) >> 12));
			out[o++] = (char)(0x80 | ((unsigned)(rune & 0x0fc0) >> 6));
			out[o++] = (char)(0x80 | (rune & 0x003f));
			break;
		}
	}
	return (int)o;
}

/*
 * translate fortran carriage control characters
 *
 * Copies at most 'insize' bytes of 'in' (stopping early at a NUL) to
 * 'vp', interpreting the character which follows each newline as a
 * carriage control character:
 *	' '	plain newline
 *	'0'	newline plus one blank line
 *	'1'	form feed instead of the newline
 *	'+'	carriage return instead of the newline (overprint)
 * Any other character leaves the newline and that character alone.  The
 * control character is only examined when it is part of the input.  The
 * first character of the input is not treated as a control character.
 * At most 'outsize' bytes are written; the output is NUL-terminated only
 * when there is room for the terminator.  Returns the number of bytes
 * produced.
 */
static int
asa(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	const char	*inp;
	char		*out = vp;
	char		*outp;
	char		 ctl;

	(void)op;
	for (inp = in, outp = out ; (size_t)(inp - in) < insize && *inp && (size_t)(outp - out) < outsize; ) {
		if (*inp != '\n' || (size_t)(inp - in) + 1 >= insize) {
			*outp++ = *inp++;
			continue;
		}
		ctl = inp[1];
		if (ctl == ' ') {
			*outp++ = '\n';
		} else if (ctl == '0') {
			if (outsize - (size_t)(outp - out) < 2) {
				/* no room for both newlines */
				break;
			}
			*outp++ = '\n';
			*outp++ = '\n';
		} else if (ctl == '1') {
			*outp++ = '\f';
		} else if (ctl == '+') {
			*outp++ = '\r';
		} else {
			*outp++ = *inp++;
			continue;
		}
		/* consume the newline and its control character */
		inp += 2;
	}
	if ((size_t)(outp - out) < outsize) {
		*outp = 0x0;
	}
	return (int)(outp - out);
}

/*
 * get a number from a substring
 *
 * Builds a decimal number from the digits found in s[from] .. s[to - 1];
 * characters that are not digits are skipped.  The result is negated
 * when s[from] is '-'.  A negative 'from' (as regexec() reports for a
 * group that did not take part in the match) gives 0.
 */
static int
re2num(const char *s, int64_t from, int64_t to)
{
	size_t	value = 0;
	int64_t	pos;
	int	negative;

	if (from < 0) {
		return 0;
	}
	negative = (s[(int)from] == '-');
	for (pos = from ; pos < to ; pos++) {
		if (!isdigit((uint8_t)s[(int)pos])) {
			continue;
		}
		value = (value * 10) + (s[(int)pos] - '0');
	}
	/* negate in the unsigned type, then narrow, as sign * value would */
	return negative ? (int)(0 - value) : (int)value;
}

/*
 * get a substring of the current string
 *
 * 'op' has the form "substring:<from>:<to>".  Bytes in[from] to
 * in[to - 1] are copied to 'vp'.  A negative <from> is reported and
 * treated as 0; a negative <to>, or one beyond the input, means "up to
 * the end of the input".  At most 'outsize' bytes are written; the
 * output is NUL-terminated only when there is room for the terminator.
 * Returns the number of bytes copied, or 0 when 'op' cannot be parsed.
 */
static int
substring(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	regmatch_t	 matchv[10];
	regex_t	 	 r;
	size_t		 from;
	size_t		 to;
	size_t		 cc;
	char		*out = vp;
	int		 matched;
	int		 n;

	/* XXX - find in translations table */
	if (regcomp(&r, "^substring:([-]?[0-9]+):([-]?[0-9]+)", REG_EXTENDED) != 0) {
		(void) fprintf(stderr, "bad substring regexp\n");
		return 0;
	}
	matched = (regexec(&r, op, 10, matchv, 0) == 0);
	regfree(&r);
	if (!matched) {
		/* matchv holds nothing usable */
		(void) fprintf(stderr, "bad substring specification '%s'\n", op);
		return 0;
	}
	if ((n = re2num(op, matchv[1].rm_so, matchv[1].rm_eo)) < 0) {
		/* makes no sense */
		(void) fprintf(stderr, "weird 'from' substring index, assuming 0\n");
		n = 0;
	}
	from = ((size_t)n < insize) ? (size_t)n : insize;
	n = re2num(op, matchv[2].rm_so, matchv[2].rm_eo);
	/* adapt to length of input string */
	to = (n < 0 || (size_t)n > insize) ? insize : (size_t)n;
	cc = (to > from) ? to - from : 0;
	if (cc > outsize) {
		cc = outsize;
	}
	(void) memcpy(out, &in[from], cc);
	if (cc < outsize) {
		out[cc] = 0x0;
	}
	return (int)cc;
}

/* the base 85 functions are taken from: http://www.stillhq.com/svn/trunk/ascii85/ */
/* the source code is in the public domain */

/*
 * utility routine to output a base 85 character: store 'ch' at *outp
 * and, once the current line (which starts at *linep) has reached
 * LINELEN - 8 characters, follow it with a newline and start a new line.
 * No check is made on the space left in the output buffer.
 */
static void
b85_write_char(char **outp, char **linep, const uint8_t ch)
{
	char	*cursor = *outp;

	*cursor++ = ch;
	if ((int)(cursor - *linep) >= LINELEN - 8) {
		*cursor++ = '\n';
		*linep = cursor;
	}
	*outp = cursor;
}

/*
 * output code for base85 encoding: split 'tmp' into five base 85
 * digits and write the 'n' most significant of them, most significant
 * first, each offset by '!'
 */
static void
b85enc_out(char **outp, char **linep, unsigned tmp, int n)
{
	char	digits[5];
	int	idx;

	/* digits[0] is the least significant digit */
	for (idx = 0 ; idx < 5 ; idx++) {
		digits[idx] = (tmp % 85);
		tmp /= 85;
	}
	for (idx = 4 ; idx > 4 - n ; idx--) {
		b85_write_char(outp, linep, digits[idx] + '!');
	}
}

/*
 * base85 encoding - 1.25 growth ratio, used in PDF
 *
 * Writes "<~", then the input packed big-endian four bytes at a time
 * into a 32-bit word, each word being written as five characters by
 * b85enc_out().  A final partial group of 'nbytes' bytes is written as
 * 'nbytes' characters.  The stream is closed with "~>" and a newline,
 * all written through b85_write_char(), which also inserts the line
 * breaks.  Returns the number of bytes written to 'vp'.
 *
 * This function never emits the 'z' shorthand for an all-zero group,
 * although base85decode() accepts it.
 *
 * NOTE(review): the loop condition is "<= insize", so the byte at
 * in[insize] is read and encoded along with the input.  base85decode()
 * drops one byte at the end of the stream, which looks like the matching
 * compensation - confirm before changing either function on its own.
 * NOTE(review): the only check against 'outsize' is the loop condition;
 * the "<~" prefix, the characters of a group, the line breaks and the
 * trailer are written without one, and "outsize - 2" wraps around when
 * outsize is less than 2.
 */
static int
base85encode(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	const char	*inp;
	uint32_t	 tmp;
	char		*out = vp;
	char		*linep;
	char		*outp;
	int		 nbytes;

	USE_ARG(op);
	/* the first line starts at the "<~", which counts towards its length */
	linep = outp = out;
	*outp++ = '<';
	*outp++ = '~';
	tmp = 0; nbytes = 0;
	for (inp = in ; (size_t)(inp - in) <= insize && (size_t)(outp - out) < outsize - 2; ) {
		/* nbytes is the position within the group before the increment */
		switch(nbytes++) {
		case 0:
			tmp |= ((*inp++ & 0xff) << 24);
			break;
		case 1:
			tmp |= ((*inp++ & 0xff) << 16);
			break;
		case 2:
			tmp |= ((*inp++ & 0xff) << 8);
			break;
		default:
			/* fourth byte: the group is complete, write it out */
			tmp |= (*inp++ & 0xff);
			b85enc_out(&outp, &linep, tmp, 5);
			tmp = 0;
			nbytes = 0;
			break;
		}
	}
	/* partial group left over: write one character per pending byte */
	if (nbytes > 0) {
		b85enc_out(&outp, &linep, tmp, nbytes);
	}
	b85_write_char(&outp, &linep, '~');
	b85_write_char(&outp, &linep, '>');
	b85_write_char(&outp, &linep, '\n');
	return (int)(outp - out);
}

/*
 * output char for char decode: store the 'count' most significant bytes
 * of 'tmp' at *outp, most significant first, advancing *outp.  Nothing
 * is stored unless 'count' is between 1 and 4.
 */
static void
b85dec_char(uint8_t **outp, uint32_t tmp, int count)
{
	int	shift;
	int	k;

	if (count < 1 || count > 4) {
		return;
	}
	for (k = 0, shift = 24 ; k < count ; k++, shift -= 8) {
		*(*outp)++ = (tmp >> shift) & 0xff;
	}
}

/*
 * base85 decoding
 *
 * Decoding starts at in[2], i.e. the first two bytes of the input are
 * skipped without being examined (presumably the "<~" written by
 * base85encode() - confirm).  Newlines are ignored, 'z' stands for four
 * zero bytes, every other character contributes one base 85 digit, and
 * five digits give four output bytes.  A '~' followed (optionally after
 * a newline) by '>' ends the stream.  Returns the number of bytes
 * written to 'vp'.
 *
 * NOTE(review): at the end of the stream one byte is dropped: with no
 * pending digits the output pointer is stepped back by one, otherwise
 * only nbytes - 1 bytes of the pending group are written.  This looks
 * like the counterpart of base85encode() encoding the byte at
 * in[insize] - confirm before changing either function on its own.  If
 * nothing has been written, stepping back makes the result -1.
 * NOTE(review): the loop condition is "<= insize" and the '~' case looks
 * at the following bytes without a length check, so bytes beyond the
 * input may be read; b85dec_char() writes up to four bytes after a check
 * that only guarantees room for one.
 */
static int
base85decode(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	/* weight of the first to fifth digit of a group */
	static const unsigned	 pow85[] = { 85*85*85*85, 85*85*85, 85*85, 85, 1 };
	const char		*inp;
	uint32_t		 tmp;
	uint8_t			 ch;
	uint8_t			*out = vp;
	uint8_t			*outp;
	int			 nbytes;

	USE_ARG(op);
	tmp = 0;
	nbytes = 0;
	for (inp = &in[2], outp = out ; (size_t)(inp - in) <= insize && (size_t)(outp - out) < outsize; ) {
		switch(ch = *inp++) {
		case '\n':
			/* line breaks carry no data */
			break;
		case 'z':
			b85dec_char(&outp, 0, 4);
			break;
		case '~':
			if (*inp == '\n') {
				inp += 1;
			}
			if (*inp == '>') {
				/* end of stream */
				if (nbytes == 0) {
					outp -= 1;
				} else {
					/* adjust the partial group before taking its leading bytes */
					tmp += pow85[nbytes - 1];
					b85dec_char(&outp, tmp, nbytes - 1);
				}
				return (int)(outp - out);
			}
			break;
		default:
			/* nbytes counts the digits collected for the current group */
			tmp += (ch - '!') * pow85[nbytes++];
			if (nbytes == 5) {
				b85dec_char(&outp, tmp, 4);
				nbytes = 0;
				tmp = 0;
			}
			break;
		}
	}
	return (int)(outp - out);
}

/*
 * Append 'n' bytes of 'src' at out[*used] without writing beyond
 * out[outsize - 1].  Returns 1 if everything fitted, 0 if the data had
 * to be cut short.
 */
static int
sed_append(char *out, size_t outsize, size_t *used, const char *src, size_t n)
{
	const size_t	room = outsize - *used;
	const int	fits = (n <= room);

	if (!fits) {
		n = room;
	}
	(void) memcpy(&out[*used], src, n);
	*used += n;
	return fits;
}

/*
 * perform sed commands on the input
 *
 * 'op' has the form "<name>:s<sep><from><sep><to>[<sep>]", where <sep>
 * is the first non-blank character after the 's'.  Every match of the
 * extended regular expression <from> in the input is replaced by the
 * literal text <to> ('&' and back references are not interpreted).
 * Matching stops at the first NUL byte in the input; whatever follows
 * the last match is copied unchanged.  The output is truncated to
 * 'outsize' bytes and is not NUL-terminated.  Returns the number of
 * bytes produced, or 0 when the command cannot be parsed or memory
 * cannot be allocated.
 */
static int
sed(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	regmatch_t	 matchv[10];
	const char	*sep;
	const char	*inp;
	const char	*end;
	regex_t		 r;
	size_t		 fromlen;
	size_t		 tolen;
	size_t		 used;
	char		 pat[512];
	char		*out = (char *)vp;
	char		*text;
	char		*from;
	char		*to;
	int		 matched;
	int		 fits;

	(void) memset(&r, 0x0, sizeof(r));
	/* jump over ':' and 's' - next non-whitespace is the separator */
	if ((sep = strchr(op, ':')) == NULL || sep[1] == 0x0) {
		(void) fprintf(stderr, "bad regexp '%s'\n", op);
		return 0;
	}
	for (sep += 2; *sep == ' ' || *sep == '\t' ; sep++) {
	}
	if (*sep == 0x0) {
		(void) fprintf(stderr, "bad regexp '%s'\n", op);
		return 0;
	}
	if (sep[strlen(sep) - 1] == *sep) {
		(void) snprintf(pat, sizeof(pat), "^%c(.+)%c(.*)%c$", *sep, *sep, *sep);
	} else {
		(void) snprintf(pat, sizeof(pat), "^%c(.+)%c(.*)$", *sep, *sep);
	}
	if (regcomp(&r, pat, REG_EXTENDED) != 0) {
		(void) fprintf(stderr, "bad regexec '%s'\n", pat);
		return 0;
	}
	matched = (regexec(&r, sep, 10, matchv, 0) == 0);
	regfree(&r);
	if (!matched) {
		(void) fprintf(stderr, "bad regexec '%s'\n", pat);
		return 0;
	}
	fromlen = (size_t)(matchv[1].rm_eo - matchv[1].rm_so);
	tolen = (size_t)(matchv[2].rm_eo - matchv[2].rm_so);
	from = calloc(1, fromlen + 1);
	to = calloc(1, tolen + 1);
	/* regexec() needs a NUL-terminated subject */
	text = calloc(1, insize + 1);
	if (from == NULL || to == NULL || text == NULL) {
		free(from);
		free(to);
		free(text);
		(void) fprintf(stderr, "bad calloc 1\n");
		return 0;
	}
	(void) memcpy(from, &sep[(size_t)matchv[1].rm_so], fromlen);
	(void) memcpy(to, &sep[(size_t)matchv[2].rm_so], tolen);
	(void) memcpy(text, in, insize);
	if (regcomp(&r, from, REG_EXTENDED) != 0) {
		(void) fprintf(stderr, "bad regexp '%s'\n", from);
		free(from);
		free(to);
		free(text);
		return 0;
	}
	used = 0;
	fits = 1;
	inp = text;
	end = &text[insize];
	while (fits && inp < end &&
	    regexec(&r, inp, 10, matchv, (inp == text) ? 0 : REG_NOTBOL) == 0) {
		/* text in front of the match, then the substitution */
		fits = sed_append(out, outsize, &used, inp, (size_t)matchv[0].rm_so) &&
		    sed_append(out, outsize, &used, to, tolen);
		inp += (size_t)matchv[0].rm_eo;
		if (matchv[0].rm_eo == matchv[0].rm_so) {
			/* empty match: pass one byte through so we always advance */
			if (inp >= end) {
				break;
			}
			fits = fits && sed_append(out, outsize, &used, inp, 1);
			inp += 1;
		}
	}
	/* copy from inp to end into the output */
	if (fits && inp < end) {
		(void) sed_append(out, outsize, &used, inp, (size_t)(end - inp));
	}
	regfree(&r);
	free(from);
	free(to);
	free(text);
	return (int)used;
}

/*
 * hand edit the input
 *
 * Writes the input to a temporary file (in $TMPDIR, or /tmp), runs the
 * editor named by $VISUAL, else $EDITOR, else "vi" on it, waits for the
 * editor to finish and reads the file back into 'vp' (at most 'outsize'
 * bytes).  The editor is looked up along $PATH.  The temporary file is
 * removed before returning.  Returns the number of bytes read back, or
 * 0 when the temporary file cannot be created or filled.
 */
static int
edit(const char *in, const size_t insize, const char *op, void *vp, size_t outsize)
{
	const char	*editor;
	const char	*tmpdir;
	char		*out = (char *)vp;
	char		*argv[10];
	char		 tmppath[MAXPATHLEN];
	int		 status;
	int		 pid;
	int		 cc;
	int		 wc;
	int		 fd;

	USE_ARG(op);
	if ((tmpdir = getenv("TMPDIR")) == NULL) {
		tmpdir = "/tmp";
	}
	cc = snprintf(tmppath, sizeof(tmppath), "%s/codecs.XXXXXX", tmpdir);
	if (cc < 0 || (size_t)cc >= sizeof(tmppath)) {
		/* a truncated template would not end in XXXXXX */
		(void) fprintf(stderr, "bad temp file '%s'\n", tmppath);
		return 0;
	}
	if ((fd = mkstemp(tmppath)) < 0) {
		(void) fprintf(stderr, "bad temp file '%s'\n", tmppath);
		return 0;
	}
	for (cc = 0 ; (size_t)cc < insize && (wc = write(fd, &in[cc], insize - cc)) > 0 ; cc += wc) {
	}
	if ((size_t)cc != insize) {
		(void) fprintf(stderr, "short write to '%s'\n", tmppath);
		(void) unlink(tmppath);
		(void) close(fd);
		return 0;
	}
	if ((editor = getenv("VISUAL")) == NULL) {
		editor = getenv("EDITOR");
	}
	if (editor == NULL) {
		editor = "vi";
	}
	argv[0] = __UNCONST(editor);
	argv[1] = tmppath;
	argv[2] = NULL;
	switch(pid = fork()) {
	case -1:
		/* no editor run: the unchanged text is read back below */
		(void) fprintf(stderr, "can't fork to run '%s'\n", editor);
		break;
	case 0:
		/* child - execvp() so that a bare name like "vi" is found */
		execvp(editor, argv);
		/* exec failed: the child must not run the parent's code */
		_exit(127);
		/* NOTREACHED */
	default:
		(void) waitpid(pid, &status, 0);
		break;
	}
	(void) lseek(fd, 0, SEEK_SET);
	for (cc = 0 ; (wc = read(fd, &out[cc], outsize - cc)) > 0 ; cc += wc) {
	}
	(void) unlink(tmppath);
	(void) close(fd);
	return cc;
}

/*
 * convert a number of seconds in 'in' to a string
 *
 * The first 'insize' bytes of 'in' are parsed as a decimal number of
 * seconds since the epoch, which is written to 'vp' as local time in
 * the form "YYYY:MM:DD:hh:mm:ss\n".  Returns the snprintf() result, or
 * -1 when the time cannot be converted.
 */
static int
secs2str(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	struct tm	*tm;
	time_t		 t;
	size_t		 n;
	char		 num[32];
	char		*out = (char *)vp;

	(void)op;
	/* work on a terminated copy: 'in' need not end in a NUL */
	n = (insize < sizeof(num) - 1) ? insize : sizeof(num) - 1;
	(void) memcpy(num, in, n);
	num[n] = 0x0;
	t = (time_t) strtoll(num, NULL, 10);
	if ((tm = localtime(&t)) == NULL) {
		return -1;
	}
	return snprintf(out, outsize, "%4.4d:%2.2d:%2.2d:%2.2d:%2.2d:%2.2d\n",
		tm->tm_year + 1900,
		tm->tm_mon + 1,
		tm->tm_mday,
		tm->tm_hour,
		tm->tm_min,
		tm->tm_sec);
}

/*
 * convert a string to a number of seconds
 *
 * Looks for "YYYY:MM:DD:hh:mm:ss" in 'in' and writes the matching local
 * time, as decimal seconds since the epoch followed by a newline, to
 * 'vp'.  When the pattern is not found the current time is used.
 * Returns the snprintf() result, or -1 when the current time or the
 * pattern cannot be set up.
 * NOTE(review): regexec() takes no length, so 'in' has to be
 * NUL-terminated - confirm callers.
 */
static int
str2secs(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	regmatch_t	 matchv[10];
	struct tm	 tm;
	struct tm	*tp;
	regex_t		 r;
	time_t		 t;
	char		*out = (char *)vp;

	USE_ARG(insize);
	USE_ARG(op);
	t = time(NULL);
	if ((tp = localtime(&t)) == NULL) {
		return -1;
	}
	tm = *tp;
	if (regcomp(&r,
		"([0-9]+):([0-9]+):([0-9]+):([0-9]+):([0-9]+):([0-9]+)",
		REG_EXTENDED) != 0) {
		return -1;
	}
	if (regexec(&r, in, 10, matchv, 0) == 0) {
		tm.tm_year = re2num(in, matchv[1].rm_so, matchv[1].rm_eo) - 1900;
		tm.tm_mon = re2num(in, matchv[2].rm_so, matchv[2].rm_eo) - 1;
		tm.tm_mday = re2num(in, matchv[3].rm_so, matchv[3].rm_eo);
		tm.tm_hour = re2num(in, matchv[4].rm_so, matchv[4].rm_eo);
		tm.tm_min = re2num(in, matchv[5].rm_so, matchv[5].rm_eo);
		tm.tm_sec = re2num(in, matchv[6].rm_so, matchv[6].rm_eo);
		/* today's DST setting need not hold on that date */
		tm.tm_isdst = -1;
	}
	regfree(&r);
	t = mktime(&tm);
	return snprintf(out, outsize, "%lld\n", (long long)t);
}

/*
 * fill the 'outsize' bytes of the output buffer with values taken from
 * random(); the input is not used.  A fresh value is drawn for every
 * output position and copied there (cut short at the end of the buffer),
 * so each position ends up holding the first byte of its own value.
 * Returns 'outsize'.
 */
static int
randomise(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	uint32_t	 word;
	size_t		 pos;
	char		*out = (char *)vp;

	USE_ARG(in);
	USE_ARG(insize);
	USE_ARG(op);
	pos = 0;
	while (pos < outsize) {
		word = random();
		(void) memcpy(out + pos, &word, MIN((unsigned)(outsize - pos), sizeof(word)));
		pos += 1;
	}
	return outsize;
}

/*
 * rotate characters in the alphabet
 *
 * The rotation is the number found in 'op' from its fourth character
 * onwards (e.g. "rot13"); it may be negative and is reduced modulo 26.
 * Lower and upper case letters are rotated within their own alphabet,
 * all other bytes (including NUL) are copied unchanged.  At most
 * 'outsize' bytes are written.  Returns the number of bytes produced.
 */
static int
rotate(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	const char	*cp;
	size_t		 oplen;
	size_t		 i;
	size_t		 n;
	char		*out = (char *)vp;
	int		 rot;

	oplen = strlen(op);
	rot = (oplen > 3) ? re2num(op, 3, (int64_t)oplen) % 26 : 0;
	if (rot < 0) {
		/* keep the table index non-negative */
		rot += 26;
	}
	n = (insize < outsize) ? insize : outsize;
	for (i = 0 ; i < n ; i++) {
		if (in[i] == 0x0) {
			/* strchr() would match the table's terminator */
			out[i] = in[i];
		} else if ((cp = strchr(lowers, in[i])) != NULL) {
			out[i] = lowers[((int)(cp - lowers) + rot) % 26];
		} else if ((cp = strchr(uppers, in[i])) != NULL) {
			out[i] = uppers[((int)(cp - uppers) + rot) % 26];
		} else {
			out[i] = in[i];
		}
	}
	return (int)n;
}

/* the RADIX-50 character set: the character at index n has value n */
static const char	*rad50 = " ABCDEFGHIJKLMNOPQRSTUVWXYZ$.%0123456789";

/*
 * Return the RADIX-50 value of in[pos] (letters are taken as upper
 * case).  Positions beyond the input, NUL bytes and characters outside
 * the set all count as 0, the value of a space.
 */
static unsigned
rad50_value(const char *in, size_t insize, size_t pos)
{
	const char	*cp;

	if (pos >= insize || in[pos] == 0x0) {
		return 0;
	}
	cp = strchr(rad50, toupper((uint8_t)in[pos]));
	return (cp == NULL) ? 0 : (unsigned)(cp - rad50);
}

/*
 * DEC RADIX-50 encoding
 *
 * Packs every group of three input characters into one 16-bit word
 * (first character most significant) which is stored in host byte
 * order.  A short last group is padded with spaces.  Words are only
 * stored while they fit in 'outsize'.  Returns the number of bytes
 * produced.
 */
static int
rad50encode(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	uint16_t	 ch;
	size_t		 i;
	size_t		 o;
	char		*out = (char *)vp;

	(void)op;
	for (i = 0, o = 0 ; i < insize && outsize - o >= sizeof(ch) ; i += 3, o += sizeof(ch)) {
		ch = (uint16_t)((rad50_value(in, insize, i) * (40 * 40)) +
			(rad50_value(in, insize, i + 1) * 40) +
			rad50_value(in, insize, i + 2));
		(void) memcpy(&out[o], &ch, sizeof(ch));
	}
	return (int)o;
}

/*
 * DEC RADIX-50 decode
 *
 * Unpacks every complete 16-bit word (host byte order) of the input
 * into three characters of the RADIX-50 set, first character most
 * significant.  A trailing odd byte of input is ignored, and a word is
 * only decoded when all three characters fit in 'outsize'.  Returns the
 * number of bytes produced.
 */
static int
rad50decode(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	uint16_t	 ch;
	size_t		 i;
	size_t		 o;
	char		*out = (char *)vp;

	USE_ARG(op);
	for (i = 0, o = 0 ; insize - i >= sizeof(ch) && outsize - o >= 3 ; i += sizeof(ch), o += 3) {
		(void) memcpy(&ch, &in[i], sizeof(ch));
		/* a word above 40*40*40 - 1 indexes the table's terminating NUL */
		out[o] = rad50[ch / (40 * 40)];
		ch %= (40 * 40);
		out[o + 1] = rad50[ch / 40];
		ch %= 40;
		out[o + 2] = rad50[ch];
	}
	return (int)o;
}

/*
 * bin2hex encoding
 *
 * Writes every input byte as the four characters "\xNN", NN being the
 * byte's value as two lower case hex digits.  A byte is only converted
 * when all four characters fit in 'outsize'; the output is
 * NUL-terminated only when there is room for the terminator.  Returns
 * the number of bytes produced.
 */
static int
bin2hex(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	static const char	 hexdigits[] = "0123456789abcdef";
	uint8_t			 byte;
	size_t			 i;
	size_t			 o;
	char			*out = (char *)vp;

	(void)op;
	for (i = 0, o = 0 ; i < insize && outsize - o >= 4 ; i++) {
		/* unsigned, so that bytes above 0x7f are not sign extended */
		byte = (uint8_t)in[i];
		out[o++] = '\\';
		out[o++] = 'x';
		out[o++] = hexdigits[byte >> 4];
		out[o++] = hexdigits[byte & 0xf];
	}
	if (o < outsize) {
		out[o] = 0x0;
	}
	return (int)o;
}

/* return the value of the hexadecimal digit 'ch', or -1 if it is not one */
static int
hex2bin_nibble(int ch)
{
	if (ch >= '0' && ch <= '9') {
		return ch - '0';
	}
	if (ch >= 'a' && ch <= 'f') {
		return ch - 'a' + 10;
	}
	if (ch >= 'A' && ch <= 'F') {
		return ch - 'A' + 10;
	}
	return -1;
}

/*
 * hex2bin encoding
 *
 * The input is taken as groups of four characters, as written by
 * bin2hex(): a two character prefix (not examined) and two hex digits
 * (either case) giving one output byte.  Conversion stops at the first
 * incomplete group, at the first group whose digits are not hex, or
 * when 'outsize' bytes have been produced.  Returns the number of bytes
 * produced.
 */
static int
hex2bin(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	size_t	 i;
	size_t	 o;
	char	*out = (char *)vp;
	int	 hi;
	int	 lo;

	(void)op;
	for (i = 0, o = 0 ; insize - i >= 4 && o < outsize ; i += 4, o++) {
		hi = hex2bin_nibble((uint8_t)in[i + 2]);
		lo = hex2bin_nibble((uint8_t)in[i + 3]);
		if (hi < 0 || lo < 0) {
			break;
		}
		out[o] = (char)((hi << 4) | lo);
	}
	return (int)o;
}

/*
 * perform an MD5 digest
 *
 * Hands the input to MD5Data(), which stores its result in 'vp'.
 * Returns MD5_DIGEST_STRING_LENGTH, or -1 when the output buffer holds
 * fewer than MD5_DIGEST_STRING_LENGTH bytes.
 */
static int
md5(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	USE_ARG(op);
	if (outsize < MD5_DIGEST_STRING_LENGTH) {
		return -1;
	}
	(void) MD5Data((const uint8_t *)in, insize, vp);
	return MD5_DIGEST_STRING_LENGTH;
}

/*
 * perform an RMD160 digest
 *
 * Hands the input to RMD160Data(), which stores its result in 'vp'.
 * Returns RMD160_DIGEST_STRING_LENGTH, or -1 when the output buffer
 * holds fewer than RMD160_DIGEST_STRING_LENGTH bytes.
 */
static int
rmd160(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	USE_ARG(op);
	if (outsize < RMD160_DIGEST_STRING_LENGTH) {
		return -1;
	}
	(void) RMD160Data((const uint8_t *)in, insize, vp);
	return RMD160_DIGEST_STRING_LENGTH;
}

/*
 * perform an SHA1 digest
 *
 * Hands the input to SHA1Data(), which stores its result in 'vp'.
 * Returns SHA1_DIGEST_STRING_LENGTH, or -1 when the output buffer holds
 * fewer than SHA1_DIGEST_STRING_LENGTH bytes.
 */
static int
sha1(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	USE_ARG(op);
	if (outsize < SHA1_DIGEST_STRING_LENGTH) {
		return -1;
	}
	(void) SHA1Data((const uint8_t *)in, insize, vp);
	return SHA1_DIGEST_STRING_LENGTH;
}

/*
 * perform an SHA256 digest
 *
 * Hands the input to SHA256_Data(), which stores its result in 'vp'.
 * Returns SHA256_DIGEST_STRING_LENGTH, or -1 when the output buffer
 * holds fewer than SHA256_DIGEST_STRING_LENGTH bytes.
 */
static int
sha256(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	USE_ARG(op);
	if (outsize < SHA256_DIGEST_STRING_LENGTH) {
		return -1;
	}
	(void) SHA256_Data((const uint8_t *)in, insize, vp);
	return SHA256_DIGEST_STRING_LENGTH;
}

/*
 * perform an SHA512 digest
 *
 * Hands the input to SHA512_Data(), which stores its result in 'vp'.
 * Returns SHA512_DIGEST_STRING_LENGTH, or -1 when the output buffer
 * holds fewer than SHA512_DIGEST_STRING_LENGTH bytes.
 */
static int
sha512(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	USE_ARG(op);
	if (outsize < SHA512_DIGEST_STRING_LENGTH) {
		return -1;
	}
	(void) SHA512_Data((const uint8_t *)in, insize, vp);
	return SHA512_DIGEST_STRING_LENGTH;
}

/*
 * find the size of the input: write 'insize' as a decimal number
 * followed by a newline to the output buffer and return the snprintf()
 * result; the input bytes themselves are not looked at
 */
static int
sizefunc(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	const long long	nbytes = (long long) insize;

	USE_ARG(in);
	USE_ARG(op);
	USE_ARG(outsize);
	return snprintf(vp, outsize, "%lld\n", nbytes);
}

/*
 * bswap*
 *
 * Reverses the byte order of the first 2, 4 or 8 bytes of the input,
 * depending on whether 'op' is "bswap16", "bswap32" or "bswap64".
 * Returns the number of bytes written (2, 4 or 8), or 0 when 'op' is
 * none of these or when the input or the output buffer is too small.
 */
static int
byteswap(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	size_t		 width;
	size_t		 i;
	char		*out = (char *)vp;

	if (strcmp(op, "bswap16") == 0) {
		width = 2;
	} else if (strcmp(op, "bswap32") == 0) {
		width = 4;
	} else if (strcmp(op, "bswap64") == 0) {
		width = 8;
	} else {
		return 0;
	}
	if (insize < width || outsize < width) {
		return 0;
	}
	for (i = 0 ; i < width ; i++) {
		out[i] = in[width - 1 - i];
	}
	return (int)width;
}

/*
 * set all 'outsize' bytes of the output buffer to zero and return
 * 'outsize'; the input is not used
 */
static int
zero(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	char		*out = (char *)vp;
	size_t		 pos;

	USE_ARG(in);
	USE_ARG(insize);
	USE_ARG(op);
	for (pos = 0 ; pos < outsize ; pos++) {
		out[pos] = 0x0;
	}
	return outsize;
}

/*
 * ASCII to EBCDIC translation table, indexed by the ASCII byte value.
 * Entry 0x15 is 0x3D, the code which ebcdic2asciitab maps back to 0x15.
 */
static const uint8_t	ascii2ebcdictab[] = {
	0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
	0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
	0x40, 0x5a, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
	0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
	0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xba, 0xE0, 0xbb, 0x5F, 0x6D,
	0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
	0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x6A, 0xD0, 0xA1, 0x07,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
	0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xE1,
	0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75,
	0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E,
	0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
	0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
	0xDC, 0xDD, 0xDE, 0xDF, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/*
 * ASCII to IBM EBCDIC encoding
 *
 * Translates every input byte through ascii2ebcdictab.  At most
 * 'outsize' bytes are written; no terminating NUL is added.  Returns
 * the number of bytes produced.
 */
static int
ascii2ebcdic(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	const size_t	 n = (insize < outsize) ? insize : outsize;
	size_t		 i;
	char		*out = (char *)vp;

	(void)op;
	for (i = 0 ; i < n ; i++) {
		out[i] = ascii2ebcdictab[(uint8_t)in[i]];
	}
	return (int)i;
}

/*
 * EBCDIC to ASCII translation table, indexed by the EBCDIC byte value;
 * the comment in front of each row is the index of its first entry.
 */
static const uint8_t	ebcdic2asciitab[] = {
/* 0 */		0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
/* 16 */	0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
/* 32 */	0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
/* 48 */	0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
/* 64 */	0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xD5, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
/* 80 */	0x26, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
/* 96 */	0x2D, 0x2F, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0x7c, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
/* 112 */	0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
/* 128 */	0xC3, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9,
/* 144 */	0xCA, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0,
/* 160 */	0xD1, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD2, 0xD3, 0xD4, 0x5B, 0xD6, 0xD7,
/* 176 */	0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0x5b, 0x5d, 0xE4, 0x5D, 0xE6, 0xE7,
/* 192 */	0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
/* 208 */	0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
/* 224 */	0x5C, 0x9F, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9,
/* 240 */	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/*
 * IBM EBCDIC to ASCII encoding
 *
 * Translates every input byte through ebcdic2asciitab.  At most
 * 'outsize' bytes are written; no terminating NUL is added.  Returns
 * the number of bytes produced.
 */
static int
ebcdic2ascii(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	const size_t	 n = (insize < outsize) ? insize : outsize;
	size_t		 i;
	char		*out = (char *)vp;

	(void)op;
	for (i = 0 ; i < n ; i++) {
		out[i] = ebcdic2asciitab[(uint8_t)in[i]];
	}
	return (int)i;
}

enum {
	MAX_SOUNDEX	= 4
};

/*
 * calculate the soundex component for the input string
 *
 * The result is the first input character in lower case, followed by
 * the digit codes of the following consonants; a code is skipped when it
 * repeats the character written just before it, and other characters
 * are ignored.  The result is padded with '0' to MAX_SOUNDEX characters
 * and NUL-terminated.  Returns MAX_SOUNDEX, 0 (and an empty string) for
 * empty input, or -1 when the output buffer cannot hold MAX_SOUNDEX
 * characters and the terminator.
 */
static int
soundex(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	size_t	 i;
	size_t	 o;
	char	 ch;
	char	*out = (char *)vp;

	(void)op;
	if (outsize < MAX_SOUNDEX + 1) {
		return -1;
	}
	if (insize == 0) {
		/* there is no first character to start from */
		out[0] = 0x0;
		return 0;
	}
	out[0] = tolower((unsigned char)*in);
	for (o = 1, i = 1 ; i < insize && o < MAX_SOUNDEX ; i++) {
		switch(tolower((unsigned char)in[i])) {
		case 'b':
		case 'f':
		case 'p':
		case 'v':
			ch = '1';
			break;
		case 'c':
		case 'g':
		case 'j':
		case 'k':
		case 'q':
		case 's':
		case 'x':
		case 'z':
			ch = '2';
			break;
		case 'd':
		case 't':
			ch = '3';
			break;
		case 'l':
			ch = '4';
			break;
		case 'm':
		case 'n':
			ch = '5';
			break;
		case 'r':
			ch = '6';
			break;
		default:
			continue;
		}
		if (out[o - 1] != ch) {
			out[o++] = ch;
		}
	}
	for ( ; o < MAX_SOUNDEX ; o++) {
		out[o] = '0';
	}
	out[o] = 0x0;
	return (int)o;
}

/* the following metaphone code is from:
 *	http://aspell.net/metaphone/metaphone-kuhn.txt
 * as posted to Usenet by Michael Kuhn in 1995
 */

/*
consonants:
      B  X  S  K  J  T  F  H  L  M  N  P  R  0  W  Y

exceptions:
^ae
^gn
^kn
^pn
^wr	-> all drop first letter
^x	-> s
^wh	-> w

transforms:
b -> B (unless at end of word after 'm', in which case lose it)
c -> X (if cia or ch)
  -> S (if ci or ce or cy)
  -> drop if sci, sce or scy
  -> K otherwise
d -> J (if dge, dgy or dgi)
  -> T otherwise
f -> F
g -> drop if gh and not at end or before vowel
  -> drop if gn or gned
  -> drop if dge as above
  -> K otherwise
h -> drop if after vowel and no vowel follows
  -> drop if after ch, sh, ph, th or gh
  -> H otherwise
j -> J
k -> drop if after c
  -> K otherwise
l -> L
m -> M
n -> N
p -> F if in ph
  -> P otherwise
q -> K
r -> R
s -> X if before h, or in sio
  -> S otherwise
t -> X if tia or tio
  -> 0 if before h
  -> drop if in tch
  -> T otherwise
v -> F
w -> drop if not followed by a vowel
  -> W otherwise
x -> KS
y -> drop if not followed by a vowel
  -> Y if followed by a vowel
z -> S
*/

/* a transformation which only applies to the first two letters of a word */
typedef struct except_t {
	const char	*match;		/* lower-case prefix to look for */
	int		 len;		/* number of characters in match */
	const char	*xform;		/* replacement - 'I' marks a dropped letter */
} except_t;

/* word-initial exceptions: the first letter of each pair is dropped */
static const except_t	exceptions[] = {
	{	"ae",	2,	"Ie"	},
	{	"gn",	2,	"In"	},
	{	"kn",	2,	"In"	},
	{	"pn",	2,	"In"	},
	{	"wr",	2,	"Ir"	},
	{	"wh",	2,	"Iw"	},
	{	NULL,	0,	NULL	}
};

#define STRMATCH2(s1,ind,c1,c2)	\
	((s1)[ind] == (c1) && (s1)[ind + 1] == (c2))
#define STRMATCH3(s1,ind,c1,c2,c3)	\
	((s1)[ind] == (c1) && (s1)[ind + 1] == (c2) && (s1)[ind + 2] == (c3))

/* return non-zero if ch is a lower-case vowel (NUL is not a vowel) */
static int
metaphone_isvowel(int ch)
{
	return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
}

/*
 * calculate the metaphone code for the input string
 *
 * The input is lower-cased into working storage ('x' becomes "ks", or
 * 's' at the very start), the word-initial exceptions are applied, and
 * then every character is mapped to its code using the transforms
 * listed above.  The working copy is only read, never overwritten with
 * codes, so that rules which look at the previous letter (ck, sc[eiy],
 * h after c/s/p/t/g, final mb) see the original letters.
 *
 * Vowels and silent letters map to the marker 'I' and are not output;
 * a code which is the same as the last one output is not repeated.
 * Characters which are not letters are passed through unchanged.
 *
 * At most outsize bytes are written to the output; the terminating NUL
 * is only added when there is room for it.
 *
 * Returns the number of code characters written, or 0 if working
 * storage could not be allocated.
 */
static int
metaphone(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	const except_t	*ep;
	size_t		 chars;
	size_t		 i;
	size_t		 r;
	size_t		 o;
	char		*out = (char *)vp;
	char		*buf;
	char		 prev;
	char		 next;
	char		 code;

	USE_ARG(op);
	/*
	 * copy to working storage, expanding for any 'x's: every input
	 * character may become two, and the transforms look up to two
	 * characters past the current one, so keep zeroed padding at the end
	 */
	if (insize > (SIZE_MAX - 4) / 2 ||
	    (buf = calloc(1, (insize * 2) + 4)) == NULL) {
		(void) fprintf(stderr, "bad alloc\n");
		return 0;
	}
	for (i = r = 0 ; r < insize ; r++) {
		code = (char)tolower((uint8_t)in[r]);
		if (code != 'x') {
			buf[i++] = code;
		} else if (r == 0) {
			/* ^x -> s */
			buf[i++] = 's';
		} else {
			buf[i++] = 'k';
			buf[i++] = 's';
		}
	}
	chars = i;
	/*
	 * beginning exceptions first: the leading letter is dropped, and
	 * the main transforms resume at the (possibly replaced) second one
	 */
	i = 0;
	for (ep = exceptions ; ep->len ; ep++) {
		if (strncmp(buf, ep->match, (size_t)ep->len) == 0) {
			buf[0] = ep->xform[0];
			buf[1] = ep->xform[1];
			i = 1;
			break;
		}
	}
	/* main transforms */
	for (o = 0 ; i < chars && o < outsize ; i++) {
		prev = (i > 0) ? buf[i - 1] : 0x0;
		next = buf[i + 1];
		switch(buf[i]) {
		case 'a':
		case 'e':
		case 'i':
		case 'o':
		case 'u':
			code = 'I';
			break;
		case 'b':
			/* only lost at the end of a word after 'm' */
			code = (prev == 'm' && i == chars - 1) ? 'I' : 'B';
			break;
		case 'c':
			if (prev == 's' &&
			    (next == 'e' || next == 'i' || next == 'y')) {
				code = 'I';
			} else if (next == 'h' ||
				   STRMATCH2(buf, i + 1, 'i', 'a')) {
				code = 'X';
			} else if (next == 'i' || next == 'e' || next == 'y') {
				code = 'S';
			} else {
				code = 'K';
			}
			break;
		case 'd':
			if (STRMATCH2(buf, i + 1, 'g', 'e') ||
			    STRMATCH2(buf, i + 1, 'g', 'y') ||
			    STRMATCH2(buf, i + 1, 'g', 'i')) {
				code = 'J';
				/* the 'g' is silent - step over it */
				i += 1;
			} else {
				code = 'T';
			}
			break;
		case 'f':
		case 'j':
		case 'l':
		case 'm':
		case 'n':
		case 'r':
			code = (char)toupper((unsigned char)buf[i]);
			break;
		case 'g':
			if (next == 'h' && i + 2 < chars &&
			    !metaphone_isvowel(buf[i + 2])) {
				/* "gh" not at end of word or before a vowel */
				code = 'I';
			} else if (next == 'n') {
				/* "gn" (which includes "gned") */
				code = 'I';
			} else {
				code = 'K';
			}
			break;
		case 'h':
			if (metaphone_isvowel(prev) && !metaphone_isvowel(next)) {
				code = 'I';
			} else if (prev != 0x0 && strchr("csptg", prev) != NULL) {
				code = 'I';
			} else {
				code = 'H';
			}
			break;
		case 'k':
			code = (prev == 'c') ? 'I' : 'K';
			break;
		case 'p':
			code = (next == 'h') ? 'F' : 'P';
			break;
		case 'q':
			code = 'K';
			break;
		case 's':
			if (next == 'h' || STRMATCH3(buf, i, 's', 'i', 'o')) {
				code = 'X';
			} else {
				code = 'S';
			}
			break;
		case 't':
			if (STRMATCH2(buf, i + 1, 'i', 'a') ||
			    STRMATCH2(buf, i + 1, 'i', 'o')) {
				code = 'X';
			} else if (next == 'h') {
				code = '0';
			} else if (STRMATCH2(buf, i + 1, 'c', 'h')) {
				code = 'I';
			} else {
				code = 'T';
			}
			break;
		case 'v':
			code = 'F';
			break;
		case 'w':
			code = metaphone_isvowel(next) ? 'W' : 'I';
			break;
		case 'y':
			code = metaphone_isvowel(next) ? 'Y' : 'I';
			break;
		case 'z':
			code = 'S';
			break;
		default:
			/* not a letter - pass it through as it is */
			code = buf[i];
			break;
		}
		/* skip dropped letters and repeats of the last code output */
		if (code != 'I' && (o == 0 || out[o - 1] != code)) {
			out[o++] = code;
		}
	}
	if (o < outsize) {
		out[o] = 0x0;
	}
	free(buf);
	return (int)o;
}

/*
 * return the IP address (v4 and v6 if available) for a host
 *
 * The host name in 'in' is looked up with getaddrinfo(3), and the first
 * IPv4 and the first IPv6 address found are written to the output in
 * numeric form, separated by a single space and NUL terminated.
 * Nothing is ever written beyond outsize bytes.
 *
 * Returns the length of the text written, or 0 if the host could not
 * be found.
 */
static int
reverse_resolve(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	struct addrinfo		*res0;
	struct addrinfo		*res;
	size_t			 sep;
	size_t			 o;
	char			*out = (char *)vp;
	int			 done4;
	int			 done6;
	int			*done;

	USE_ARG(insize);
	USE_ARG(op);
	if (outsize == 0) {
		return 0;
	}
	if (getaddrinfo(in, NULL, NULL, &res0) != 0) {
		(void) fprintf(stderr, "can't find host info for '%s'\n", in);
		return 0;
	}
	o = 0;
	done4 = done6 = 0;
	for (res = res0 ; res != NULL ; res = res->ai_next) {
		/* the same address can be listed more than once - use it once */
		if (res->ai_family == AF_INET) {
			done = &done4;
		} else if (res->ai_family == AF_INET6) {
			done = &done6;
		} else {
			continue;
		}
		if (*done) {
			continue;
		}
		/* need room for the separator, some text and the NUL */
		sep = (o > 0) ? 1 : 0;
		if (o + sep + 1 >= outsize) {
			break;
		}
		if (getnameinfo(res->ai_addr, res->ai_addrlen,
				&out[o + sep], (socklen_t)(outsize - o - sep),
				NULL, 0, NI_NUMERICHOST) != 0) {
			/* conversion failed - leave the output as it was */
			continue;
		}
		if (sep) {
			out[o] = ' ';
		}
		o += sep + strlen(&out[o + sep]);
		*done = 1;
	}
	freeaddrinfo(res0);
	out[o] = 0x0;
	return (int)o;
}

/*
 * return the hostname for an IP address (v4 or v6)
 *
 * The input must be a numeric address (AI_NUMERICHOST).  The name of the
 * first address for which a reverse lookup succeeds is placed in the
 * output, and its length is returned; 0 is returned if nothing was found.
 */
static int
resolve(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	const struct sockaddr	*sa;
	struct addrinfo		 hints;
	struct addrinfo		*list;
	struct addrinfo		*ai;
	unsigned		 salen;
	char			*out = (char *)vp;
	int			 o = 0;

	USE_ARG(insize);
	USE_ARG(op);
	(void) memset(&hints, 0x0, sizeof(hints));
	hints.ai_flags = AI_NUMERICHOST;
	if (getaddrinfo(in, NULL, &hints, &list) != 0) {
		(void) fprintf(stderr, "can't find host info for '%s'\n", in);
		return 0;
	}
	for (ai = list ; ai != NULL ; ai = ai->ai_next) {
		/* work out the address and its length for this family */
		switch(ai->ai_family) {
		case AF_INET:
			sa = (const struct sockaddr *)(void *)ai->ai_addr;
			salen = (unsigned)
			    ((struct sockaddr_in *)(void *)ai->ai_addr)->sin_len;
			break;
		case AF_INET6:
			sa = (const struct sockaddr *)(void *)ai->ai_addr;
			salen = (unsigned)
			    ((struct sockaddr_in6 *)(void *)ai->ai_addr)->sin6_len;
			break;
		default:
			continue;
		}
		/* a name is required - stop at the first one found */
		if (getnameinfo(sa, salen, &out[o], outsize - o,
				NULL, 0, NI_NAMEREQD) == 0) {
			o = strlen(out);
			break;
		}
	}
	freeaddrinfo(list);
	return o;
}

/*
 * compress using zlib
 *
 * The output is the original input length, as a uint64_t in host byte
 * order, followed by the zlib-compressed data.
 *
 * Returns the total number of output bytes (header included), or 0 if
 * the output buffer is too small or compression fails.
 */
static int
zlib_compress(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	unsigned long	 size;
	uint64_t	 origsize;
	char		*out = (char *)vp;

	USE_ARG(op);
	/* without room for the header the subtraction below would wrap */
	if (outsize <= sizeof(origsize)) {
		return 0;
	}
	size = (unsigned long)(outsize - sizeof(origsize));
	if (compress((uint8_t *)&out[sizeof(origsize)], &size,
		(const uint8_t *)in, (unsigned long)insize) != Z_OK) {
		return 0;
	}
	origsize = (uint64_t)insize;
	(void) memcpy(out, &origsize, sizeof(origsize));
	return (int)(size + sizeof(origsize));
}

/*
 * decompress using zlib
 *
 * The input is expected to be what zlib_compress() produces: the
 * original length as a uint64_t in host byte order, followed by the
 * zlib-compressed data.
 *
 * Returns the number of bytes decompressed, or 0 if the input is too
 * short, the output buffer cannot hold the original data, decompression
 * fails, or the decompressed size does not agree with the header.
 */
static int
zlib_decompress(const char *in, const size_t insize, const char *op, void *vp,
	size_t outsize)
{
	unsigned long	 size;
	uint64_t	 origsize;

	USE_ARG(op);
	/* there must be at least a complete header to read */
	if (insize < sizeof(origsize)) {
		return 0;
	}
	(void) memcpy(&origsize, in, sizeof(origsize));
	if (origsize > (uint64_t)outsize) {
		return 0;
	}
	/* the output carries no header, so all of it is available */
	size = (unsigned long)outsize;
	if (uncompress(vp, &size, (const uint8_t *)&in[sizeof(origsize)],
		(unsigned long)(insize - sizeof(origsize))) != Z_OK) {
		return 0;
	}
	/* trust what was decompressed, not what the header claims */
	if ((uint64_t)size != origsize) {
		return 0;
	}
	return (int)size;
}

/* support functions from here */

/*
 * return the value which is guaranteed to hold the output
 *
 * The multiplier string has the form "N%", optionally followed by "+M"
 * or "-M" with no spaces: the result is N percent of insize, plus (or
 * minus) M.  Returns 0 if the string is not of that form.
 */
static int
multiplier(const char *mult, size_t insize)
{
	static regex_t	r;
	regmatch_t	matchv[5];
	static int	compiled;
	size_t		total;

	if (!compiled) {
		/* only remember the regexp if it can actually be used */
		if (regcomp(&r, "^([0-9]+)%([-+][0-9]+)?", REG_EXTENDED) != 0) {
			(void) fprintf(stderr, "can't compile multiplier regexp\n");
			return 0;
		}
		compiled = 1;
	}
	if (regexec(&r, mult, 5, matchv, 0) != 0) {
		(void) fprintf(stderr, "bad multiplier '%s'\n", mult);
		return 0;
	}
	total = (re2num(mult, matchv[1].rm_so, matchv[1].rm_eo) * insize) / 100;
	/* the addend is optional - its offsets are -1 when it is absent */
	if (matchv[2].rm_so >= 0) {
		total += re2num(mult, matchv[2].rm_so, matchv[2].rm_eo);
	}
	return (int)total;
}

/*
 * find the operation to perform
 *
 * Returns the first codec in the table whose regular expression matches
 * op (any sub-matches are left in matchv), or NULL if op is NULL or no
 * codec matches.
 */
static codec_t *
findop(codecs_t *codecs, const char *op, const unsigned matchc,
	regmatch_t *matchv)
{
	unsigned	 idx;
	codec_t		*cur;

	if (op == NULL) {
		return NULL;
	}
	idx = 0;
	while (idx < codecs->c) {
		cur = &codecs->v[idx++];
		if (regexec(cur->r, op, matchc, matchv, 0) == 0) {
			return cur;
		}
	}
	return NULL;
}

/*
 * add format-related transformations
 *
 * Every entry is a regular expression for the operation name, the
 * function which implements it, the multiplier used to size the output
 * buffer, and whether an input buffer is needed.
 */
static void
codecs_add_format(codecs_t *codecs)
{
	codecs_add(codecs, "^asa", asa, "110%", 1);
	codecs_add(codecs, "^bswap(16|32|64)", byteswap, "0%+8", 1);
	/*
	 * dos to unix only removes carriage returns, so text which has
	 * none (or few) of them is as large as the input - 50% is not
	 * guaranteed to hold the output
	 */
	codecs_add(codecs, "^(dos( to |2)unix)", dos2unix, "100%", 1);
	codecs_add(codecs, "^hexdump", hexdump, "500%+80", 1);
	codecs_add(codecs, "^rot[0-9]+", rotate, "100%", 1);
	codecs_add(codecs, "^secs2str", secs2str, "100%+30", 1);
	codecs_add(codecs, "^str2secs", str2secs, "100%+30", 1);
}

/*
 * add charset-related transformations
 *
 * Every entry is a regular expression for the operation name, the
 * function which implements it, the multiplier used to size the output
 * buffer, and whether an input buffer is needed.  The table is searched
 * in the order of addition, so the more specific expressions must be
 * added before the more general ones (e.g. "strvis_c" before "strvis").
 */
static void
codecs_add_charset(codecs_t *codecs)
{
	codecs_add(codecs, "^ascii2ebcdic", ascii2ebcdic, "100%", 1);
	codecs_add(codecs, "^(atob|b(ase)?64[ ]*decode|from([-_ ])?base64)",
			xlate_b64decode, "80%", 1);
	codecs_add(codecs, "^(btoa|b(ase)?64[ ]*encode|to([-_ ])?base64)",
			xlate_b64encode, "150%", 1);
	codecs_add(codecs, "^b(ase)?85decode", base85decode, "90%", 1);
	codecs_add(codecs, "^b(ase)?85encode", base85encode, "150%", 1);
	codecs_add(codecs, "^bin2hex", bin2hex, "400%+10", 1);
	codecs_add(codecs, "^(capitali[sz]e|(to([-_ ])?|2)upper)",
			mixed2upper, "100%", 1);
	codecs_add(codecs, "^ebcdic2ascii", ebcdic2ascii, "100%", 1);
	codecs_add(codecs, "^from([-_ ])?(uri|html)", uri2ascii, "100%", 1);
	codecs_add(codecs, "^full(([-_ ])?)?uuencode", full_uuencode,
			"150%+30", 1);
	/* no spaces in the multiplier, or the "+10" is silently ignored */
	codecs_add(codecs, "^hex2bin", hex2bin, "25%+10", 1);
	/* every 'x' expands to two codes, and the result is terminated */
	codecs_add(codecs, "^metaphone", metaphone, "200%+1", 1);
	codecs_add(codecs, "^rad50decode", rad50decode, "140%", 1);
	codecs_add(codecs, "^rad50encode", rad50encode, "80%", 1);
	/* MAX_SOUNDEX (4) characters and the terminating NUL */
	codecs_add(codecs, "^soundex", soundex, "0%+5", 1);
	codecs_add(codecs, "^strvis([-_ ])?c", xlate_strvis_c, "300%", 1);
	codecs_add(codecs, "^strunvis([-_ ])?c", xlate_strunvis_c, "33%", 1);
	codecs_add(codecs, "^strunvis", xlate_strunvis_def, "33%", 1);
	codecs_add(codecs, "^strvis", xlate_strvis_def, "300%", 1);
	codecs_add(codecs, "^(to([-_ ])?|2)(html|uri)", ascii2uri, "300%", 1);
	codecs_add(codecs, "^(to([-_ ])?|2)lower", mixed2lower, "100%", 1);
	codecs_add(codecs, "^(to([-_ ])?|2)(runes|unicode)",
			utf8_to_unicode16, "150%", 1);
	codecs_add(codecs, "^(to([-_ ])?|2)upper", mixed2upper, "100%", 1);
	codecs_add(codecs, "^(to([-_ ])?|2)utf8", unicode16_to_utf8, "150%", 1);
	codecs_add(codecs, "^(unix(to|2)dos)", unix2dos, "200%", 1);
	codecs_add(codecs, "^uudecode", uudecode, "80%", 1);
	codecs_add(codecs, "^uuencode", uuencode, "200%", 1);
}

/*
 * add compression-related transformations
 *
 * The alternatives are grouped so that the '^' anchor applies to both
 * names; without the group "gzip" and "gunzip" would match anywhere in
 * the operation name.
 */
static void
codecs_add_compression(codecs_t *codecs)
{
	/*
	 * 8 bytes of length header, plus zlib's worst case, which is a
	 * little more than the input size plus 13 bytes
	 */
	codecs_add(codecs, "^(compress|gzip)", zlib_compress, "110%+24", 1);
	codecs_add(codecs, "^(decompress|gunzip)", zlib_decompress, "900%", 1);
}

/* add edit-related transformations - all of them need an input buffer */
static void
codecs_add_edit(codecs_t *codecs)
{
	static const struct {
		const char	*pattern;	/* regexp for the operation name */
		int		(*func)(const char *, const size_t,
					const char *, void *, size_t);
		const char	*mult;		/* output size multiplier */
	} edits[] = {
		{ "^edit",	edit,	"300%" },
		{ "^sed",	sed,	"300%" },
		{ "^substring:([-]?[0-9]+):([-]?[0-9]+)", substring, "100%" }
	};
	size_t	i;

	for (i = 0 ; i < sizeof(edits) / sizeof(edits[0]) ; i++) {
		codecs_add(codecs, edits[i].pattern, edits[i].func,
				edits[i].mult, 1);
	}
}

/*
 * add network-related transformations
 *
 * Both lookups get an output buffer of 1024 bytes, whatever the size of
 * the input.
 */
static void
codecs_add_network(codecs_t *codecs)
{
	static const char	namesize[] = "0%+1024";

	codecs_add(codecs, "^(gethostinfo|resolve)", resolve, namesize, 1);
	codecs_add(codecs,
		"^(getipaddress|rev[-_]?resolve|reverse[-_]?resolve)",
		reverse_resolve, namesize, 1);
}

/*
 * add digest-related transformations
 *
 * The size of the output buffer for each of these is a constant (0% of
 * the input size, plus a fixed number of bytes).
 */
static void
codecs_add_digest(codecs_t *codecs)
{
	static const struct {
		const char	*pattern;	/* regexp for the operation name */
		int		(*func)(const char *, const size_t,
					const char *, void *, size_t);
		const char	*mult;		/* output size multiplier */
	} digests[] = {
		{ "^md5",	md5,		"0%+33" },
		{ "^rmd160",	rmd160,		"0%+41" },
		{ "^sha1",	sha1,		"0%+41" },
		{ "^sha256",	sha256,		"0%+65" },
		{ "^sha512",	sha512,		"0%+129" },
		{ "^size",	sizefunc,	"0%+129" }
	};
	size_t	i;

	for (i = 0 ; i < sizeof(digests) / sizeof(digests[0]) ; i++) {
		codecs_add(codecs, digests[i].pattern, digests[i].func,
				digests[i].mult, 1);
	}
}

/* add fill-related transformations - these are added as needing no input */
static void
codecs_add_fill(codecs_t *codecs)
{
	const int	noinput = 0;

	codecs_add(codecs, "^randomi[sz]e", randomise, "100%", noinput);
	codecs_add(codecs, "^zero", zero, "100%", noinput);
}

/*
 * add hash-related transformations
 *
 * A single entry covers every "<name>hash" operation: the same function
 * is used for all of them, and it is passed the operation name.
 */
static void
codecs_add_hash(codecs_t *codecs)
{
	static const char	pattern[] =
		"^(dumb|dumbmul|lennart|crc|perl|perlxor|python|"
		"mouse|benstein|honeyman|pjw|bob|torek|byacc|tcl|gawk|"
		"gcc3_|gcc3_2_|nem)hash";

	codecs_add(codecs, pattern, hashfunc, "0%+4", 1);
}

/****************************************************************************/
/* exported functions start here */
/****************************************************************************/

/*
 * return the number of bytes to hold the output
 *
 * The table is loaded with all the codecs if it is still empty.
 * Returns 0 if there is no operation, or no codec matches it.
 */
int
codecs_size(codecs_t *codecs, const char *op, const unsigned insize)
{
	regmatch_t	 groups[10];
	codec_t		*found;

	if (op == NULL) {
		(void) fprintf(stderr, "no operation\n");
		return 0;
	}
	if (codecs->size == 0) {
		codecs_begin(codecs, "all", NULL);
	}
	found = findop(codecs, op, 10, groups);
	if (found != NULL) {
		return multiplier(found->multiplier, insize);
	}
	(void) fprintf(stderr, "no translation '%s' found\n", op);
	return 0;
}

/*
 * return an indication of whether an input buffer is needed
 *
 * The table is loaded with all the codecs if it is still empty.
 * Returns the value the matching codec was added with, or 0 if there
 * is no operation, or no codec matches it.
 */
int
codecs_input_needed(codecs_t *codecs, const char *op)
{
	regmatch_t	 groups[10];
	codec_t		*found;

	if (op == NULL) {
		(void) fprintf(stderr, "no operation\n");
		return 0;
	}
	if (codecs->size == 0) {
		codecs_begin(codecs, "all", NULL);
	}
	found = findop(codecs, op, 10, groups);
	if (found != NULL) {
		return found->inputneeded;
	}
	(void) fprintf(stderr, "no translation '%s' found\n", op);
	return 0;
}

/*
 * translate input to output - separate arrays
 *
 * The table is loaded with all the codecs if it is still empty.
 * Returns 0 if there is no operation or no codec matches it, -1 if the
 * codec needs an input buffer and 'in' is NULL, and otherwise whatever
 * the codec returns.
 */
int
codecs_transform(codecs_t *codecs, const char *in, const size_t len, const char *op,
	void *vp, size_t size)
{
	regmatch_t	 matchv[10];
	codec_t		*xp;
	char		*out = (char *)vp;

	/* a NULL operation must not get as far as the "%s" below */
	if (op == NULL) {
		(void) fprintf(stderr, "no operation\n");
		return 0;
	}
	if (codecs->size == 0) {
		codecs_begin(codecs, "all", NULL);
	}
	if ((xp = findop(codecs, op, 10, matchv)) == NULL) {
		(void) fprintf(stderr, "no translation '%s' found\n", op);
		return 0;
	}
	if (in == NULL && xp->inputneeded) {
		(void) fprintf(stderr, "NULL input buffer\n");
		return -1;
	}
	return (*xp->func)(in, len, op, out, size);
}

/*
 * translate input to output - dynamically-sized output array
 *
 * vp is the address of a pointer: on success it is set to a newly
 * allocated output buffer, sized using the codec's multiplier, which
 * the caller must free, and *size is set to the size of that buffer.
 *
 * Returns -1 if there is no operation, no codec matches it, the codec
 * needs an input buffer and 'in' is NULL, or the allocation fails;
 * otherwise whatever the codec returns.
 */
int
codecs_alloc_transform(codecs_t *codecs, const char *in, const size_t len, const char *op,
		void *vp, size_t *size)
{
	regmatch_t	 matchv[10];
	codec_t		*xp;
	char		**dyn = (char **)vp;

	/* a NULL operation must not get as far as the "%s" below */
	if (op == NULL) {
		(void) fprintf(stderr, "no operation\n");
		return -1;
	}
	if (codecs->size == 0) {
		codecs_begin(codecs, "all", NULL);
	}
	if ((xp = findop(codecs, op, 10, matchv)) == NULL) {
		(void) fprintf(stderr, "no translation '%s' found\n", op);
		return -1;
	}
	if (in == NULL && xp->inputneeded) {
		(void) fprintf(stderr, "NULL input buffer\n");
		return -1;
	}
	/* pass the length on untouched - don't truncate it to unsigned */
	*size = multiplier(xp->multiplier, len);
	if ((*dyn = calloc(1, *size)) == NULL) {
		(void) fprintf(stderr, "bad alloc (%zu)\n", *size);
		return -1;
	}
	return (*xp->func)(in, len, op, *dyn, *size);
}

/*
 * translate input to output - in place
 *
 * The transformation is made into a temporary buffer, and the result is
 * then copied back over the input and NUL terminated, so the caller's
 * buffer must have room for the result and one more byte.
 *
 * Returns the length of the result, or -1 on any failure.
 */
int
codecs_inplace_transform(codecs_t *codecs, void *vp, int len, const char *op)
{
	size_t	 outsize;
	char	*out;
	char	*in = (char *)vp;
	int	 ret;

	if (in == NULL) {
		(void) fprintf(stderr, "NULL input buffer\n");
		return -1;
	}
	if (len < 0) {
		(void) fprintf(stderr, "bad length\n");
		return -1;
	}
	if (codecs->size == 0) {
		codecs_begin(codecs, "all", NULL);
	}
	/*
	 * the output may or may not have been allocated when a failure
	 * is reported, so start from NULL to be able to free it safely
	 */
	out = NULL;
	if ((ret = codecs_alloc_transform(codecs, in, (size_t)len, op, &out, &outsize)) < 0) {
		free(out);
		return -1;
	}
	(void) memcpy(in, out, (size_t)ret);
	in[ret] = 0x0;
	free(out);
	return ret;
}

/*
 * return non-zero if this is a valid operation
 *
 * The table is loaded with all the codecs if it is still empty.
 */
int
codecs_valid_op(codecs_t *codecs, const char *op)
{
	regmatch_t	 groups[10];
	codec_t		*found;

	if (codecs->size == 0) {
		codecs_begin(codecs, "all", NULL);
	}
	found = findop(codecs, op, 10, groups);
	return (found == NULL) ? 0 : 1;
}

/*
 * add the function to the table
 *
 * op is the regular expression which operation names are matched
 * against, func the function which does the work, mult the multiplier
 * string used to size the output buffer, and inputneeded whether the
 * function needs an input buffer.  The table gets room for 63 entries
 * when it is first used, and grows by 31 entries whenever it is full.
 *
 * Returns 1 on success.  Returns 0 if the table is locked, memory runs
 * out, or the expression does not compile - in which case the table is
 * left without the new entry.
 */
int
codecs_add(codecs_t *codecs, const char *op,
	int (*func)(const char *, const size_t, const char *, void *, size_t),
	const char *mult, const int inputneeded)
{
	unsigned	 newsize;
	codec_t		*codec;
	regex_t		*re;
	char		*opcopy;
	char		*multcopy;

	if (codecs->locked) {
		(void) fprintf(stderr, "codecs table is locked\n");
		return 0;
	}
	newsize = 0;
	if (codecs->size == 0) {
		/* first use: allocate the new size, not the old (empty) one */
		newsize = 63;
		codec = calloc(newsize, sizeof(*codecs->v));
	} else if (codecs->c == codecs->size) {
		newsize = codecs->size + 31;
		codec = realloc(codecs->v, newsize * sizeof(*codecs->v));
		if (codec != NULL) {
			/* realloc doesn't clear the entries it adds */
			(void) memset(&codec[codecs->size], 0x0,
				(newsize - codecs->size) * sizeof(*codec));
		}
	}
	if (newsize) {
		if (codec == NULL) {
			(void) fprintf(stderr, "can't add codec '%s'\n", op);
			return 0;
		}
		codecs->size = newsize;
		codecs->v = codec;
	}
	/* build the entry completely before it becomes part of the table */
	opcopy = strdup(op);
	multcopy = strdup(mult);
	re = calloc(1, sizeof(*re));
	if (opcopy == NULL || multcopy == NULL || re == NULL) {
		(void) fprintf(stderr, "can't allocate space for codec\n");
		free(opcopy);
		free(multcopy);
		free(re);
		return 0;
	}
	if (regcomp(re, op, REG_EXTENDED) != 0) {
		(void) fprintf(stderr, "can't compile regexp '%s'\n", op);
		free(opcopy);
		free(multcopy);
		free(re);
		return 0;
	}
	codec = &codecs->v[codecs->c];
	codec->op = opcopy;
	codec->r = re;
	codec->multiplier = multcopy;
	codec->func = func;
	codec->inputneeded = inputneeded;
	codecs->c += 1;
	return 1;
}

/*
 * load the table
 *
 * The arguments after codecs are a NULL-terminated list of the names of
 * the subsets to load: "charset", "compression", "digest", "fill",
 * "format", "edit", "hash" and "network", or "all" for every one of
 * them.  Names which are not recognised are ignored.
 *
 * Returns 1, or 0 if the table is locked.
 */
int
codecs_begin(codecs_t *codecs, const char *subset, ...)
{
	static const struct {
		const char	*name;			/* name of the subset */
		void		(*load)(codecs_t *);	/* adds its codecs */
	} subsets[] = {
		{ "charset",		codecs_add_charset },
		{ "compression",	codecs_add_compression },
		{ "digest",		codecs_add_digest },
		{ "fill",		codecs_add_fill },
		{ "format",		codecs_add_format },
		{ "edit",		codecs_add_edit },
		{ "hash",		codecs_add_hash },
		{ "network",		codecs_add_network }
	};
	const char	*want;
	va_list		 args;
	size_t		 s;
	int		 all;

	if (codecs->locked) {
		(void) fprintf(stderr, "codecs table is locked\n");
		return 0;
	}
	va_start(args, subset);
	for (want = subset ; want != NULL ; want = va_arg(args, char *)) {
		all = (strcmp(want, "all") == 0);
		/* subsets are always loaded in the order of the table above */
		for (s = 0 ; s < sizeof(subsets) / sizeof(subsets[0]) ; s++) {
			if (all || strcmp(want, subsets[s].name) == 0) {
				(*subsets[s].load)(codecs);
			}
		}
	}
	va_end(args);
	return 1;
}

/* placeholder - nothing is released, and success is always returned */
int
codecs_end(codecs_t *codecs)
{
	const int	ok = 1;

	USE_ARG(codecs);
	return ok;
}

/*
 * make sure no changes can happen to the table
 *
 * The time of the lockdown is what marks the table as locked.
 * Returns 1, or 0 if the table was already locked.
 */
int
codecs_lockdown(codecs_t *codecs)
{
	if (!codecs->locked) {
		codecs->locked = time(NULL);
		return 1;
	}
	(void) fprintf(stderr, "codecs table is locked\n");
	return 0;
}
