/*
 * GNU m4 -- A simple macro processor
 * Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 1, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Code for all builtin macros, initialisation of symbol table, and
 * expansion of user defined macros.
 */

#include "m4.h"
#include "regex.h"

/*
 * ARG(i) yields the text of argument number I of the builtin being
 * executed, or the empty string when fewer than I+1 arguments were
 * supplied.  It expands to references to variables named "argc" and
 * "argv", which must be in scope wherever the macro is used.
 */
#define ARG(i)	(argc > (i) ? TOKEN_DATA_TEXT(argv[i]) : "")


/*
 * Initialisation of builtin and predefined macros.  The table
 * "builtin_tab" is used both for initialisation, and by the "builtin"
 * builtin.
 *
 * The handlers are declared here, ahead of their definitions, because
 * builtin_tab below stores their addresses.
 */
static void m4___file__();
static void m4___line__();
static void m4_builtin();
static void m4_changecom();
static void m4_changequote();
static void m4_debugmode();
static void m4_debugfile();
static void m4_decr();
static void m4_define();
static void m4_defn();
static void m4_divert();
static void m4_divnum();
static void m4_dnl();
static void m4_dumpdef();
static void m4_errprint();
static void m4_esyscmd();
static void m4_eval();
static void m4_format();
static void m4_ifdef();
static void m4_ifelse();
static void m4_include();
static void m4_incr();
static void m4_index();
static void m4_indir();
static void m4_len();
static void m4_m4exit();
static void m4_m4wrap();
static void m4_maketemp();
static void m4_patsubst();
static void m4_popdef();
static void m4_pushdef();
static void m4_regexp();
static void m4_shift();
static void m4_sinclude();
static void m4_substr();
static void m4_syscmd();
static void m4_sysval();
static void m4_traceoff();
static void m4_traceon();
static void m4_translit();
static void m4_undefine();
static void m4_undivert();
/* NOTE(review): m4_test is non-static and has no entry in builtin_tab;
   it is presumably defined elsewhere -- confirm it is still needed. */
void m4_test();

/*
 * Table of all builtin macros, terminated by an entry whose name is 0.
 * Columns, as used by the code in this file:
 *   name        -- the macro name installed by builtin_init();
 *   gnu         -- true for GNU extensions, which builtin_init() skips
 *                  when no_gnu_extensions is set;
 *   macro-args  -- copied into SYMBOL_MACRO_ARGS by define_builtin();
 *   function    -- the C function implementing the macro.
 */
static builtin
builtin_tab[] = {

    /* name		gnu	macro-args	function */

    { "__file__",	true,	false,		m4___file__ },
    { "__line__",	true,	false,		m4___line__ },
    { "builtin",	true,	false,		m4_builtin },
    { "changecom",	false,	false,		m4_changecom },
    { "changequote",	false,	false,		m4_changequote },
    { "debugmode",	true,	false,		m4_debugmode },
    { "debugfile",	true,	false,		m4_debugfile },
    { "decr",		false,	false,		m4_decr },
    { "define",		false,	true,		m4_define },
    { "defn",		false,	false,		m4_defn },
    { "divert",		false,	false,		m4_divert },
    { "divnum",		false,	false,		m4_divnum },
    { "dnl",		false,	false,		m4_dnl },
    { "dumpdef",	false,	false,		m4_dumpdef },
    { "errprint",	false,	false,		m4_errprint },
    { "esyscmd",	true,	false,		m4_esyscmd },
    { "eval",		false,	false,		m4_eval },
    { "format",		true,	false,		m4_format },
    { "ifdef",		false,	false,		m4_ifdef },
    { "ifelse",		false,	false,		m4_ifelse },
    { "include",	false,	false,		m4_include },
    { "incr",		false,	false,		m4_incr },
    { "index",		false,	false,		m4_index },
    { "indir",		true,	false,		m4_indir },
    { "len",		false,	false,		m4_len },
    { "m4exit",		false,	false,		m4_m4exit },
    { "m4wrap",		false,	false,		m4_m4wrap },
    { "maketemp",	false,	false,		m4_maketemp },
    { "patsubst",	true,	false,		m4_patsubst },
    { "popdef",		false,	false,		m4_popdef },
    { "pushdef",	false,	true,		m4_pushdef },
    { "regexp",		true,	false,		m4_regexp },
    { "shift",		false,	false,		m4_shift },
    { "sinclude",	false,	false,		m4_sinclude },
    { "substr"	,	false,	false,		m4_substr },
    { "syscmd",		false,	false,		m4_syscmd },
    { "sysval",		false,	false,		m4_sysval },
    { "traceoff",	false,	false,		m4_traceoff },
    { "traceon",	false,	false,		m4_traceon },
    { "translit",	false,	false,		m4_translit },
    { "undefine",	false,	false,		m4_undefine },
    { "undivert",	false,	false,		m4_undivert },

    /* End-of-table marker: the lookup loops stop at a nil name. */
    { 0, false,	false, 0 },
};

/*
 * Table of predefined text macros, consumed by builtin_init().  Each
 * entry presumably lists, in order, the name used when
 * no_gnu_extensions is set (unix_name), the name used otherwise
 * (gnu_name), and the expansion text (the "func" member) -- the member
 * order is declared outside this file; confirm in m4.h.  A nil name
 * means the macro is not defined in that mode.  The table ends at the
 * entry whose expansion text is nil.
 */
static predefined
predefined_tab[] = {
    { "unix",	"__unix__",	"" },
    { nil,	"__gnu__",	"" },

    { nil,	nil,		nil },
};

/* The number of the currently active diversion.  Reset to 0 by
   builtin_init(), updated by m4_divert(), reported by m4_divnum(). */
static int current_diversion;


/*
 * Search builtin_tab for the entry whose handler function is FUNC.
 * Return a pointer to that entry, or nil if no builtin uses FUNC.
 */

builtin *
find_builtin_by_addr(func)
    builtin_func *func;
{
    builtin *entry = builtin_tab;

    while (entry->name != nil) {
	if (entry->func == func)
	    return entry;
	entry++;
    }
    return nil;
}

/*
 * Search builtin_tab for the entry named NAME (exact match).  Return
 * a pointer to that entry, or nil if there is no such builtin.
 */

builtin *
find_builtin_by_name(name)
    char *name;
{
    builtin *entry = builtin_tab;

    while (entry->name != nil) {
	if (strcmp(entry->name, name) == 0)
	    return entry;
	entry++;
    }
    return nil;
}


/*
 * Install a builtin macro with name NAME, bound to the C function given
 * in BP.  MODE is SYMBOL_INSERT or SYMBOL_PUSHDEF.  TRACED defines
 * wheather NAME is to be traced.
 */
static void
define_builtin(name, bp, mode, traced)
    char *name;
    builtin *bp;
    symbol_lookup mode;
    boolean traced;
{
    symbol *sym;

    sym = lookup_symbol(name, mode);
    SYMBOL_TYPE(sym) = TOKEN_FUNC;
    SYMBOL_MACRO_ARGS(sym) = bp->groks_macro_args;
    SYMBOL_FUNC(sym) = bp->func;
    SYMBOL_TRACED(sym) = traced;
}

/*
 * Define a predefined or user-defined macro, with name NAME, and
 * expansion TEXT (which is copied).  MODE distinguishes between the
 * "define" and the "pushdef" case.  It is also used from main().
 */
void
define_user_macro(name, text, mode)
    char *name;
    char *text;
    symbol_lookup mode;
{
    symbol *entry = lookup_symbol(name, mode);

    /* An existing text definition is being replaced: drop the old copy. */
    if (SYMBOL_TYPE(entry) == TOKEN_TEXT)
	xfree(SYMBOL_TEXT(entry));

    SYMBOL_TEXT(entry) = xstrdup(text);
    SYMBOL_TYPE(entry) = TOKEN_TEXT;
}

/*
 * Enter every builtin and predefined macro into the symbol table, and
 * reset the current diversion to 0.  Builtins flagged as GNU
 * extensions are left out when no_gnu_extensions is set; predefined
 * macros are entered under their unix or GNU name accordingly.
 */
void
builtin_init()
{
    builtin *bp;
    predefined *pp;

    for (bp = builtin_tab; bp->name != nil; bp++) {
	if (no_gnu_extensions && bp->gnu_extension)
	    continue;
	define_builtin(bp->name, bp, SYMBOL_INSERT, false);
    }

    for (pp = predefined_tab; pp->func != nil; pp++) {
	char *name = no_gnu_extensions ? pp->unix_name : pp->gnu_name;

	if (name != nil)
	    define_user_macro(name, pp->func, SYMBOL_INSERT);
    }

    current_diversion = 0;
}

/*
 * Give friendly warnings if a builtin macro is passed an inappropriate
 * number of arguments.  NAME is the macro name for messages, ARGC is
 * the actual number of arguments, MIN is the minimum number of
 * acceptable arguments and MAX the maximum; either limit is ignored
 * when it is not positive.
 *
 * Return true only when there are too few arguments.  Excess arguments
 * produce a warning but still return false.
 */
static boolean
bad_argc(name, argc, min, max)
    char *name;
    int argc;
    int min;
    int max;
{
    if (min > 0 && argc < min) {
	warning("too few arguments to %s", name);
	return true;
    }

    if (max > 0 && argc > max)
	warning("excess arguments to %s ignored", name);

    return false;
}

/*
 * Convert the string ARG to an int stored through VALUEP, using
 * sscanf() with "%d".  On failure, report an error naming the macro
 * MACRO (a string) and return false; return true on success.
 */
static boolean
numeric_arg(macro, arg, valuep)
    char *macro;
    char *arg;
    int *valuep;
{
    if (sscanf(arg, "%d", valuep) == 1)
	return true;

    error("non-numeric argument to %s", macro);
    return false;
}

/*
 * The function ntoa() converts VALUE to an ascii representation in
 * radix RADIX.  The representation is always unsigned, unless the radix
 * is 10.  The result lives in a static buffer that is overwritten by
 * the next call.
 */

/* Digits for number to ascii conversions; also bounds the radix
   accepted by m4_eval(). */
static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";

static char *
ntoa(value, radix)
    register int value;
    int radix;
{
    static char str[256];
    register char *cursor = str + sizeof str;
    unsigned int magnitude;
    boolean minus = false;

    /* The string is built backwards, starting from the terminator. */
    *--cursor = '\0';

    if (value < 0 && radix == 10) {
	int low;

	minus = true;

	/* Work on |value| - 1, which is representable even for the most
	   negative int, and emit the lowest digit by hand. */
	value = -(value + 1);
	low = value % radix;

	if (low != radix - 1) {
	    *--cursor = digits[low + 1];
	    value = value / radix;
	} else {
	    /* Adding the 1 back carries into the next digit. */
	    *--cursor = '0';
	    value = value / radix + 1;
	}

	if (value == 0) {
	    *--cursor = '-';
	    return cursor;
	}
    }

    magnitude = (unsigned int)value;
    do {
	*--cursor = digits[magnitude % radix];
	magnitude = magnitude / radix;
    } while (magnitude > 0);

    if (minus)
	*--cursor = '-';
    return cursor;
}

/*
 * Append the decimal representation of VAL to the obstack OBS.  Used
 * by the macros that expand to numbers.
 */
static void
shipout_int(obs, val)
    struct obstack *obs;
    int val;
{
    char *text = ntoa(val, 10);

    obstack_grow(obs, text, strlen(text));
}


/*
 * Append the arguments ARGV[1] .. ARGV[ARGC-1] to the obstack OBS,
 * separated by SEP.  If QUOTED is true, each argument is wrapped in
 * the current quote strings.
 */
static void
dump_args(obs, argc, argv, sep, quoted)
    struct obstack *obs;
    int argc;
    token_data **argv;
    char *sep;
    boolean quoted;
{
    int seplen = strlen(sep);
    int n;

    for (n = 1; n < argc; n++) {
	char *text = TOKEN_DATA_TEXT(argv[n]);

	if (n != 1)
	    obstack_grow(obs, sep, seplen);

	if (quoted) {
	    obstack_grow(obs, lquote, len_lquote);
	    obstack_grow(obs, text, strlen(text));
	    obstack_grow(obs, rquote, len_rquote);
	} else
	    obstack_grow(obs, text, strlen(text));
    }

}


/*
 * The rest of this file is code for builtins and expansion of user
 * defined macros.  All the functions for builtins have a prototype as:
 *
 * 	void m4_MACRONAME(struct obstack *obs, int argc, token_data *argv[]);
 *
 * The functions are expected to leave their expansion on the obstack
 * OBS, as an unfinished object.  ARGV is a table of ARGC pointers to
 * the individual arguments to the macro.  Please note that in general
 * argv[argc] != nil.
 */

/*
 * The first section contains macros for defining, undefining,
 * examining, changing, ... other macros.
 */

/*
 * The function define_macro is common for the builtins "define" and
 * "pushdef".  ARGC and ARGV are as for the caller, and the MODE
 * argument determines how the macro name is entered into the symbol
 * table.
 *
 * Nothing is defined when there are too few arguments, when the name
 * argument is not plain text, or when a builtin token given as the
 * definition cannot be found in the builtin table.
 */
static void
define_macro(argc, argv, mode)
    int argc;
    token_data **argv;
    symbol_lookup mode;
{
    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 3, 3))
	return;
    if (TOKEN_DATA_TYPE(argv[1]) != TOKEN_TEXT)
	return;

    if (TOKEN_DATA_TYPE(argv[2]) == TOKEN_TEXT) {
	define_user_macro(ARG(1), ARG(2), mode);
    } else if (TOKEN_DATA_TYPE(argv[2]) == TOKEN_FUNC) {
	builtin *bp = find_builtin_by_addr(TOKEN_DATA_FUNC(argv[2]));

	if (bp != nil)
	    define_builtin(ARG(1), bp, mode, TOKEN_DATA_FUNC_TRACED(argv[2]));
    } else {
	internal_error("Bad token data type in define_macro()");
    }
}

/*
 * The builtin "define": hand the arguments to define_macro() with
 * mode SYMBOL_INSERT.  OBS is not used; nothing is added to it here.
 */
static void
m4_define(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    define_macro(argc, argv, SYMBOL_INSERT);
}


/*
 * The builtin "undefine": look up the macro named by the first
 * argument with mode SYMBOL_DELETE.  Does nothing but warn when the
 * argument is missing.
 */
static void
m4_undefine(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	lookup_symbol(ARG(1), SYMBOL_DELETE);
}

/*
 * The builtin "pushdef": hand the arguments to define_macro() with
 * mode SYMBOL_PUSHDEF.  OBS is not used; nothing is added to it here.
 */
static void
m4_pushdef(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    define_macro(argc, argv,  SYMBOL_PUSHDEF);
}


/*
 * The builtin "popdef": look up the macro named by the first argument
 * with mode SYMBOL_POPDEF.  Does nothing but warn when the argument is
 * missing.
 */
static void
m4_popdef(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	lookup_symbol(ARG(1), SYMBOL_POPDEF);
}


/*
 * Conditionals of m4.
 */
/*
 * The builtin "ifdef": expand to the second argument if the macro
 * named by the first argument is found in the symbol table; otherwise
 * expand to the third argument, but only when exactly three arguments
 * were given.  In every other case the expansion is empty.
 */
static void
m4_ifdef(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *result = nil;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 3, 4))
	return;

    if (lookup_symbol(ARG(1), SYMBOL_LOOKUP) != nil)
	result = ARG(2);
    else if (argc == 4)
	result = ARG(3);

    if (result == nil)
	return;
    obstack_grow(obs, result, strlen(result));
}

/*
 * The builtin "ifelse": compare the first two arguments; if they are
 * equal the expansion is the third.  Otherwise, with two arguments
 * left the expansion is empty, with three or four it is the fourth
 * (empty if absent), and with any other count the first three
 * arguments are dropped and the comparison starts over.  A "too few
 * arguments" warning is issued on every round where fewer than three
 * arguments remain, but evaluation continues regardless.
 */
static void
m4_ifelse(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *macro_name = TOKEN_DATA_TEXT(argv[0]);
    char *result = nil;

    /* Step past the macro name so that ARG(0) is the first argument. */
    argv++;
    argc--;

    do {
	bad_argc(macro_name, argc, 3, -1);

	if (strcmp(ARG(0), ARG(1)) == 0)
	    result = ARG(2);
	else if (argc == 2)
	    result = "";
	else if (argc == 3 || argc == 4)
	    result = ARG(3);
	else {
	    argv += 3;
	    argc -= 3;
	}
    } while (result == nil);

    obstack_grow(obs, result, strlen(result));
}


/*
 * The function dump_symbol() is for use by "dumpdef".  It builds up a
 * table of all defined, un-shadowed, symbols.  The structure
 * dump_symbol_data is used to pass the information needed from call to
 * call to dump_symbol.
 */

struct dump_symbol_data {
    struct obstack *obs;		/* obstack the table is grown on */
    symbol **base;			/* base of table; refreshed after each growth */
    int size;				/* number of entries in the table */
};

/*
 * Append SYM to the table described by DATA, unless SYM is shadowed
 * or has type TOKEN_VOID.  The table base is re-read from the obstack
 * after growing it.
 */
static void
dump_symbol(sym, data)
    symbol *sym;
    struct dump_symbol_data *data;
{
    if (SYMBOL_SHADOWED(sym) || SYMBOL_TYPE(sym) == TOKEN_VOID)
	return;

    obstack_blank(data->obs, sizeof(symbol*));
    data->base = (symbol **)obstack_base(data->obs);
    data->base[data->size] = sym;
    data->size++;
}

/*
 * qsort comparison routine for the table made in m4_dumpdef(): orders
 * two table slots by the names of the symbols they point to.
 */
static int
dumpdef_cmp(s1, s2)
    symbol **s1, **s2;
{
    symbol *left = *s1;
    symbol *right = *s2;

    return strcmp(SYMBOL_NAME(left), SYMBOL_NAME(right));
}

/*
 * Implementation of "dumpdef" itself.  It builds up a table of pointers
 * to symbols on OBS, sorts it by name and prints the sorted table with
 * debug_print().  With no arguments all symbols are dumped, otherwise
 * only the named ones; unknown names are reported with error().
 *
 * The table is scratch data, not macro expansion.  It is therefore
 * closed off with obstack_finish() before returning, the same way
 * m4_errprint() disposes of its text, so that the caller does not find
 * raw pointer bytes in the unfinished object on OBS.
 */
static void
m4_dumpdef(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    symbol *s;
    int i;
    struct dump_symbol_data data;
    builtin *bp;

    data.obs = obs;
    data.base = (symbol **)obstack_base(obs);
    data.size = 0;

    if (argc == 1) {
	hack_all_symbols(dump_symbol, (char *)&data);
    } else {
	for (i = 1; i < argc; i++) {
	    s = lookup_symbol(TOKEN_DATA_TEXT(argv[i]), SYMBOL_LOOKUP);
	    if (s != nil && SYMBOL_TYPE(s) != TOKEN_VOID)
		dump_symbol(s, &data);
	    else
		error("Undefined name %s", TOKEN_DATA_TEXT(argv[i]));
	}
    }

    qsort((char*)data.base, data.size, sizeof(symbol*), dumpdef_cmp);

    /* Make the table invisible to the caller; data.base stays valid. */
    (void)obstack_finish(obs);

    for ( ; data.size > 0; --data.size, data.base++) {
	debug_print("%s:\t", SYMBOL_NAME(data.base[0]));

	switch (SYMBOL_TYPE(data.base[0])) {
	case TOKEN_TEXT:
	    if (debug_level & debug_trace_quote)
		debug_print("%s%s%s\n", lquote, SYMBOL_TEXT(data.base[0]), rquote);
	    else
		debug_print("%s\n", SYMBOL_TEXT(data.base[0]));
	    break;
	case TOKEN_FUNC:
	    bp = find_builtin_by_addr(SYMBOL_FUNC(data.base[0]));
	    /* Never dereference a missing entry, even if internal_error()
	       should return. */
	    if (bp == nil)
		internal_error("builtin not found in builtin table!");
	    else
		debug_print("<%s>\n", bp->name);
	    break;
	default:
	    internal_error("Bad token data type in m4_dumpdef()");
	    break;
	}
    }
}

/*
 * This is GNU specific.  The builtin "builtin" allows calls to builtin
 * macros, even if their definition has been overridden or shadowed.  It
 * is thus possible to redefine builtins, and still access their
 * original definition.
 *
 * This macro is not available in compatibility mode.
 */
static void
m4_builtin(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    struct builtin *bp;
    char *name = ARG(1);

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, -1))
	return;

    bp = find_builtin_by_name(name);
    if (bp == nil)
	error("Undefined name %s", name);
    else
	(*bp->func)(obs, argc-1, argv+1);
}

/*
 * This is GNU specific.  The builtin "indir" allows indirect calls to
 * macros, even if their name is not a proper macro name.  It
 * is thus possible to define macros with ill-formed names for internal
 * use in larger macro packages.
 *
 * The first argument names the macro and is required, just as for
 * "builtin"; the macro is called with the remaining arguments.  An
 * unknown name is reported with error().
 *
 * This macro is not available in compatibility mode.
 */
static void
m4_indir(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    symbol *s;
    char *name = ARG(1);

    /* A minimum of 1 could never fail; the macro name is mandatory. */
    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, -1))
	return;

    s = lookup_symbol(name, SYMBOL_LOOKUP);
    if (s == nil)
	error("Undefined macro `%s'", name);
    else
	call_macro(s, argc - 1, argv + 1, obs);
}

/*
 * The macro "defn" returns the quoted definition of the macro named by
 * the first argument.  If the macro is a builtin, its function is
 * handed to push_macro() instead of being written to OBS.  Undefined
 * names and symbols of type TOKEN_VOID expand to nothing.
 */
static void
m4_defn(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    symbol *s;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	return;

    s = lookup_symbol(ARG(1), SYMBOL_LOOKUP);
    if (s == nil || SYMBOL_TYPE(s) == TOKEN_VOID)
	return;

    if (SYMBOL_TYPE(s) == TOKEN_TEXT) {
	char *text = SYMBOL_TEXT(s);

	obstack_grow(obs, lquote, len_lquote);
	obstack_grow(obs, text, strlen(text));
	obstack_grow(obs, rquote, len_rquote);
    } else if (SYMBOL_TYPE(s) == TOKEN_FUNC)
	push_macro(SYMBOL_FUNC(s), SYMBOL_TRACED(s));
    else
	internal_error("Bad symbol type in m4_defn()");
}


/*
 * This section contains macros to handle the builtins "syscmd",
 * "esyscmd" and "sysval".  "esyscmd" is GNU specific.
 */

/* Exit code from last "syscmd" command. */
static int sysval;

/*
 * The builtin "syscmd": run the first argument with system() and
 * remember the returned status in sysval.  Nothing is added to OBS.
 */
static void
m4_syscmd(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	sysval = system(ARG(1));
}

/*
 * The builtin "esyscmd": run the first argument through popen() and
 * copy everything the command writes onto OBS.  sysval receives the
 * value of pclose(), or 0xff << 8 if the pipe could not be opened.
 */
static void
m4_esyscmd(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    FILE *cmd_out;
    int c;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	return;

    cmd_out = popen(ARG(1), "r");
    if (cmd_out != nil) {
	for (c = getc(cmd_out); c != EOF; c = getc(cmd_out))
	    obstack_1grow(obs, (char)c);
	sysval = pclose(cmd_out);
	return;
    }

    error("Cannot open pipe to command \"%s\": %s", ARG(1), syserr());
    sysval = 0xff << 8;
}

/*
 * The builtin "sysval": expand to the saved status of the last
 * command.  Under MSDOS the stored value is used as is, elsewhere it
 * is shifted right by 8 bits first.  Arguments are ignored.
 */
static void
m4_sysval(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int status = sysval;

#ifndef MSDOS
    status >>= 8;
#endif
    shipout_int(obs, status);
}



/*
 * This section contains the top level code for the "eval" builtin.  The
 * actual work is done in the function evaluate(), which lives in
 * eval.c.
 *
 * The first argument is the expression.  The optional second argument
 * is the output radix (default 10; it must be greater than 1 and no
 * larger than the number of characters in "digits").  The optional
 * third argument is the minimum number of characters to output
 * (default 1, must be positive); shorter results are padded with
 * zeros after the sign.
 */

static void
m4_eval(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int value;
    int radix = 10;
    int min = 1;
    char *s;
    /* numeric_arg() wants the macro name as a string, not the token. */
    char *me = TOKEN_DATA_TEXT(argv[0]);

    if (bad_argc(me, argc, 2, 4))
	return;

    if (argc >= 3 && !numeric_arg(me, ARG(2), &radix))
	return;

    if (radix <= 1 || radix > (int)strlen(digits)) {
	error("radix in eval out of range (radix = %d)", radix);
	return;
    }

    if (argc >= 4 && !numeric_arg(me, ARG(3), &min))
	return;
    if  (min <= 0) {
	error("negative width to eval");
	return;
    }

    if (evaluate(ARG(1), &value))
	return;

    s = ntoa(value, radix);

    /* The sign counts towards the width, and goes before the padding. */
    if (*s == '-') {
	obstack_1grow(obs, '-');
	min--;
	s++;
    }
    for (min -= strlen(s); --min >= 0; )
	obstack_1grow(obs, '0');

    obstack_grow(obs, s, strlen(s));
}

/*
 * The builtin "incr": expand to the numeric value of the first
 * argument plus one.  A non-numeric argument is reported by
 * numeric_arg() and gives an empty expansion.
 */
static void
m4_incr(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int value;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	return;

    /* numeric_arg() wants the macro name as a string, not the token. */
    if (!numeric_arg(TOKEN_DATA_TEXT(argv[0]), ARG(1), &value))
	return;

    shipout_int(obs, value+1);
}

/*
 * The builtin "decr": expand to the numeric value of the first
 * argument minus one.  A non-numeric argument is reported by
 * numeric_arg() and gives an empty expansion.
 */
static void
m4_decr(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int value;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	return;

    /* numeric_arg() wants the macro name as a string, not the token. */
    if (!numeric_arg(TOKEN_DATA_TEXT(argv[0]), ARG(1), &value))
	return;

    shipout_int(obs, value-1);
}


/*
 * This section contains the macros "divert", "undivert" and "divnum"
 * for handling diversions.  The utility functions used live in output.c.
 */

/*
 * Divert further output to the diversion given by ARGV[1], or to
 * diversion 0 when no argument is given.  The number is handed to
 * make_divertion() and recorded in current_diversion.  A non-numeric
 * argument is reported and leaves the current diversion unchanged.
 */
static void
m4_divert(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int i = 0;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 2))
	return;

    /* numeric_arg() wants the macro name as a string, not the token. */
    if (argc == 2 && !numeric_arg(TOKEN_DATA_TEXT(argv[0]), ARG(1), &i))
	return;

    make_divertion(i);
    current_diversion = i;
}

/*
 * Expand to the number of the current diversion, as last recorded by
 * m4_divert() or builtin_init().
 */
static void
m4_divnum(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 1))
	shipout_int(obs, current_diversion);
}

/*
 * Bring back the diversions given by the argument list.  If none is
 * specified, bring back all diversions.  As a GNU extension, an
 * argument that does not start with a number is taken as a file name,
 * which is located with path_search() and inserted; with
 * no_gnu_extensions set such an argument is an error instead.
 */
static void
m4_undivert(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int i, divnum;
    FILE *fp;

    if (argc == 1) {
	undivert_all();
	return;
    }

    for (i = 1; i < argc; i++) {
	if (sscanf(ARG(i), "%d", &divnum) == 1) {
	    insert_divertion(divnum);
	    continue;
	}

	if (no_gnu_extensions) {
	    error("non-numeric argument to %s", TOKEN_DATA_TEXT(argv[0]));
	    continue;
	}

	fp = path_search(ARG(i));
	if (fp == nil) {
	    error("can't undivert %s: %s", ARG(i), syserr());
	    continue;
	}
	insert_file(fp);
	fclose(fp);
    }
}


/*
 * This section contains various macros which do not fall into any
 * specific group.  These are "dnl", "shift", "changequote" and
 * "changecom".
 */

/*
 * The builtin "dnl": call skip_line(), which lives in input.c.
 * Presumably that discards the rest of the current input line --
 * confirm against input.c.  All arguments are ignored and nothing is
 * added to OBS.
 */
static void
m4_dnl(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    skip_line();
}

/*
 * Shift all arguments one to the left, discarding the first argument.
 * The remaining arguments are written to OBS separated by commas, each
 * quoted with the current quotes.  This works by passing dump_args()
 * an argument vector that starts one slot later, so that the first
 * real argument takes the place dump_args() skips.
 */
static void
m4_shift(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    dump_args(obs, argc-1, argv+1, ",", true);
}

/*
 * Change the current quotes.  The first and second argument are passed
 * to set_quotes(), which lives in input.c; a missing argument is
 * passed as nil.
 */
static void
m4_changequote(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *open_quote = nil;
    char *close_quote = nil;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 3))
	return;

    if (argc >= 2)
	open_quote = TOKEN_DATA_TEXT(argv[1]);
    if (argc >= 3)
	close_quote = TOKEN_DATA_TEXT(argv[2]);

    set_quotes(open_quote, close_quote);
}

/*
 * Change the current comment delimiters through set_comment(), which
 * lives in input.c.  Without arguments both delimiters are set to the
 * empty string; otherwise the first argument is the start delimiter
 * and the second, or nil if absent, the end delimiter.
 */
static void
m4_changecom(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *end_delim = nil;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 3))
	return;

    if (argc == 1) {
	set_comment("", "");		/* disable comments */
	return;
    }

    if (argc >= 3)
	end_delim = TOKEN_DATA_TEXT(argv[2]);
    set_comment(TOKEN_DATA_TEXT(argv[1]), end_delim);
}


/*
 * This section contains macros for inclusion of other files --
 * "include" and "sinclude".  This differs from bringing back
 * diversions, in that the input is scanned before being copied to the
 * output.
 */
/*
 * Generic include function.  Locate the file named by the first
 * argument with path_search() and, if found, hand it to push_file().
 * Complain about inaccessible files iff SILENT is false.
 */
static void
include(argc, argv, silent)
    int argc;
    token_data **argv;
    boolean silent;
{
    FILE *fp;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	return;

    fp = path_search(ARG(1));
    if (fp != nil) {
	push_file(fp, ARG(1));
	return;
    }

    if (!silent)
	error("can't open %s: %s", ARG(1), syserr());
}

/*
 * The builtin "include": include a file, complaining in case of
 * errors.  Calls include() with SILENT false; OBS is not used.
 */
static void
m4_include(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    include(argc, argv, false);
}

/*
 * The builtin "sinclude": include a file, ignoring a file that cannot
 * be opened.  Calls include() with SILENT true; OBS is not used.
 */
static void
m4_sinclude(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    include(argc, argv, true);
}


/*
 * More miscellaneous builtins -- "maketemp", "errprint", "__file__" and
 * "__line__".  The last two are GNU specific.
 */

/*
 * Use the first argument as a template for a temporary file name, and
 * expand to the resulting name.
 *
 * Under MSDOS the template is copied into a fixed-size local buffer
 * before Mktemp() is applied; a template that does not fit is rejected
 * with an error rather than overrunning the buffer.  Elsewhere
 * mktemp() rewrites the argument text in place.
 */
static void
m4_maketemp(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
#ifdef MSDOS
    char name[256];

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	return;

    /* Room is needed for the template and its terminating NUL. */
    if (strlen(ARG(1)) >= sizeof name) {
	error("template too long in %s", TOKEN_DATA_TEXT(argv[0]));
	return;
    }

    strcpy(name, ARG(1));
    Mktemp(name);
    obstack_grow(obs, name, strlen(name));
#else
    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	return;
    mktemp(ARG(1));
    obstack_grow(obs, ARG(1), strlen(ARG(1)));
#endif
}

/*
 * Print all arguments on standard error, separated by single spaces.
 * The text is assembled on OBS and then finished, so it is not left
 * there as unfinished expansion.
 */
static void
m4_errprint(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *message;

    dump_args(obs, argc, argv, " ", false);
    obstack_1grow(obs, '\0');
    message = obstack_finish(obs);

    fputs(message, stderr);
    fflush(stderr);
}

/*
 * The builtin "__file__": expand to the value of current_file,
 * enclosed in the current quotes.
 */
static void
m4___file__(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 1)) {
	obstack_grow(obs, lquote, len_lquote);
	obstack_grow(obs, current_file, strlen(current_file));
	obstack_grow(obs, rquote, len_rquote);
    }
}

/*
 * The builtin "__line__": expand to the value of current_line.
 */
static void
m4___line__(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 1))
	shipout_int(obs, current_line);
}

/*
 * This section contains various macros for exiting, saving input until
 * EOF is seen, and tracing macro calls.  That is: "m4exit", "m4wrap",
 * "traceon" and "traceoff".
 */
/*
 * Exit immediately, with the exit code specified by the first
 * argument, or 0 if no argument is present.  A non-numeric argument is
 * reported by numeric_arg() and the exit code is then forced to 0.
 */
static void
m4_m4exit(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int exit_code = 0;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 2))
	return;

    /* numeric_arg() wants the macro name as a string, not the token. */
    if (argc == 2  && !numeric_arg(TOKEN_DATA_TEXT(argv[0]), ARG(1), &exit_code))
	exit_code = 0;

    exit(exit_code);
}

/*
 * Save the argument text until EOF has been seen, allowing for user
 * specified cleanup action.  The GNU version saves all arguments,
 * joined by single spaces; with no_gnu_extensions set only the first
 * is saved.  The text is finished on OBS and handed to push_wrapup().
 */
static void
m4_m4wrap(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!no_gnu_extensions)
	dump_args(obs, argc, argv, " ", false);
    else {
	char *first = ARG(1);

	obstack_grow(obs, first, strlen(first));
    }

    obstack_1grow(obs, '\0');
    push_wrapup(obstack_finish(obs));
}


/*
 * Enable tracing of all specified macros, or all, if none is specified.
 * Tracing is disabled by default, when a macro is defined.  This can be
 * overridden by the "t" debug flag.
 */

/*
 * Set_trace() is used by "traceon" and "traceoff" to enable and disable
 * tracing of the macro SYM.  It disables tracing if DATA is nil,
 * otherwise it enables tracing.  Only the nil-ness of DATA matters;
 * the pointer is never dereferenced.  The (symbol, data) signature
 * allows it to be passed to hack_all_symbols().
 */
static void
set_trace(sym, data)
    symbol *sym;
    char *data;
{
    SYMBOL_TRACED(sym) = (boolean)(data != nil);
}

static void
m4_traceon(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    symbol *s;
    int i;

    if (argc == 1)
	hack_all_symbols(set_trace, (char *)obs);
    else
	for (i = 1; i < argc; i++) {
	    s = lookup_symbol(TOKEN_DATA_TEXT(argv[i]), SYMBOL_LOOKUP);
	    if (s != nil)
		set_trace(s, (char *)obs);
	    else
		error("Undefined name %s", TOKEN_DATA_TEXT(argv[i]));
	}
}

/*
 * Disable tracing of all specified macros, or all, if none is
 * specified.
 */
static void
m4_traceoff(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    symbol *s;
    int i;

    if (argc == 1)
	hack_all_symbols(set_trace, nil);
    else
	for (i = 1; i < argc; i++) {
	    s = lookup_symbol(TOKEN_DATA_TEXT(argv[i]), SYMBOL_LOOKUP);
	    if (s != nil)
		set_trace(s, nil);
	    else
		error("Undefined name %s", TOKEN_DATA_TEXT(argv[i]));
	}
}

/*
 * On-the-fly control of the format of the tracing output.  It takes one
 * argument, which is a character string like that given to the -d
 * option, or none, in which case the debug_level is zeroed.  A leading
 * '+' adds the decoded flags to debug_level, a leading '-' removes
 * them, and with neither they replace it.  Flags that debug_decode()
 * rejects are reported and leave debug_level untouched.
 */
static void
m4_debugmode(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int new_debug_level;
    int change_flag = 0;
    char *flags;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 2))
	return;

    if (argc == 1) {
	debug_level = 0;
	return;
    }

    flags = ARG(1);
    if (flags[0] == '+' || flags[0] == '-') {
	change_flag = flags[0];
	flags++;
    }

    new_debug_level = debug_decode(flags);
    if (new_debug_level < 0) {
	error("debugmode: bad debug flags: `%s'", ARG(1));
	return;
    }

    if (change_flag == '+')
	debug_level |= new_debug_level;
    else if (change_flag == '-')
	debug_level &= ~new_debug_level;
    else
	debug_level = new_debug_level;
}

/*
 * Specify the destination of the debugging output.  With one argument,
 * the argument is passed to debug_set_output() as a file name; with no
 * arguments nil is passed instead.  A failure reported by
 * debug_set_output() for a named file is turned into an error message.
 */
static void
m4_debugfile(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 1, 2))
	return;

    if (argc == 1) {
	debug_set_output(nil);
	return;
    }

    if (debug_set_output(ARG(1)))
	return;
    error("cannot set error file: %s: %s", ARG(1), syserr());
}


/*
 * This section contains text processing macros: "len", "index",
 * "substr", "translit", "format", "regexp" and "patsubst".  The last
 * three are GNU specific.
 */

/*
 * The builtin "len": expand to the length, as given by strlen(), of
 * the first argument.
 */
static void
m4_len(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    if (!bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 2, 2))
	shipout_int(obs, strlen(ARG(1)));
}

/*
 * The macro expands to the first index of the second argument in the
 * first argument, or to -1 if it does not occur.  An empty second
 * argument is found at index 0.
 *
 * The search is done with integer offsets, so no pointer outside the
 * first argument is ever formed, not even when the second argument is
 * the longer of the two.
 */
static void
m4_index(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *haystack, *needle;
    int hlen, nlen, pos, retval;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 3, 3))
	return;

    haystack = ARG(1);
    needle = ARG(2);
    hlen = strlen(haystack);
    nlen = strlen(needle);

    retval = -1;
    /* The last start position that still leaves room for the needle is
       hlen - nlen; when that is negative the loop does not run. */
    for (pos = 0; pos <= hlen - nlen; pos++) {
	if (strncmp(haystack + pos, needle, nlen) == 0) {
	    retval = pos;
	    break;
	}
    }

    shipout_int(obs, retval);
}

/*
 * The macro "substr" extracts substrings from the first argument,
 * starting from the index given by the second argument, extending for a
 * length given by the third argument.  If the third argument is
 * missing, the substring extends to the end of the first argument.
 * A negative start, a start at or past the end, or a non-positive
 * length gives an empty expansion; a length reaching past the end is
 * clipped.
 */
static void
m4_substr(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int start, length, avail;
    /* numeric_arg() wants the macro name as a string, not the token. */
    char *me = TOKEN_DATA_TEXT(argv[0]);

    if (bad_argc(me, argc, 3, 4))
	return;

    length = avail = strlen(ARG(1));
    if (!numeric_arg(me, ARG(2), &start))
	return;

    if (argc == 4 && !numeric_arg(me, ARG(3), &length))
	return;

    if (start < 0 || length <= 0 || start >= avail)
	return;

    /* Compare against the room left instead of computing start + length,
       which could overflow for a huge LENGTH. */
    if (length > avail - start)
	length = avail - start;
    obstack_grow(obs, ARG(1) + start, length);
}

/*
 * The macro "translit" translates all characters in the first
 * argument, which are present in the second argument, into the
 * corresponding character from the third argument.  If the third
 * argument is shorter than the second, the extra characters in the
 * second argument are deleted from the first.
 *
 * Ranges are allowed in the second and third argument.  They are
 * expanded in the following function, and the expanded strings, without
 * any ranges left, are used to translate the characters of the first
 * argument.  A single - (dash) can be included in the strings by being
 * the first or the last character in the string.  If the first
 * character in a range is after the last in the character set, the
 * range is made backwards, thus 9-0 is the string 9876543210.
 *
 * expand_ranges() expands the ranges in S onto OBS and returns the
 * expanded, NUL-terminated string as a finished object of OBS.
 */

static char *
expand_ranges(s, obs)
    char *s;
    struct obstack *obs;
{
    char from;
    char to;

    for (from = '\0'; *s != '\0'; from = *s++) {
	if (*s == '-' && from != '\0') {
	    to = *++s;
	    if (to == '\0') {
		/* Trailing dash.  S now points at the terminating NUL, so
		   leave the loop here; letting the loop step run would
		   move S past the end of the string. */
		obstack_1grow(obs, '-');
		break;
	    } else if (from <= to) {
		while (from++ < to)
		    obstack_1grow(obs, from);
	    } else {
		while (--from >= to)
		    obstack_1grow(obs, from);
	    }
	} else
	    obstack_1grow(obs, *s);
    }
    obstack_1grow(obs, '\0');
    return obstack_finish(obs);
}

/*
 * The builtin "translit".  Each character of the first argument that
 * occurs in the second argument is replaced by the character at the
 * same position in the third argument, or dropped if the third argument
 * has no character at that position.  All other characters are copied
 * unchanged.  The second and third arguments have their ranges expanded
 * by expand_ranges() when they contain a dash.
 */
static void
m4_translit(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *from, *to;
    register char *src, *hit;
    int tolen;

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 3, 4))
	return;

    /* Set of characters to look for. */
    from = ARG(2);
    if (index(from, '-') != nil) {
	from = expand_ranges(from, obs);
	if (from == nil)
	    return;
    }

    /* Replacement characters; none at all unless there are 4 arguments. */
    to = "";
    if (argc == 4) {
	to = ARG(3);
	if (index(to, '-') != nil) {
	    to = expand_ranges(to, obs);
	    if (to == nil)
		return;
	}
    }
    tolen = strlen(to);

    src = ARG(1);
    while (*src) {
	hit = (char*)index(from, *src);
	if (hit == nil)
	    obstack_1grow(obs, *src);		/* not in the set */
	else if (hit - from < tolen)
	    obstack_1grow(obs, to[hit - from]);	/* has a replacement */
	/* else: no replacement at that position, the character is dropped */
	src++;
    }
}

/*
 * The builtin "format", a frontend for printf like formatting.  The
 * real work is done by format(), which lives in the file format.c; it
 * is handed the argument vector with argv[0] stripped off.
 */
static void
m4_format(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    int fmt_argc = argc - 1;
    token_data **fmt_argv = argv + 1;

    format(obs, fmt_argc, fmt_argv);
}

/*
 * Function to perform substitution by regular expressions.  Used by the
 * builtins regexp and patsubst.  The changed text is placed on the
 * obstack.  The substitution is REPL, with \& substituted by VICTIM (of
 * length LENGTH), and \N substituted by the text matched by the Nth
 * parenthesized sub-expression, taken from REGS.  A backslash followed
 * by any other character stands for that character.  A lone backslash
 * at the very end of REPL is dropped.
 */

static void
substitute(obs, victim, length, repl, regs)
    struct obstack *obs;
    char *victim;
    int length;				/* length of victim */
    char *repl;				/* replacement string */
    struct re_registers *regs;		/* for subexpression matches */
{
    register char ch;
    int reg;				/* register number of a \N escape */

    for (;;) {
	/* Copy literal text up to the next backslash, or stop at the end. */
	while ((ch = *repl++) != '\\') {
	    if (ch == '\0')
		return;
	    obstack_1grow(obs, ch);
	}

	switch ((ch = *repl++)) {
	case '\0':
	    /*
	     * REPL ended with a backslash.  Stop here; REPL already
	     * points past the terminator, so carrying on would scan
	     * memory beyond the end of the replacement string.
	     */
	    return;
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	    reg = ch - '0';
	    /* only registers with a positive end offset are copied */
	    if (regs->end[reg] > 0)
		obstack_grow(obs, victim + regs->start[reg], regs->end[reg] - regs->start[reg]);
	    break;
	case '&':
	    obstack_grow(obs, victim, length);
	    break;
	default:
	    obstack_1grow(obs, ch);
	    break;
	}
    }
}

/*
 * Regular expression version of index.  Given two arguments, expand to
 * the index of the first match of the second argument (a regexp) in the
 * first.  Expand to -1 if there is no match.  Given a third argument,
 * the expansion is changed to this argument, processed by substitute();
 * in that case nothing is output when there is no match.
 */

static void
m4_regexp(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *victim;			/* first argument */
    char *regexp;			/* regular expression */
    char *repl;				/* replacement string */

    struct re_pattern_buffer buf;	/* compiled regular expression */
    struct re_registers regs;		/* for subexpression matches */
    char *msg;				/* error message from re_compile_pattern */
    int startpos;			/* start position of match */
    int length;				/* length of first argument */

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 3, 4))
	return;

    victim = TOKEN_DATA_TEXT(argv[1]);
    regexp = TOKEN_DATA_TEXT(argv[2]);
    /*
     * ARG() yields "" when there is no third argument, so REPL always
     * has a defined value.  NOTE(review): whether bad_argc() rejects
     * calls with excess arguments is not visible here; with ARG() the
     * replacement is well defined in either case.
     */
    repl = ARG(3);

    buf.buffer = nil;
    buf.allocated = 0;
    buf.fastmap = nil;
    buf.translate = nil;
    msg = re_compile_pattern(regexp, strlen(regexp), &buf);

    if (msg != nil) {
	error("bad regular expression: \"%s\": %s", regexp, msg);
	/* release whatever was allocated before the failure, as patsubst does */
	if (buf.buffer != nil)
	    xfree(buf.buffer);
	return;
    }

    length = strlen(victim);
    startpos = re_search_2(&buf, nil, 0, victim, length, 0, length, &regs, length);
    xfree(buf.buffer);

    /* -2 is reported as a matching error, as opposed to a plain no match */
    if (startpos  == -2) {
	error("error matching regular expression \"%s\"", regexp);
	return;
    }

    if (argc == 3)
	shipout_int(obs, startpos);
    else if (startpos >= 0)
	substitute(obs, victim, length, repl, &regs);

    return;
}

/*
 * Substitute all matches of a regexp occurring in a string.  Each match
 * of the second argument (a regexp) in the first argument is changed to
 * the third argument, as expanded by substitute().  Text between the
 * matches, and after the last one, is copied unchanged.
 *
 * NOTE(review): substitute() is handed the whole first argument and its
 * length, so \& appears to insert the entire string rather than only
 * the matched text -- confirm which of the two is intended.
 */

static void
m4_patsubst(obs, argc, argv)
    struct obstack *obs;
    int argc;
    token_data **argv;
{
    char *victim;			/* first argument */
    char *regexp;			/* regular expression */

    struct re_pattern_buffer buf;	/* compiled regular expression */
    struct re_registers regs;		/* for subexpression matches */
    char *msg;				/* error message from re_compile_pattern */
    int matchpos;			/* start position of match */
    int offset;				/* current match offset */
    int length;				/* length of first argument */

    if (bad_argc(TOKEN_DATA_TEXT(argv[0]), argc, 3, 4))
	return;

    regexp = TOKEN_DATA_TEXT(argv[2]);

    buf.buffer = nil;
    buf.allocated = 0;
    buf.fastmap = nil;
    buf.translate = nil;
    msg = re_compile_pattern(regexp, strlen(regexp), &buf);

    if (msg != nil) {
	error("bad regular expression: \"%s\": %s", regexp, msg);
	if (buf.buffer != nil)
	    xfree(buf.buffer);
	return;
    }

    victim = TOKEN_DATA_TEXT(argv[1]);
    length = strlen(victim);

    for (offset = 0; offset < length; ) {
	matchpos = re_search_2(&buf, nil, 0, victim, length,
			       offset, length - offset, &regs, length);

	/* A result of -2 is reported as an error; output stops here. */
	if (matchpos == -2) {
	    error("error matching regular expression \"%s\"", regexp);
	    break;
	}

	/*
	 * No match in the rest of the string: copy the rest verbatim.
	 * The loop condition guarantees there is something left.
	 */
	if (matchpos < 0) {
	    obstack_grow(obs, victim + offset, length - offset);
	    break;
	}

	/* Text skipped by the search, in front of the match. */
	if (matchpos > offset)
	    obstack_grow(obs, victim + offset, matchpos - offset);

	/* The replacement for the match itself. */
	substitute(obs, victim, length, ARG(3), &regs);

	/*
	 * Continue after the match.  After a match of the null string,
	 * copy one character and step over it, to avoid looping forever
	 * on the same position.
	 */
	offset = regs.end[0];
	if (regs.start[0] == regs.end[0]) {
	    obstack_1grow(obs, victim[offset]);
	    offset++;
	}
    }
    obstack_1grow(obs, '\0');

    xfree(buf.buffer);
    return;
}

/*
 * This function handles all expansion of user defined and predefined
 * macros.  It is called with an obstack OBS, where the macro's expansion
 * will be placed, as an unfinished object.  SYM points to the macro
 * definition, giving the expansion text.  ARGC and ARGV are the
 * arguments, as usual.
 *
 * The definition text is copied to OBS, with these sequences replaced:
 *	$N	the text of argument N, or nothing if N >= ARGC
 *	$#	ARGC - 1
 *	$*	all arguments, separated by commas
 *	$@	the same, but with each argument quoted
 * A $ followed by anything else is copied as it is.
 */
void
expand_user_macro(obs, sym, argc, argv)
    struct obstack *obs;
    symbol *sym;
    int argc;
    token_data **argv;
{
    register char *text;
    int i;

    for  (text = SYMBOL_TEXT(sym); *text != '\0'; ) {
	if (*text != '$') {
	    obstack_1grow(obs, *text);
	    text++;
	    continue;
	}
	text++;
	switch (*text) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	    if (no_gnu_extensions) {
		/* only a single digit is taken as the argument number */
		i = *text++ - '0';
	    } else {
		/*
		 * Multi digit argument number.  All digits are consumed,
		 * but the value is no longer accumulated once it has
		 * reached ARGC: it cannot name an argument any more, and
		 * going on would overflow on a long digit string, which
		 * could yield a negative index into ARGV below.
		 */
		for (i = 0; isdigit((unsigned char) *text); text++) {
		    if (i < argc)
			i = i*10 + (*text - '0');
		}
	    }
	    if (i < argc)
		obstack_grow(obs, TOKEN_DATA_TEXT(argv[i]), strlen(TOKEN_DATA_TEXT(argv[i])));
	    break;

	case '#':			/* number of arguments */
	    shipout_int(obs, argc-1);
	    text++;
	    break;

	case '*':			/* all arguments */
	case '@':			/* ... same, but quoted */
	    dump_args(obs, argc, argv, ",", *text == '@');
	    text++;
	    break;

	default:
	    /*
	     * Not a recognised sequence: output the dollar sign only.
	     * TEXT is not advanced, so the following character (or the
	     * end of the text) is handled by the next iteration.
	     */
	    obstack_1grow(obs, '$');
	    break;
	}
    }
}
