From 73b16af8feec390afbabd9356d6e5e83c0390838 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 15 May 2015 10:20:47 +0200 Subject: busybox: imported from http://www.busybox.net/downloads/busybox-1.13.3.tar.bz2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bjørn Mork --- editors/Config.in | 196 +++ editors/Kbuild | 14 + editors/awk.c | 2919 ++++++++++++++++++++++++++++++++++ editors/cmp.c | 135 ++ editors/diff.c | 1344 ++++++++++++++++ editors/ed.c | 1049 +++++++++++++ editors/patch.c | 254 +++ editors/sed.c | 1349 ++++++++++++++++ editors/sed1line.txt | 425 +++++ editors/sed_summary.htm | 223 +++ editors/vi.c | 3954 +++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 11862 insertions(+) create mode 100644 editors/Config.in create mode 100644 editors/Kbuild create mode 100644 editors/awk.c create mode 100644 editors/cmp.c create mode 100644 editors/diff.c create mode 100644 editors/ed.c create mode 100644 editors/patch.c create mode 100644 editors/sed.c create mode 100644 editors/sed1line.txt create mode 100644 editors/sed_summary.htm create mode 100644 editors/vi.c (limited to 'editors') diff --git a/editors/Config.in b/editors/Config.in new file mode 100644 index 0000000..7dbc9b6 --- /dev/null +++ b/editors/Config.in @@ -0,0 +1,196 @@ +# +# For a description of the syntax of this configuration file, +# see scripts/kbuild/config-language.txt. +# + +menu "Editors" + +config AWK + bool "awk" + default n + help + Awk is used as a pattern scanning and processing language. This is + the BusyBox implementation of that programming language. + +config FEATURE_AWK_LIBM + bool "Enable math functions (requires libm)" + default n + depends on AWK + help + Enable math functions of the Awk programming language. + NOTE: This will require libm to be present for linking. + +config CMP + bool "cmp" + default n + help + cmp is used to compare two files and returns the result + to standard output. 
+ +config DIFF + bool "diff" + default n + help + diff compares two files or directories and outputs the + differences between them in a form that can be given to + the patch command. + +config FEATURE_DIFF_BINARY + bool "Enable checks for binary files" + default y + depends on DIFF + help + This option enables support for checking for binary files + before a comparison is carried out. + +config FEATURE_DIFF_DIR + bool "Enable directory support" + default y + depends on DIFF + help + This option enables support for directory and subdirectory + comparison. + +config FEATURE_DIFF_MINIMAL + bool "Enable -d option to find smaller sets of changes" + default n + depends on DIFF + help + Enabling this option allows the use of -d to make diff + try hard to find the smallest possible set of changes. + +config ED + bool "ed" + default n + help + The original 1970's Unix text editor, from the days of teletypes. + Small, simple, evil. Part of SUSv3. If you're not already using + this, you don't need it. + +config PATCH + bool "patch" + default n + help + Apply a unified diff formatted patch. + +config SED + bool "sed" + default n + help + sed is used to perform text transformations on a file + or input from a pipeline. + +config VI + bool "vi" + default n + help + 'vi' is a text editor. More specifically, it is the One True + text editor. It does, however, have a rather steep + learning curve. If you are not already comfortable with 'vi' + you may wish to use something else. + +config FEATURE_VI_MAX_LEN + int "Maximum screen width in vi" + range 256 16384 + default 4096 + depends on VI + help + Contrary to what you may think, this is not eating much. + Make it smaller than 4k only if you are very limited on memory. + +config FEATURE_VI_8BIT + bool "Allow vi to display 8-bit chars (otherwise shows dots)" + default y + depends on VI + help + If your terminal can display characters with high bit set, + you may want to enable this. Note: vi is not Unicode-capable.
+ If your terminal combines several 8-bit bytes into one character + (as in Unicode mode), this will not work properly. + +config FEATURE_VI_COLON + bool "Enable \":\" colon commands (no \"ex\" mode)" + default y + depends on VI + help + Enable a limited set of colon commands for vi. This does not + provide an "ex" mode. + +config FEATURE_VI_YANKMARK + bool "Enable yank/put commands and mark cmds" + default y + depends on VI + help + This will enable you to use yank and put, as well as mark in + busybox vi. + +config FEATURE_VI_SEARCH + bool "Enable search and replace cmds" + default y + depends on VI + help + Select this if you wish to be able to do search and replace in + busybox vi. + +config FEATURE_VI_USE_SIGNALS + bool "Catch signals" + default y + depends on VI + help + Selecting this option will make busybox vi signal aware. This will + make busybox vi support SIGWINCH to deal with Window Changes, catch + Ctrl-Z and Ctrl-C and alarms. + +config FEATURE_VI_DOT_CMD + bool "Remember previous cmd and \".\" cmd" + default y + depends on VI + help + Make busybox vi remember the last command and be able to repeat it. + +config FEATURE_VI_READONLY + bool "Enable -R option and \"view\" mode" + default y + depends on VI + help + Enable the read-only command line option, which allows the user to + open a file in read-only mode. + +config FEATURE_VI_SETOPTS + bool "Enable set-able options, ai ic showmatch" + default y + depends on VI + help + Enable the editor to set some (ai, ic, showmatch) options. + +config FEATURE_VI_SET + bool "Support for :set" + default y + depends on VI + help + Support for ":set". + +config FEATURE_VI_WIN_RESIZE + bool "Handle window resize" + default y + depends on VI + help + Make busybox vi behave nicely with terminals that get resized. 
+ +config FEATURE_VI_OPTIMIZE_CURSOR + bool "Optimize cursor movement" + default y + depends on VI + help + This will make the cursor movement faster, but requires more memory + and it makes the applet a tiny bit larger. + +config FEATURE_ALLOW_EXEC + bool "Allow vi and awk to execute shell commands" + default y + depends on VI || AWK + help + Enables vi and awk features which allows user to execute + shell commands (using system() C call). + +endmenu diff --git a/editors/Kbuild b/editors/Kbuild new file mode 100644 index 0000000..76302aa --- /dev/null +++ b/editors/Kbuild @@ -0,0 +1,14 @@ +# Makefile for busybox +# +# Copyright (C) 1999-2005 by Erik Andersen +# +# Licensed under the GPL v2, see the file LICENSE in this tarball. + +lib-y:= +lib-$(CONFIG_AWK) += awk.o +lib-$(CONFIG_CMP) += cmp.o +lib-$(CONFIG_DIFF) += diff.o +lib-$(CONFIG_ED) += ed.o +lib-$(CONFIG_PATCH) += patch.o +lib-$(CONFIG_SED) += sed.o +lib-$(CONFIG_VI) += vi.o diff --git a/editors/awk.c b/editors/awk.c new file mode 100644 index 0000000..64371f0 --- /dev/null +++ b/editors/awk.c @@ -0,0 +1,2919 @@ +/* vi: set sw=4 ts=4: */ +/* + * awk implementation for busybox + * + * Copyright (C) 2002 by Dmitry Zakharov + * + * Licensed under the GPL v2 or later, see the file LICENSE in this tarball. + */ + +#include "libbb.h" +#include "xregex.h" +#include <math.h> + +/* This is a NOEXEC applet. Be very careful!
*/ + + +#define MAXVARFMT 240 +#define MINNVBLOCK 64 + +/* variable flags */ +#define VF_NUMBER 0x0001 /* 1 = primary type is number */ +#define VF_ARRAY 0x0002 /* 1 = it's an array */ + +#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */ +#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ +#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ +#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ +#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */ +#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ +#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ + +/* these flags are static, don't change them when value is changed */ +#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY) + +/* Variable */ +typedef struct var_s { + unsigned type; /* flags */ + double number; + char *string; + union { + int aidx; /* func arg idx (for compilation stage) */ + struct xhash_s *array; /* array ptr */ + struct var_s *parent; /* for func args, ptr to actual parameter */ + char **walker; /* list of array elements (for..in) */ + } x; +} var; + +/* Node chain (pattern-action chain, BEGIN, END, function bodies) */ +typedef struct chain_s { + struct node_s *first; + struct node_s *last; + const char *programname; +} chain; + +/* Function */ +typedef struct func_s { + unsigned nargs; + struct chain_s body; +} func; + +/* I/O stream */ +typedef struct rstream_s { + FILE *F; + char *buffer; + int adv; + int size; + int pos; + smallint is_pipe; +} rstream; + +typedef struct hash_item_s { + union { + struct var_s v; /* variable/array hash */ + struct rstream_s rs; /* redirect streams hash */ + struct func_s f; /* functions hash */ + } data; + struct hash_item_s *next; /* next in chain */ + char name[1]; /* really it's longer */ +} hash_item; + +typedef struct xhash_s { + unsigned nel; /* num of elements */ + unsigned csize; /* current hash size */ + 
unsigned nprime; /* next hash size in PRIMES[] */ + unsigned glen; /* summary length of item names */ + struct hash_item_s **items; +} xhash; + +/* Tree node */ +typedef struct node_s { + uint32_t info; + unsigned lineno; + union { + struct node_s *n; + var *v; + int i; + char *s; + regex_t *re; + } l; + union { + struct node_s *n; + regex_t *ire; + func *f; + int argno; + } r; + union { + struct node_s *n; + } a; +} node; + +/* Block of temporary variables */ +typedef struct nvblock_s { + int size; + var *pos; + struct nvblock_s *prev; + struct nvblock_s *next; + var nv[0]; +} nvblock; + +typedef struct tsplitter_s { + node n; + regex_t re[2]; +} tsplitter; + +/* simple token classes */ +/* Order and hex values are very important!!! See next_token() */ +#define TC_SEQSTART 1 /* ( */ +#define TC_SEQTERM (1 << 1) /* ) */ +#define TC_REGEXP (1 << 2) /* /.../ */ +#define TC_OUTRDR (1 << 3) /* | > >> */ +#define TC_UOPPOST (1 << 4) /* unary postfix operator */ +#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */ +#define TC_BINOPX (1 << 6) /* two-opnd operator */ +#define TC_IN (1 << 7) +#define TC_COMMA (1 << 8) +#define TC_PIPE (1 << 9) /* input redirection pipe */ +#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */ +#define TC_ARRTERM (1 << 11) /* ] */ +#define TC_GRPSTART (1 << 12) /* { */ +#define TC_GRPTERM (1 << 13) /* } */ +#define TC_SEMICOL (1 << 14) +#define TC_NEWLINE (1 << 15) +#define TC_STATX (1 << 16) /* ctl statement (for, next...) 
*/ +#define TC_WHILE (1 << 17) +#define TC_ELSE (1 << 18) +#define TC_BUILTIN (1 << 19) +#define TC_GETLINE (1 << 20) +#define TC_FUNCDECL (1 << 21) /* `function' `func' */ +#define TC_BEGIN (1 << 22) +#define TC_END (1 << 23) +#define TC_EOF (1 << 24) +#define TC_VARIABLE (1 << 25) +#define TC_ARRAY (1 << 26) +#define TC_FUNCTION (1 << 27) +#define TC_STRING (1 << 28) +#define TC_NUMBER (1 << 29) + +#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) + +/* combined token classes */ +#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) +#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) +#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ + | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER) + +#define TC_STATEMNT (TC_STATX | TC_WHILE) +#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) + +/* word tokens, cannot mean something else if not expected */ +#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \ + | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END) + +/* discard newlines after these */ +#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ + | TC_BINOP | TC_OPTERM) + +/* what can expression begin with */ +#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP) +/* what can group begin with */ +#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART) + +/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */ +/* operator is inserted between them */ +#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ + | TC_STRING | TC_NUMBER | TC_UOPPOST) +#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE) + +#define OF_RES1 0x010000 +#define OF_RES2 0x020000 +#define OF_STR1 0x040000 +#define OF_STR2 0x080000 +#define OF_NUM1 0x100000 +#define OF_CHECKED 0x200000 + +/* combined operator flags */ +#define xx 0 +#define xV OF_RES2 +#define xS (OF_RES2 | OF_STR2) +#define Vx OF_RES1 +#define VV (OF_RES1 | OF_RES2) +#define Nx (OF_RES1 | OF_NUM1) +#define NV (OF_RES1 | OF_NUM1 | OF_RES2) +#define Sx (OF_RES1 | OF_STR1) +#define SV 
(OF_RES1 | OF_STR1 | OF_RES2) +#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2) + +#define OPCLSMASK 0xFF00 +#define OPNMASK 0x007F + +/* operator priority is a highest byte (even: r->l, odd: l->r grouping) + * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, + * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string + */ +#define P(x) (x << 24) +#define PRIMASK 0x7F000000 +#define PRIMASK2 0x7E000000 + +/* Operation classes */ + +#define SHIFT_TIL_THIS 0x0600 +#define RECUR_FROM_THIS 0x1000 + +enum { + OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300, + OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600, + + OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900, + OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00, + OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00, + + OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200, + OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500, + OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800, + OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00, + OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00, + OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100, + OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400, + OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700, + OC_DONE = 0x2800, + + ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200, + ST_WHILE = 0x3300 +}; + +/* simple builtins */ +enum { + F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr, + F_ti, F_le, F_sy, F_ff, F_cl +}; + +/* builtins */ +enum { + B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up, + B_ge, B_gs, B_su, + B_an, B_co, B_ls, B_or, B_rs, B_xo, +}; + +/* tokens and their corresponding info values */ + +#define NTC "\377" /* switch to next token class (tc<<1) */ +#define NTCC '\377' + +#define OC_B OC_BUILTIN + +static const char tokenlist[] ALIGN1 = + "\1(" NTC + "\1)" NTC + "\1/" NTC /* REGEXP */ + "\2>>" "\1>" "\1|" NTC /* OUTRDR */ + "\2++" "\2--" NTC /* UOPPOST */ + 
"\2++" "\2--" "\1$" NTC /* UOPPRE1 */ + "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ + "\2*=" "\2/=" "\2%=" "\2^=" + "\1+" "\1-" "\3**=" "\2**" + "\1/" "\1%" "\1^" "\1*" + "\2!=" "\2>=" "\2<=" "\1>" + "\1<" "\2!~" "\1~" "\2&&" + "\2||" "\1?" "\1:" NTC + "\2in" NTC + "\1," NTC + "\1|" NTC + "\1+" "\1-" "\1!" NTC /* UOPPRE2 */ + "\1]" NTC + "\1{" NTC + "\1}" NTC + "\1;" NTC + "\1\n" NTC + "\2if" "\2do" "\3for" "\5break" /* STATX */ + "\10continue" "\6delete" "\5print" + "\6printf" "\4next" "\10nextfile" + "\6return" "\4exit" NTC + "\5while" NTC + "\4else" NTC + + "\3and" "\5compl" "\6lshift" "\2or" + "\6rshift" "\3xor" + "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ + "\3cos" "\3exp" "\3int" "\3log" + "\4rand" "\3sin" "\4sqrt" "\5srand" + "\6gensub" "\4gsub" "\5index" "\6length" + "\5match" "\5split" "\7sprintf" "\3sub" + "\6substr" "\7systime" "\10strftime" + "\7tolower" "\7toupper" NTC + "\7getline" NTC + "\4func" "\10function" NTC + "\5BEGIN" NTC + "\3END" "\0" + ; + +static const uint32_t tokeninfo[] = { + 0, + 0, + OC_REGEXP, + xS|'a', xS|'w', xS|'|', + OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', + OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', + OC_FIELD|xV|P(5), + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), + OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', + OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', + OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', + OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', + OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', + OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, + OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, + OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', + OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), + OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', + OC_COLON|xx|P(67)|':', + OC_IN|SV|P(49), + OC_COMMA|SS|P(80), + OC_PGETLINE|SV|P(37), + OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', + OC_UNARY|xV|P(19)|'!', + 0, + 0, + 0, + 0, 
+ 0, + ST_IF, ST_DO, ST_FOR, OC_BREAK, + OC_CONTINUE, OC_DELETE|Vx, OC_PRINT, + OC_PRINTF, OC_NEXT, OC_NEXTFILE, + OC_RETURN|Vx, OC_EXIT|Nx, + ST_WHILE, + 0, + + OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), + OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), + OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), + OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, + OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, + OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le, + OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), + OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), + OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), + OC_GETLINE|SV|P(0), + 0, 0, + 0, + 0 +}; + +/* internal variable names and their initial values */ +/* asterisk marks SPECIAL vars; $ is just no-named Field0 */ +enum { + CONVFMT, OFMT, FS, OFS, + ORS, RS, RT, FILENAME, + SUBSEP, ARGIND, ARGC, ARGV, + ERRNO, FNR, + NR, NF, IGNORECASE, + ENVIRON, F0, NUM_INTERNAL_VARS +}; + +static const char vNames[] ALIGN1 = + "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0" + "ORS\0" "RS\0*" "RT\0" "FILENAME\0" + "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0" + "ERRNO\0" "FNR\0" + "NR\0" "NF\0*" "IGNORECASE\0*" + "ENVIRON\0" "$\0*" "\0"; + +static const char vValues[] ALIGN1 = + "%.6g\0" "%.6g\0" " \0" " \0" + "\n\0" "\n\0" "\0" "\0" + "\034\0" + "\377"; + +/* hash size may grow to these values */ +#define FIRST_PRIME 61 +static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 }; + + +/* Globals. Split in two parts so that first one is addressed + * with (mostly short) negative offsets. + * NB: it's unsafe to put members of type "double" + * into globals2 (gcc may fail to align them). 
+ */ +struct globals { + double t_double; + chain beginseq, mainseq, endseq; + chain *seq; + node *break_ptr, *continue_ptr; + rstream *iF; + xhash *vhash, *ahash, *fdhash, *fnhash; + const char *g_progname; + int g_lineno; + int nfields; + int maxfields; /* used in fsrealloc() only */ + var *Fields; + nvblock *g_cb; + char *g_pos; + char *g_buf; + smallint icase; + smallint exiting; + smallint nextrec; + smallint nextfile; + smallint is_f0_split; +}; +struct globals2 { + uint32_t t_info; /* often used */ + uint32_t t_tclass; + char *t_string; + int t_lineno; + int t_rollback; + + var *intvar[NUM_INTERNAL_VARS]; /* often used */ + + /* former statics from various functions */ + char *split_f0__fstrings; + + uint32_t next_token__save_tclass; + uint32_t next_token__save_info; + uint32_t next_token__ltclass; + smallint next_token__concat_inserted; + + smallint next_input_file__files_happen; + rstream next_input_file__rsm; + + var *evaluate__fnargs; + unsigned evaluate__seed; + regex_t evaluate__sreg; + + var ptest__v; + + tsplitter exec_builtin__tspl; + + /* biggest and least used members go last */ + tsplitter fsplitter, rsplitter; +}; +#define G1 (ptr_to_globals[-1]) +#define G (*(struct globals2 *)ptr_to_globals) +/* For debug. 
nm --size-sort awk.o | grep -vi ' [tr] ' */ +/*char G1size[sizeof(G1)]; - 0x74 */ +/*char Gsize[sizeof(G)]; - 0x1c4 */ +/* Trying to keep most of members accessible with short offsets: */ +/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ +#define t_double (G1.t_double ) +#define beginseq (G1.beginseq ) +#define mainseq (G1.mainseq ) +#define endseq (G1.endseq ) +#define seq (G1.seq ) +#define break_ptr (G1.break_ptr ) +#define continue_ptr (G1.continue_ptr) +#define iF (G1.iF ) +#define vhash (G1.vhash ) +#define ahash (G1.ahash ) +#define fdhash (G1.fdhash ) +#define fnhash (G1.fnhash ) +#define g_progname (G1.g_progname ) +#define g_lineno (G1.g_lineno ) +#define nfields (G1.nfields ) +#define maxfields (G1.maxfields ) +#define Fields (G1.Fields ) +#define g_cb (G1.g_cb ) +#define g_pos (G1.g_pos ) +#define g_buf (G1.g_buf ) +#define icase (G1.icase ) +#define exiting (G1.exiting ) +#define nextrec (G1.nextrec ) +#define nextfile (G1.nextfile ) +#define is_f0_split (G1.is_f0_split ) +#define t_info (G.t_info ) +#define t_tclass (G.t_tclass ) +#define t_string (G.t_string ) +#define t_lineno (G.t_lineno ) +#define t_rollback (G.t_rollback ) +#define intvar (G.intvar ) +#define fsplitter (G.fsplitter ) +#define rsplitter (G.rsplitter ) +#define INIT_G() do { \ + SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \ + G.next_token__ltclass = TC_OPTERM; \ + G.evaluate__seed = 1; \ +} while (0) + + +/* function prototypes */ +static void handle_special(var *); +static node *parse_expr(uint32_t); +static void chain_group(void); +static var *evaluate(node *, var *); +static rstream *next_input_file(void); +static int fmt_num(char *, int, const char *, double, int); +static int awk_exit(int) NORETURN; + +/* ---- error handling ---- */ + +static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error"; +static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; +static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected 
token"; +static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; +static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier"; +static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin"; +static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array"; +static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error"; +static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; +#if !ENABLE_FEATURE_AWK_LIBM +static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; +#endif + +static void zero_out_var(var * vp) +{ + memset(vp, 0, sizeof(*vp)); +} + +static void syntax_error(const char *const message) NORETURN; +static void syntax_error(const char *const message) +{ + bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); +} + +/* ---- hash stuff ---- */ + +static unsigned hashidx(const char *name) +{ + unsigned idx = 0; + + while (*name) idx = *name++ + (idx << 6) - idx; + return idx; +} + +/* create new hash */ +static xhash *hash_init(void) +{ + xhash *newhash; + + newhash = xzalloc(sizeof(xhash)); + newhash->csize = FIRST_PRIME; + newhash->items = xzalloc(newhash->csize * sizeof(hash_item *)); + + return newhash; +} + +/* find item in hash, return ptr to data, NULL if not found */ +static void *hash_search(xhash *hash, const char *name) +{ + hash_item *hi; + + hi = hash->items [ hashidx(name) % hash->csize ]; + while (hi) { + if (strcmp(hi->name, name) == 0) + return &(hi->data); + hi = hi->next; + } + return NULL; +} + +/* grow hash if it becomes too big */ +static void hash_rebuild(xhash *hash) +{ + unsigned newsize, i, idx; + hash_item **newitems, *hi, *thi; + + if (hash->nprime == ARRAY_SIZE(PRIMES)) + return; + + newsize = PRIMES[hash->nprime++]; + newitems = xzalloc(newsize * sizeof(hash_item *)); + + for (i = 0; i < hash->csize; i++) { + hi = hash->items[i]; + while (hi) { + thi = hi; + hi = thi->next; + idx = hashidx(thi->name) % newsize; + thi->next = newitems[idx]; + 
newitems[idx] = thi; + } + } + + free(hash->items); + hash->csize = newsize; + hash->items = newitems; +} + +/* find item in hash, add it if necessary. Return ptr to data */ +static void *hash_find(xhash *hash, const char *name) +{ + hash_item *hi; + unsigned idx; + int l; + + hi = hash_search(hash, name); + if (!hi) { + if (++hash->nel / hash->csize > 10) + hash_rebuild(hash); + + l = strlen(name) + 1; + hi = xzalloc(sizeof(hash_item) + l); + memcpy(hi->name, name, l); + + idx = hashidx(name) % hash->csize; + hi->next = hash->items[idx]; + hash->items[idx] = hi; + hash->glen += l; + } + return &(hi->data); +} + +#define findvar(hash, name) ((var*) hash_find((hash), (name))) +#define newvar(name) ((var*) hash_find(vhash, (name))) +#define newfile(name) ((rstream*)hash_find(fdhash, (name))) +#define newfunc(name) ((func*) hash_find(fnhash, (name))) + +static void hash_remove(xhash *hash, const char *name) +{ + hash_item *hi, **phi; + + phi = &(hash->items[hashidx(name) % hash->csize]); + while (*phi) { + hi = *phi; + if (strcmp(hi->name, name) == 0) { + hash->glen -= (strlen(name) + 1); + hash->nel--; + *phi = hi->next; + free(hi); + break; + } + phi = &(hi->next); + } +} + +/* ------ some useful functions ------ */ + +static void skip_spaces(char **s) +{ + char *p = *s; + + while (1) { + if (*p == '\\' && p[1] == '\n') { + p++; + t_lineno++; + } else if (*p != ' ' && *p != '\t') { + break; + } + p++; + } + *s = p; +} + +static char *nextword(char **s) +{ + char *p = *s; + + while (*(*s)++) /* */; + + return p; +} + +static char nextchar(char **s) +{ + char c, *pps; + + c = *((*s)++); + pps = *s; + if (c == '\\') c = bb_process_escape_sequence((const char**)s); + if (c == '\\' && *s == pps) c = *((*s)++); + return c; +} + +static ALWAYS_INLINE int isalnum_(int c) +{ + return (isalnum(c) || c == '_'); +} + +static double my_strtod(char **pp) +{ +#if ENABLE_DESKTOP + if ((*pp)[0] == '0' + && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1])) + ) { + return 
strtoull(*pp, pp, 0); + } +#endif + return strtod(*pp, pp); +} + +/* -------- working with variables (set/get/copy/etc) -------- */ + +static xhash *iamarray(var *v) +{ + var *a = v; + + while (a->type & VF_CHILD) + a = a->x.parent; + + if (!(a->type & VF_ARRAY)) { + a->type |= VF_ARRAY; + a->x.array = hash_init(); + } + return a->x.array; +} + +static void clear_array(xhash *array) +{ + unsigned i; + hash_item *hi, *thi; + + for (i = 0; i < array->csize; i++) { + hi = array->items[i]; + while (hi) { + thi = hi; + hi = hi->next; + free(thi->data.v.string); + free(thi); + } + array->items[i] = NULL; + } + array->glen = array->nel = 0; +} + +/* clear a variable */ +static var *clrvar(var *v) +{ + if (!(v->type & VF_FSTR)) + free(v->string); + + v->type &= VF_DONTTOUCH; + v->type |= VF_DIRTY; + v->string = NULL; + return v; +} + +/* assign string value to variable */ +static var *setvar_p(var *v, char *value) +{ + clrvar(v); + v->string = value; + handle_special(v); + return v; +} + +/* same as setvar_p but make a copy of string */ +static var *setvar_s(var *v, const char *value) +{ + return setvar_p(v, (value && *value) ? 
xstrdup(value) : NULL); +} + +/* same as setvar_s but set USER flag */ +static var *setvar_u(var *v, const char *value) +{ + setvar_s(v, value); + v->type |= VF_USER; + return v; +} + +/* set array element to user string */ +static void setari_u(var *a, int idx, const char *s) +{ + char sidx[sizeof(int)*3 + 1]; + var *v; + + sprintf(sidx, "%d", idx); + v = findvar(iamarray(a), sidx); + setvar_u(v, s); +} + +/* assign numeric value to variable */ +static var *setvar_i(var *v, double value) +{ + clrvar(v); + v->type |= VF_NUMBER; + v->number = value; + handle_special(v); + return v; +} + +static const char *getvar_s(var *v) +{ + /* if v is numeric and has no cached string, convert it to string */ + if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) { + fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); + v->string = xstrdup(g_buf); + v->type |= VF_CACHED; + } + return (v->string == NULL) ? "" : v->string; +} + +static double getvar_i(var *v) +{ + char *s; + + if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) { + v->number = 0; + s = v->string; + if (s && *s) { + v->number = my_strtod(&s); + if (v->type & VF_USER) { + skip_spaces(&s); + if (*s != '\0') + v->type &= ~VF_USER; + } + } else { + v->type &= ~VF_USER; + } + v->type |= VF_CACHED; + } + return v->number; +} + +/* Used for operands of bitwise ops */ +static unsigned long getvar_i_int(var *v) +{ + double d = getvar_i(v); + + /* Casting doubles to longs is undefined for values outside + * of target type range. Try to widen it as much as possible */ + if (d >= 0) + return (unsigned long)d; + /* Why? 
Think about d == -4294967295.0 (assuming 32bit longs) */ + return - (long) (unsigned long) (-d); +} + +static var *copyvar(var *dest, const var *src) +{ + if (dest != src) { + clrvar(dest); + dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR)); + dest->number = src->number; + if (src->string) + dest->string = xstrdup(src->string); + } + handle_special(dest); + return dest; +} + +static var *incvar(var *v) +{ + return setvar_i(v, getvar_i(v) + 1.); +} + +/* return true if v is number or numeric string */ +static int is_numeric(var *v) +{ + getvar_i(v); + return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY)); +} + +/* return 1 when value of v corresponds to true, 0 otherwise */ +static int istrue(var *v) +{ + if (is_numeric(v)) + return (v->number == 0) ? 0 : 1; + return (v->string && *(v->string)) ? 1 : 0; +} + +/* temporary variables allocator. Last allocated should be first freed */ +static var *nvalloc(int n) +{ + nvblock *pb = NULL; + var *v, *r; + int size; + + while (g_cb) { + pb = g_cb; + if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break; + g_cb = g_cb->next; + } + + if (!g_cb) { + size = (n <= MINNVBLOCK) ? 
MINNVBLOCK : n; + g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var)); + g_cb->size = size; + g_cb->pos = g_cb->nv; + g_cb->prev = pb; + /*g_cb->next = NULL; - xzalloc did it */ + if (pb) pb->next = g_cb; + } + + v = r = g_cb->pos; + g_cb->pos += n; + + while (v < g_cb->pos) { + v->type = 0; + v->string = NULL; + v++; + } + + return r; +} + +static void nvfree(var *v) +{ + var *p; + + if (v < g_cb->nv || v >= g_cb->pos) + syntax_error(EMSG_INTERNAL_ERROR); + + for (p = v; p < g_cb->pos; p++) { + if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { + clear_array(iamarray(p)); + free(p->x.array->items); + free(p->x.array); + } + if (p->type & VF_WALK) + free(p->x.walker); + + clrvar(p); + } + + g_cb->pos = v; + while (g_cb->prev && g_cb->pos == g_cb->nv) { + g_cb = g_cb->prev; + } +} + +/* ------- awk program text parsing ------- */ + +/* Parse next token pointed by global pos, place results into global ttt. + * If token isn't expected, give away. Return token class + */ +static uint32_t next_token(uint32_t expected) +{ +#define concat_inserted (G.next_token__concat_inserted) +#define save_tclass (G.next_token__save_tclass) +#define save_info (G.next_token__save_info) +/* Initialized to TC_OPTERM: */ +#define ltclass (G.next_token__ltclass) + + char *p, *pp, *s; + const char *tl; + uint32_t tc; + const uint32_t *ti; + int l; + + if (t_rollback) { + t_rollback = FALSE; + + } else if (concat_inserted) { + concat_inserted = FALSE; + t_tclass = save_tclass; + t_info = save_info; + + } else { + p = g_pos; + readnext: + skip_spaces(&p); + g_lineno = t_lineno; + if (*p == '#') + while (*p != '\n' && *p != '\0') + p++; + + if (*p == '\n') + t_lineno++; + + if (*p == '\0') { + tc = TC_EOF; + + } else if (*p == '\"') { + /* it's a string */ + t_string = s = ++p; + while (*p != '\"') { + if (*p == '\0' || *p == '\n') + syntax_error(EMSG_UNEXP_EOS); + *(s++) = nextchar(&p); + } + p++; + *s = '\0'; + tc = TC_STRING; + + } else if ((expected & TC_REGEXP) && *p == '/') { + /* it's 
regexp */ + t_string = s = ++p; + while (*p != '/') { + if (*p == '\0' || *p == '\n') + syntax_error(EMSG_UNEXP_EOS); + *s = *p++; + if (*s++ == '\\') { + pp = p; + *(s-1) = bb_process_escape_sequence((const char **)&p); + if (*pp == '\\') + *s++ = '\\'; + if (p == pp) + *s++ = *p++; + } + } + p++; + *s = '\0'; + tc = TC_REGEXP; + + } else if (*p == '.' || isdigit(*p)) { + /* it's a number */ + t_double = my_strtod(&p); + if (*p == '.') + syntax_error(EMSG_UNEXP_TOKEN); + tc = TC_NUMBER; + + } else { + /* search for something known */ + tl = tokenlist; + tc = 0x00000001; + ti = tokeninfo; + while (*tl) { + l = *(tl++); + if (l == NTCC) { + tc <<= 1; + continue; + } + /* if token class is expected, token + * matches and it's not a longer word, + * then this is what we are looking for + */ + if ((tc & (expected | TC_WORD | TC_NEWLINE)) + && *tl == *p && strncmp(p, tl, l) == 0 + && !((tc & TC_WORD) && isalnum_(p[l])) + ) { + t_info = *ti; + p += l; + break; + } + ti++; + tl += l; + } + + if (!*tl) { + /* it's a name (var/array/function), + * otherwise it's something wrong + */ + if (!isalnum_(*p)) + syntax_error(EMSG_UNEXP_TOKEN); + + t_string = --p; + while (isalnum_(*(++p))) { + *(p-1) = *p; + } + *(p-1) = '\0'; + tc = TC_VARIABLE; + /* also consume whitespace between functionname and bracket */ + if (!(expected & TC_VARIABLE)) + skip_spaces(&p); + if (*p == '(') { + tc = TC_FUNCTION; + } else { + if (*p == '[') { + p++; + tc = TC_ARRAY; + } + } + } + } + g_pos = p; + + /* skipping newlines in some cases */ + if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) + goto readnext; + + /* insert concatenation operator when needed */ + if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) { + concat_inserted = TRUE; + save_tclass = tc; + save_info = t_info; + tc = TC_BINOP; + t_info = OC_CONCAT | SS | P(35); + } + + t_tclass = tc; + } + ltclass = t_tclass; + + /* Are we ready for this? 
*/ + if (!(ltclass & expected)) + syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? + EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); + + return ltclass; +#undef concat_inserted +#undef save_tclass +#undef save_info +#undef ltclass +} + +static void rollback_token(void) +{ + t_rollback = TRUE; +} + +static node *new_node(uint32_t info) +{ + node *n; + + n = xzalloc(sizeof(node)); + n->info = info; + n->lineno = g_lineno; + return n; +} + +static node *mk_re_node(const char *s, node *n, regex_t *re) +{ + n->info = OC_REGEXP; + n->l.re = re; + n->r.ire = re + 1; + xregcomp(re, s, REG_EXTENDED); + xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); + + return n; +} + +static node *condition(void) +{ + next_token(TC_SEQSTART); + return parse_expr(TC_SEQTERM); +} + +/* parse expression terminated by given argument, return ptr + * to built subtree. Terminator is eaten by parse_expr */ +static node *parse_expr(uint32_t iexp) +{ + node sn; + node *cn = &sn; + node *vn, *glptr; + uint32_t tc, xtc; + var *v; + + sn.info = PRIMASK; + sn.r.n = glptr = NULL; + xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp; + + while (!((tc = next_token(xtc)) & iexp)) { + if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { + /* input redirection (<) attached to glptr node */ + cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); + cn->a.n = glptr; + xtc = TC_OPERAND | TC_UOPPRE; + glptr = NULL; + + } else if (tc & (TC_BINOP | TC_UOPPOST)) { + /* for binary and postfix-unary operators, jump back over + * previous operators with higher priority */ + vn = cn; + while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) + || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) ) + vn = vn->a.n; + if ((t_info & OPCLSMASK) == OC_TERNARY) + t_info += P(6); + cn = vn->a.n->r.n = new_node(t_info); + cn->a.n = vn->a.n; + if (tc & TC_BINOP) { + cn->l.n = vn; + xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; + if ((t_info & OPCLSMASK) == OC_PGETLINE) { + /* it's a pipe */ + next_token(TC_GETLINE); + /* give maximum 
priority to this pipe */ + cn->info &= ~PRIMASK; + xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; + } + } else { + cn->r.n = vn; + xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; + } + vn->a.n = cn; + + } else { + /* for operands and prefix-unary operators, attach them + * to last node */ + vn = cn; + cn = vn->r.n = new_node(t_info); + cn->a.n = vn; + xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; + if (tc & (TC_OPERAND | TC_REGEXP)) { + xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp; + /* one should be very careful with switch on tclass - + * only simple tclasses should be used! */ + switch (tc) { + case TC_VARIABLE: + case TC_ARRAY: + cn->info = OC_VAR; + v = hash_search(ahash, t_string); + if (v != NULL) { + cn->info = OC_FNARG; + cn->l.i = v->x.aidx; + } else { + cn->l.v = newvar(t_string); + } + if (tc & TC_ARRAY) { + cn->info |= xS; + cn->r.n = parse_expr(TC_ARRTERM); + } + break; + + case TC_NUMBER: + case TC_STRING: + cn->info = OC_VAR; + v = cn->l.v = xzalloc(sizeof(var)); + if (tc & TC_NUMBER) + setvar_i(v, t_double); + else + setvar_s(v, t_string); + break; + + case TC_REGEXP: + mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); + break; + + case TC_FUNCTION: + cn->info = OC_FUNC; + cn->r.f = newfunc(t_string); + cn->l.n = condition(); + break; + + case TC_SEQSTART: + cn = vn->r.n = parse_expr(TC_SEQTERM); + cn->a.n = vn; + break; + + case TC_GETLINE: + glptr = cn; + xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; + break; + + case TC_BUILTIN: + cn->l.n = condition(); + break; + } + } + } + } + return sn.r.n; +} + +/* add node to chain. 
Return ptr to alloc'd node */ +static node *chain_node(uint32_t info) +{ + node *n; + + if (!seq->first) + seq->first = seq->last = new_node(0); + + if (seq->programname != g_progname) { + seq->programname = g_progname; + n = chain_node(OC_NEWSOURCE); + n->l.s = xstrdup(g_progname); + } + + n = seq->last; + n->info = info; + seq->last = n->a.n = new_node(OC_DONE); + + return n; +} + +static void chain_expr(uint32_t info) +{ + node *n; + + n = chain_node(info); + n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM); + if (t_tclass & TC_GRPTERM) + rollback_token(); +} + +static node *chain_loop(node *nn) +{ + node *n, *n2, *save_brk, *save_cont; + + save_brk = break_ptr; + save_cont = continue_ptr; + + n = chain_node(OC_BR | Vx); + continue_ptr = new_node(OC_EXEC); + break_ptr = new_node(OC_EXEC); + chain_group(); + n2 = chain_node(OC_EXEC | Vx); + n2->l.n = nn; + n2->a.n = n; + continue_ptr->a.n = n2; + break_ptr->a.n = n->r.n = seq->last; + + continue_ptr = save_cont; + break_ptr = save_brk; + + return n; +} + +/* parse group and attach it to chain */ +static void chain_group(void) +{ + uint32_t c; + node *n, *n2, *n3; + + do { + c = next_token(TC_GRPSEQ); + } while (c & TC_NEWLINE); + + if (c & TC_GRPSTART) { + while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { + if (t_tclass & TC_NEWLINE) continue; + rollback_token(); + chain_group(); + } + } else if (c & (TC_OPSEQ | TC_OPTERM)) { + rollback_token(); + chain_expr(OC_EXEC | Vx); + } else { /* TC_STATEMNT */ + switch (t_info & OPCLSMASK) { + case ST_IF: + n = chain_node(OC_BR | Vx); + n->l.n = condition(); + chain_group(); + n2 = chain_node(OC_EXEC); + n->r.n = seq->last; + if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { + chain_group(); + n2->a.n = seq->last; + } else { + rollback_token(); + } + break; + + case ST_WHILE: + n2 = condition(); + n = chain_loop(NULL); + n->l.n = n2; + break; + + case ST_DO: + n2 = chain_node(OC_EXEC); + n = chain_loop(NULL); + n2->a.n = n->a.n; + next_token(TC_WHILE); + 
n->l.n = condition(); + break; + + case ST_FOR: + next_token(TC_SEQSTART); + n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); + if (t_tclass & TC_SEQTERM) { /* for-in */ + if ((n2->info & OPCLSMASK) != OC_IN) + syntax_error(EMSG_UNEXP_TOKEN); + n = chain_node(OC_WALKINIT | VV); + n->l.n = n2->l.n; + n->r.n = n2->r.n; + n = chain_loop(NULL); + n->info = OC_WALKNEXT | Vx; + n->l.n = n2->l.n; + } else { /* for (;;) */ + n = chain_node(OC_EXEC | Vx); + n->l.n = n2; + n2 = parse_expr(TC_SEMICOL); + n3 = parse_expr(TC_SEQTERM); + n = chain_loop(n3); + n->l.n = n2; + if (!n2) + n->info = OC_EXEC; + } + break; + + case OC_PRINT: + case OC_PRINTF: + n = chain_node(t_info); + n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); + if (t_tclass & TC_OUTRDR) { + n->info |= t_info; + n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM); + } + if (t_tclass & TC_GRPTERM) + rollback_token(); + break; + + case OC_BREAK: + n = chain_node(OC_EXEC); + n->a.n = break_ptr; + break; + + case OC_CONTINUE: + n = chain_node(OC_EXEC); + n->a.n = continue_ptr; + break; + + /* delete, next, nextfile, return, exit */ + default: + chain_expr(t_info); + } + } +} + +static void parse_program(char *p) +{ + uint32_t tclass; + node *cn; + func *f; + var *v; + + g_pos = p; + t_lineno = 1; + while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | + TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { + + if (tclass & TC_OPTERM) + continue; + + seq = &mainseq; + if (tclass & TC_BEGIN) { + seq = &beginseq; + chain_group(); + + } else if (tclass & TC_END) { + seq = &endseq; + chain_group(); + + } else if (tclass & TC_FUNCDECL) { + next_token(TC_FUNCTION); + g_pos++; + f = newfunc(t_string); + f->body.first = NULL; + f->nargs = 0; + while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) { + v = findvar(ahash, t_string); + v->x.aidx = (f->nargs)++; + + if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) + break; + } + seq = &(f->body); + chain_group(); + clear_array(ahash); + + } else if (tclass & 
TC_OPSEQ) { + rollback_token(); + cn = chain_node(OC_TEST); + cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); + if (t_tclass & TC_GRPSTART) { + rollback_token(); + chain_group(); + } else { + chain_node(OC_PRINT); + } + cn->r.n = mainseq.last; + + } else /* if (tclass & TC_GRPSTART) */ { + rollback_token(); + chain_group(); + } + } +} + + +/* -------- program execution part -------- */ + +static node *mk_splitter(const char *s, tsplitter *spl) +{ + regex_t *re, *ire; + node *n; + + re = &spl->re[0]; + ire = &spl->re[1]; + n = &spl->n; + if ((n->info & OPCLSMASK) == OC_REGEXP) { + regfree(re); + regfree(ire); // TODO: nuke ire, use re+1? + } + if (strlen(s) > 1) { + mk_re_node(s, n, re); + } else { + n->info = (uint32_t) *s; + } + + return n; +} + +/* use node as a regular expression. Supplied with node ptr and regex_t + * storage space. Return ptr to regex (if result points to preg, it should + * be later regfree'd manually + */ +static regex_t *as_regex(node *op, regex_t *preg) +{ + var *v; + const char *s; + + if ((op->info & OPCLSMASK) == OC_REGEXP) { + return icase ? op->r.ire : op->l.re; + } + v = nvalloc(1); + s = getvar_s(evaluate(op, v)); + xregcomp(preg, s, icase ? 
REG_EXTENDED | REG_ICASE : REG_EXTENDED); + nvfree(v); + return preg; +} + +/* gradually increasing buffer */ +static void qrealloc(char **b, int n, int *size) +{ + if (!*b || n >= *size) { + *size = n + (n>>1) + 80; + *b = xrealloc(*b, *size); + } +} + +/* resize field storage space */ +static void fsrealloc(int size) +{ + int i; + + if (size >= maxfields) { + i = maxfields; + maxfields = size + 16; + Fields = xrealloc(Fields, maxfields * sizeof(var)); + for (; i < maxfields; i++) { + Fields[i].type = VF_SPECIAL; + Fields[i].string = NULL; + } + } + + if (size < nfields) { + for (i = size; i < nfields; i++) { + clrvar(Fields + i); + } + } + nfields = size; +} + +static int awk_split(const char *s, node *spl, char **slist) +{ + int l, n = 0; + char c[4]; + char *s1; + regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... + + /* in worst case, each char would be a separate field */ + *slist = s1 = xzalloc(strlen(s) * 2 + 3); + strcpy(s1, s); + + c[0] = c[1] = (char)spl->info; + c[2] = c[3] = '\0'; + if (*getvar_s(intvar[RS]) == '\0') + c[2] = '\n'; + + if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ + if (!*s) + return n; /* "": zero fields */ + n++; /* at least one field will be there */ + do { + l = strcspn(s, c+2); /* len till next NUL or \n */ + if (regexec(icase ? 
spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 + && pmatch[0].rm_so <= l + ) { + l = pmatch[0].rm_so; + if (pmatch[0].rm_eo == 0) { + l++; + pmatch[0].rm_eo++; + } + n++; /* we saw yet another delimiter */ + } else { + pmatch[0].rm_eo = l; + if (s[l]) pmatch[0].rm_eo++; + } + memcpy(s1, s, l); + s1[l] = '\0'; + nextword(&s1); + s += pmatch[0].rm_eo; + } while (*s); + return n; + } + if (c[0] == '\0') { /* null split */ + while (*s) { + *s1++ = *s++; + *s1++ = '\0'; + n++; + } + return n; + } + if (c[0] != ' ') { /* single-character split */ + if (icase) { + c[0] = toupper(c[0]); + c[1] = tolower(c[1]); + } + if (*s1) n++; + while ((s1 = strpbrk(s1, c))) { + *s1++ = '\0'; + n++; + } + return n; + } + /* space split */ + while (*s) { + s = skip_whitespace(s); + if (!*s) break; + n++; + while (*s && !isspace(*s)) + *s1++ = *s++; + *s1++ = '\0'; + } + return n; +} + +static void split_f0(void) +{ +/* static char *fstrings; */ +#define fstrings (G.split_f0__fstrings) + + int i, n; + char *s; + + if (is_f0_split) + return; + + is_f0_split = TRUE; + free(fstrings); + fsrealloc(0); + n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings); + fsrealloc(n); + s = fstrings; + for (i = 0; i < n; i++) { + Fields[i].string = nextword(&s); + Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY); + } + + /* set NF manually to avoid side effects */ + clrvar(intvar[NF]); + intvar[NF]->type = VF_NUMBER | VF_SPECIAL; + intvar[NF]->number = nfields; +#undef fstrings +} + +/* perform additional actions when some internal variables changed */ +static void handle_special(var *v) +{ + int n; + char *b; + const char *sep, *s; + int sl, l, len, i, bsize; + + if (!(v->type & VF_SPECIAL)) + return; + + if (v == intvar[NF]) { + n = (int)getvar_i(v); + fsrealloc(n); + + /* recalculate $0 */ + sep = getvar_s(intvar[OFS]); + sl = strlen(sep); + b = NULL; + len = 0; + for (i = 0; i < n; i++) { + s = getvar_s(&Fields[i]); + l = strlen(s); + if (b) { + memcpy(b+len, sep, sl); + len += sl; + } + 
qrealloc(&b, len+l+sl, &bsize); + memcpy(b+len, s, l); + len += l; + } + if (b) + b[len] = '\0'; + setvar_p(intvar[F0], b); + is_f0_split = TRUE; + + } else if (v == intvar[F0]) { + is_f0_split = FALSE; + + } else if (v == intvar[FS]) { + mk_splitter(getvar_s(v), &fsplitter); + + } else if (v == intvar[RS]) { + mk_splitter(getvar_s(v), &rsplitter); + + } else if (v == intvar[IGNORECASE]) { + icase = istrue(v); + + } else { /* $n */ + n = getvar_i(intvar[NF]); + setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1); + /* right here v is invalid. Just to note... */ + } +} + +/* step through func/builtin/etc arguments */ +static node *nextarg(node **pn) +{ + node *n; + + n = *pn; + if (n && (n->info & OPCLSMASK) == OC_COMMA) { + *pn = n->r.n; + n = n->l.n; + } else { + *pn = NULL; + } + return n; +} + +static void hashwalk_init(var *v, xhash *array) +{ + char **w; + hash_item *hi; + unsigned i; + + if (v->type & VF_WALK) + free(v->x.walker); + + v->type |= VF_WALK; + w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen); + w[0] = w[1] = (char *)(w + 2); + for (i = 0; i < array->csize; i++) { + hi = array->items[i]; + while (hi) { + strcpy(*w, hi->name); + nextword(w); + hi = hi->next; + } + } +} + +static int hashwalk_next(var *v) +{ + char **w; + + w = v->x.walker; + if (w[1] == w[0]) + return FALSE; + + setvar_s(v, nextword(w+1)); + return TRUE; +} + +/* evaluate node, return 1 when result is true, 0 otherwise */ +static int ptest(node *pattern) +{ + /* ptest__v is "static": to save stack space? 
*/ + return istrue(evaluate(pattern, &G.ptest__v)); +} + +/* read next record from stream rsm into a variable v */ +static int awk_getline(rstream *rsm, var *v) +{ + char *b; + regmatch_t pmatch[2]; + int a, p, pp=0, size; + int fd, so, eo, r, rp; + char c, *m, *s; + + /* we're using our own buffer since we need access to accumulating + * characters + */ + fd = fileno(rsm->F); + m = rsm->buffer; + a = rsm->adv; + p = rsm->pos; + size = rsm->size; + c = (char) rsplitter.n.info; + rp = 0; + + if (!m) qrealloc(&m, 256, &size); + do { + b = m + a; + so = eo = p; + r = 1; + if (p > 0) { + if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) { + if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re, + b, 1, pmatch, 0) == 0) { + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + if (b[eo] != '\0') + break; + } + } else if (c != '\0') { + s = strchr(b+pp, c); + if (!s) s = memchr(b+pp, '\0', p - pp); + if (s) { + so = eo = s-b; + eo++; + break; + } + } else { + while (b[rp] == '\n') + rp++; + s = strstr(b+rp, "\n\n"); + if (s) { + so = eo = s-b; + while (b[eo] == '\n') eo++; + if (b[eo] != '\0') + break; + } + } + } + + if (a > 0) { + memmove(m, (const void *)(m+a), p+1); + b = m; + a = 0; + } + + qrealloc(&m, a+p+128, &size); + b = m + a; + pp = p; + p += safe_read(fd, b+p, size-p-1); + if (p < pp) { + p = 0; + r = 0; + setvar_i(intvar[ERRNO], errno); + } + b[p] = '\0'; + + } while (p > pp); + + if (p == 0) { + r--; + } else { + c = b[so]; b[so] = '\0'; + setvar_s(v, b+rp); + v->type |= VF_USER; + b[so] = c; + c = b[eo]; b[eo] = '\0'; + setvar_s(intvar[RT], b+so); + b[eo] = c; + } + + rsm->buffer = m; + rsm->adv = a + eo; + rsm->pos = p - eo; + rsm->size = size; + + return r; +} + +static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) +{ + int r = 0; + char c; + const char *s = format; + + if (int_as_int && n == (int)n) { + r = snprintf(b, size, "%d", (int)n); + } else { + do { c = *s; } while (c && *++s); + if (strchr("diouxX", c)) { + r = 
snprintf(b, size, format, (int)n); + } else if (strchr("eEfgG", c)) { + r = snprintf(b, size, format, n); + } else { + syntax_error(EMSG_INV_FMT); + } + } + return r; +} + + +/* formatted output into an allocated buffer, return ptr to buffer */ +static char *awk_printf(node *n) +{ + char *b = NULL; + char *fmt, *s, *f; + const char *s1; + int i, j, incr, bsize; + char c, c1; + var *v, *arg; + + v = nvalloc(1); + fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); + + i = 0; + while (*f) { + s = f; + while (*f && (*f != '%' || *(++f) == '%')) + f++; + while (*f && !isalpha(*f)) { + if (*f == '*') + syntax_error("%*x formats are not supported"); + f++; + } + + incr = (f - s) + MAXVARFMT; + qrealloc(&b, incr + i, &bsize); + c = *f; + if (c != '\0') f++; + c1 = *f; + *f = '\0'; + arg = evaluate(nextarg(&n), v); + + j = i; + if (c == 'c' || !c) { + i += sprintf(b+i, s, is_numeric(arg) ? + (char)getvar_i(arg) : *getvar_s(arg)); + } else if (c == 's') { + s1 = getvar_s(arg); + qrealloc(&b, incr+i+strlen(s1), &bsize); + i += sprintf(b+i, s, s1); + } else { + i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE); + } + *f = c1; + + /* if there was an error while sprintf, return value is negative */ + if (i < j) i = j; + } + + b = xrealloc(b, i + 1); + free(fmt); + nvfree(v); + b[i] = '\0'; + return b; +} + +/* common substitution routine + * replace (nm) substring of (src) that match (n) with (repl), store + * result into (dest), return number of substitutions. If nm=0, replace + * all matches. If src or dst is NULL, use $0. 
If ex=TRUE, enable + * subexpression matching (\1-\9) + */ +static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) +{ + char *ds = NULL; + const char *s; + const char *sp; + int c, i, j, di, rl, so, eo, nbs, n, dssize; + regmatch_t pmatch[10]; + regex_t sreg, *re; + + re = as_regex(rn, &sreg); + if (!src) src = intvar[F0]; + if (!dest) dest = intvar[F0]; + + i = di = 0; + sp = getvar_s(src); + rl = strlen(repl); + while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) { + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + + qrealloc(&ds, di + eo + rl, &dssize); + memcpy(ds + di, sp, eo); + di += eo; + if (++i >= nm) { + /* replace */ + di -= (eo - so); + nbs = 0; + for (s = repl; *s; s++) { + ds[di++] = c = *s; + if (c == '\\') { + nbs++; + continue; + } + if (c == '&' || (ex && c >= '0' && c <= '9')) { + di -= ((nbs + 3) >> 1); + j = 0; + if (c != '&') { + j = c - '0'; + nbs++; + } + if (nbs % 2) { + ds[di++] = c; + } else { + n = pmatch[j].rm_eo - pmatch[j].rm_so; + qrealloc(&ds, di + rl + n, &dssize); + memcpy(ds + di, sp + pmatch[j].rm_so, n); + di += n; + } + } + nbs = 0; + } + } + + sp += eo; + if (i == nm) break; + if (eo == so) { + ds[di] = *sp++; + if (!ds[di++]) break; + } + } + + qrealloc(&ds, di + strlen(sp), &dssize); + strcpy(ds + di, sp); + setvar_p(dest, ds); + if (re == &sreg) regfree(re); + return i; +} + +static var *exec_builtin(node *op, var *res) +{ +#define tspl (G.exec_builtin__tspl) + + int (*to_xxx)(int); + var *tv; + node *an[4]; + var *av[4]; + const char *as[4]; + regmatch_t pmatch[2]; + regex_t sreg, *re; + node *spl; + uint32_t isr, info; + int nargs; + time_t tt; + char *s, *s1; + int i, l, ll, n; + + tv = nvalloc(4); + isr = info = op->info; + op = op->l.n; + + av[2] = av[3] = NULL; + for (i = 0; i < 4 && op; i++) { + an[i] = nextarg(&op); + if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]); + if (isr & 0x08000000) as[i] = getvar_s(av[i]); + isr >>= 1; + } + + nargs = i; + if 
((uint32_t)nargs < (info >> 30)) + syntax_error(EMSG_TOO_FEW_ARGS); + + switch (info & OPNMASK) { + + case B_a2: +#if ENABLE_FEATURE_AWK_LIBM + setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1]))); +#else + syntax_error(EMSG_NO_MATH); +#endif + break; + + case B_sp: + if (nargs > 2) { + spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? + an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); + } else { + spl = &fsplitter.n; + } + + n = awk_split(as[0], spl, &s); + s1 = s; + clear_array(iamarray(av[1])); + for (i = 1; i <= n; i++) + setari_u(av[1], i, nextword(&s1)); + free(s); + setvar_i(res, n); + break; + + case B_ss: + l = strlen(as[0]); + i = getvar_i(av[1]) - 1; + if (i > l) i = l; + if (i < 0) i = 0; + n = (nargs > 2) ? getvar_i(av[2]) : l-i; + if (n < 0) n = 0; + s = xstrndup(as[0]+i, n); + setvar_p(res, s); + break; + + /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5: + * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */ + case B_an: + setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1])); + break; + + case B_co: + setvar_i(res, ~getvar_i_int(av[0])); + break; + + case B_ls: + setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1])); + break; + + case B_or: + setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1])); + break; + + case B_rs: + setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1])); + break; + + case B_xo: + setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1])); + break; + + case B_lo: + to_xxx = tolower; + goto lo_cont; + + case B_up: + to_xxx = toupper; + lo_cont: + s1 = s = xstrdup(as[0]); + while (*s1) { + *s1 = (*to_xxx)(*s1); + s1++; + } + setvar_p(res, s); + break; + + case B_ix: + n = 0; + ll = strlen(as[1]); + l = strlen(as[0]) - ll; + if (ll > 0 && l >= 0) { + if (!icase) { + s = strstr(as[0], as[1]); + if (s) n = (s - as[0]) + 1; + } else { + /* this piece of code is terribly slow and + * really should be rewritten + */ + for (i=0; i<=l; i++) { + if (strncasecmp(as[0]+i, as[1], 
ll) == 0) { + n = i+1; + break; + } + } + } + } + setvar_i(res, n); + break; + + case B_ti: + if (nargs > 1) + tt = getvar_i(av[1]); + else + time(&tt); + //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"; + i = strftime(g_buf, MAXVARFMT, + ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"), + localtime(&tt)); + g_buf[i] = '\0'; + setvar_s(res, g_buf); + break; + + case B_ma: + re = as_regex(an[1], &sreg); + n = regexec(re, as[0], 1, pmatch, 0); + if (n == 0) { + pmatch[0].rm_so++; + pmatch[0].rm_eo++; + } else { + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = -1; + } + setvar_i(newvar("RSTART"), pmatch[0].rm_so); + setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); + setvar_i(res, pmatch[0].rm_so); + if (re == &sreg) regfree(re); + break; + + case B_ge: + awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE); + break; + + case B_gs: + setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE)); + break; + + case B_su: + setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE)); + break; + } + + nvfree(tv); + return res; +#undef tspl +} + +/* + * Evaluate node - the heart of the program. Supplied with subtree + * and place where to store result. returns ptr to result. 
+ */ +#define XC(n) ((n) >> 8) + +static var *evaluate(node *op, var *res) +{ +/* This procedure is recursive so we should count every byte */ +#define fnargs (G.evaluate__fnargs) +/* seed is initialized to 1 */ +#define seed (G.evaluate__seed) +#define sreg (G.evaluate__sreg) + + node *op1; + var *v1; + union { + var *v; + const char *s; + double d; + int i; + } L, R; + uint32_t opinfo; + int opn; + union { + char *s; + rstream *rsm; + FILE *F; + var *v; + regex_t *re; + uint32_t info; + } X; + + if (!op) + return setvar_s(res, NULL); + + v1 = nvalloc(2); + + while (op) { + opinfo = op->info; + opn = (opinfo & OPNMASK); + g_lineno = op->lineno; + + /* execute inevitable things */ + op1 = op->l.n; + if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1); + if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1); + if (opinfo & OF_STR1) L.s = getvar_s(L.v); + if (opinfo & OF_STR2) R.s = getvar_s(R.v); + if (opinfo & OF_NUM1) L.d = getvar_i(L.v); + + switch (XC(opinfo & OPCLSMASK)) { + + /* -- iterative node type -- */ + + /* test pattern */ + case XC( OC_TEST ): + if ((op1->info & OPCLSMASK) == OC_COMMA) { + /* it's range pattern */ + if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { + op->info |= OF_CHECKED; + if (ptest(op1->r.n)) + op->info &= ~OF_CHECKED; + + op = op->a.n; + } else { + op = op->r.n; + } + } else { + op = (ptest(op1)) ? op->a.n : op->r.n; + } + break; + + /* just evaluate an expression, also used as unconditional jump */ + case XC( OC_EXEC ): + break; + + /* branch, used in if-else and various loops */ + case XC( OC_BR ): + op = istrue(L.v) ? op->a.n : op->r.n; + break; + + /* initialize for-in loop */ + case XC( OC_WALKINIT ): + hashwalk_init(L.v, iamarray(R.v)); + break; + + /* get next array item */ + case XC( OC_WALKNEXT ): + op = hashwalk_next(L.v) ? 
op->a.n : op->r.n; + break; + + case XC( OC_PRINT ): + case XC( OC_PRINTF ): + X.F = stdout; + if (op->r.n) { + X.rsm = newfile(R.s); + if (!X.rsm->F) { + if (opn == '|') { + X.rsm->F = popen(R.s, "w"); + if (X.rsm->F == NULL) + bb_perror_msg_and_die("popen"); + X.rsm->is_pipe = 1; + } else { + X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a"); + } + } + X.F = X.rsm->F; + } + + if ((opinfo & OPCLSMASK) == OC_PRINT) { + if (!op1) { + fputs(getvar_s(intvar[F0]), X.F); + } else { + while (op1) { + L.v = evaluate(nextarg(&op1), v1); + if (L.v->type & VF_NUMBER) { + fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), + getvar_i(L.v), TRUE); + fputs(g_buf, X.F); + } else { + fputs(getvar_s(L.v), X.F); + } + + if (op1) fputs(getvar_s(intvar[OFS]), X.F); + } + } + fputs(getvar_s(intvar[ORS]), X.F); + + } else { /* OC_PRINTF */ + L.s = awk_printf(op1); + fputs(L.s, X.F); + free((char*)L.s); + } + fflush(X.F); + break; + + case XC( OC_DELETE ): + X.info = op1->info & OPCLSMASK; + if (X.info == OC_VAR) { + R.v = op1->l.v; + } else if (X.info == OC_FNARG) { + R.v = &fnargs[op1->l.i]; + } else { + syntax_error(EMSG_NOT_ARRAY); + } + + if (op1->r.n) { + clrvar(L.v); + L.s = getvar_s(evaluate(op1->r.n, v1)); + hash_remove(iamarray(R.v), L.s); + } else { + clear_array(iamarray(R.v)); + } + break; + + case XC( OC_NEWSOURCE ): + g_progname = op->l.s; + break; + + case XC( OC_RETURN ): + copyvar(res, L.v); + break; + + case XC( OC_NEXTFILE ): + nextfile = TRUE; + case XC( OC_NEXT ): + nextrec = TRUE; + case XC( OC_DONE ): + clrvar(res); + break; + + case XC( OC_EXIT ): + awk_exit(L.d); + + /* -- recursive node type -- */ + + case XC( OC_VAR ): + L.v = op->l.v; + if (L.v == intvar[NF]) + split_f0(); + goto v_cont; + + case XC( OC_FNARG ): + L.v = &fnargs[op->l.i]; + v_cont: + res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v; + break; + + case XC( OC_IN ): + setvar_i(res, hash_search(iamarray(R.v), L.s) ? 
1 : 0); + break; + + case XC( OC_REGEXP ): + op1 = op; + L.s = getvar_s(intvar[F0]); + goto re_cont; + + case XC( OC_MATCH ): + op1 = op->r.n; + re_cont: + X.re = as_regex(op1, &sreg); + R.i = regexec(X.re, L.s, 0, NULL, 0); + if (X.re == &sreg) regfree(X.re); + setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0)); + break; + + case XC( OC_MOVE ): + /* if source is a temporary string, jusk relink it to dest */ + if (R.v == v1+1 && R.v->string) { + res = setvar_p(L.v, R.v->string); + R.v->string = NULL; + } else { + res = copyvar(L.v, R.v); + } + break; + + case XC( OC_TERNARY ): + if ((op->r.n->info & OPCLSMASK) != OC_COLON) + syntax_error(EMSG_POSSIBLE_ERROR); + res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res); + break; + + case XC( OC_FUNC ): + if (!op->r.f->body.first) + syntax_error(EMSG_UNDEF_FUNC); + + X.v = R.v = nvalloc(op->r.f->nargs+1); + while (op1) { + L.v = evaluate(nextarg(&op1), v1); + copyvar(R.v, L.v); + R.v->type |= VF_CHILD; + R.v->x.parent = L.v; + if (++R.v - X.v >= op->r.f->nargs) + break; + } + + R.v = fnargs; + fnargs = X.v; + + L.s = g_progname; + res = evaluate(op->r.f->body.first, res); + g_progname = L.s; + + nvfree(fnargs); + fnargs = R.v; + break; + + case XC( OC_GETLINE ): + case XC( OC_PGETLINE ): + if (op1) { + X.rsm = newfile(L.s); + if (!X.rsm->F) { + if ((opinfo & OPCLSMASK) == OC_PGETLINE) { + X.rsm->F = popen(L.s, "r"); + X.rsm->is_pipe = TRUE; + } else { + X.rsm->F = fopen_for_read(L.s); /* not xfopen! 
*/ + } + } + } else { + if (!iF) iF = next_input_file(); + X.rsm = iF; + } + + if (!X.rsm->F) { + setvar_i(intvar[ERRNO], errno); + setvar_i(res, -1); + break; + } + + if (!op->r.n) + R.v = intvar[F0]; + + L.i = awk_getline(X.rsm, R.v); + if (L.i > 0) { + if (!op1) { + incvar(intvar[FNR]); + incvar(intvar[NR]); + } + } + setvar_i(res, L.i); + break; + + /* simple builtins */ + case XC( OC_FBLTIN ): + switch (opn) { + + case F_in: + R.d = (int)L.d; + break; + + case F_rn: + R.d = (double)rand() / (double)RAND_MAX; + break; +#if ENABLE_FEATURE_AWK_LIBM + case F_co: + R.d = cos(L.d); + break; + + case F_ex: + R.d = exp(L.d); + break; + + case F_lg: + R.d = log(L.d); + break; + + case F_si: + R.d = sin(L.d); + break; + + case F_sq: + R.d = sqrt(L.d); + break; +#else + case F_co: + case F_ex: + case F_lg: + case F_si: + case F_sq: + syntax_error(EMSG_NO_MATH); + break; +#endif + case F_sr: + R.d = (double)seed; + seed = op1 ? (unsigned)L.d : (unsigned)time(NULL); + srand(seed); + break; + + case F_ti: + R.d = time(NULL); + break; + + case F_le: + if (!op1) + L.s = getvar_s(intvar[F0]); + R.d = strlen(L.s); + break; + + case F_sy: + fflush(NULL); + R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s) + ? (system(L.s) >> 8) : 0; + break; + + case F_ff: + if (!op1) + fflush(stdout); + else { + if (L.s && *L.s) { + X.rsm = newfile(L.s); + fflush(X.rsm->F); + } else { + fflush(NULL); + } + } + break; + + case F_cl: + X.rsm = (rstream *)hash_search(fdhash, L.s); + if (X.rsm) { + R.i = X.rsm->is_pipe ? 
pclose(X.rsm->F) : fclose(X.rsm->F); + free(X.rsm->buffer); + hash_remove(fdhash, L.s); + } + if (R.i != 0) + setvar_i(intvar[ERRNO], errno); + R.d = (double)R.i; + break; + } + setvar_i(res, R.d); + break; + + case XC( OC_BUILTIN ): + res = exec_builtin(op, res); + break; + + case XC( OC_SPRINTF ): + setvar_p(res, awk_printf(op1)); + break; + + case XC( OC_UNARY ): + X.v = R.v; + L.d = R.d = getvar_i(R.v); + switch (opn) { + case 'P': + L.d = ++R.d; + goto r_op_change; + case 'p': + R.d++; + goto r_op_change; + case 'M': + L.d = --R.d; + goto r_op_change; + case 'm': + R.d--; + goto r_op_change; + case '!': + L.d = istrue(X.v) ? 0 : 1; + break; + case '-': + L.d = -R.d; + break; + r_op_change: + setvar_i(X.v, R.d); + } + setvar_i(res, L.d); + break; + + case XC( OC_FIELD ): + R.i = (int)getvar_i(R.v); + if (R.i == 0) { + res = intvar[F0]; + } else { + split_f0(); + if (R.i > nfields) + fsrealloc(R.i); + res = &Fields[R.i - 1]; + } + break; + + /* concatenation (" ") and index joining (",") */ + case XC( OC_CONCAT ): + case XC( OC_COMMA ): + opn = strlen(L.s) + strlen(R.s) + 2; + X.s = xmalloc(opn); + strcpy(X.s, L.s); + if ((opinfo & OPCLSMASK) == OC_COMMA) { + L.s = getvar_s(intvar[SUBSEP]); + X.s = xrealloc(X.s, opn + strlen(L.s)); + strcat(X.s, L.s); + } + strcat(X.s, R.s); + setvar_p(res, X.s); + break; + + case XC( OC_LAND ): + setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0); + break; + + case XC( OC_LOR ): + setvar_i(res, istrue(L.v) ? 
1 : ptest(op->r.n)); + break; + + case XC( OC_BINARY ): + case XC( OC_REPLACE ): + R.d = getvar_i(R.v); + switch (opn) { + case '+': + L.d += R.d; + break; + case '-': + L.d -= R.d; + break; + case '*': + L.d *= R.d; + break; + case '/': + if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO); + L.d /= R.d; + break; + case '&': +#if ENABLE_FEATURE_AWK_LIBM + L.d = pow(L.d, R.d); +#else + syntax_error(EMSG_NO_MATH); +#endif + break; + case '%': + if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO); + L.d -= (int)(L.d / R.d) * R.d; + break; + } + res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d); + break; + + case XC( OC_COMPARE ): + if (is_numeric(L.v) && is_numeric(R.v)) { + L.d = getvar_i(L.v) - getvar_i(R.v); + } else { + L.s = getvar_s(L.v); + R.s = getvar_s(R.v); + L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s); + } + switch (opn & 0xfe) { + case 0: + R.i = (L.d > 0); + break; + case 2: + R.i = (L.d >= 0); + break; + case 4: + R.i = (L.d == 0); + break; + } + setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0); + break; + + default: + syntax_error(EMSG_POSSIBLE_ERROR); + } + if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) + op = op->a.n; + if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) + break; + if (nextrec) + break; + } + nvfree(v1); + return res; +#undef fnargs +#undef seed +#undef sreg +} + + +/* -------- main & co. 
-------- */ + +static int awk_exit(int r) +{ + var tv; + unsigned i; + hash_item *hi; + + zero_out_var(&tv); + + if (!exiting) { + exiting = TRUE; + nextrec = FALSE; + evaluate(endseq.first, &tv); + } + + /* waiting for children */ + for (i = 0; i < fdhash->csize; i++) { + hi = fdhash->items[i]; + while (hi) { + if (hi->data.rs.F && hi->data.rs.is_pipe) + pclose(hi->data.rs.F); + hi = hi->next; + } + } + + exit(r); +} + +/* if expr looks like "var=value", perform assignment and return 1, + * otherwise return 0 */ +static int is_assignment(const char *expr) +{ + char *exprc, *s, *s0, *s1; + + exprc = xstrdup(expr); + if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) { + free(exprc); + return FALSE; + } + + *(s++) = '\0'; + s0 = s1 = s; + while (*s) + *(s1++) = nextchar(&s); + + *s1 = '\0'; + setvar_u(newvar(exprc), s0); + free(exprc); + return TRUE; +} + +/* switch to next input file */ +static rstream *next_input_file(void) +{ +#define rsm (G.next_input_file__rsm) +#define files_happen (G.next_input_file__files_happen) + + FILE *F = NULL; + const char *fname, *ind; + + if (rsm.F) fclose(rsm.F); + rsm.F = NULL; + rsm.pos = rsm.adv = 0; + + do { + if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { + if (files_happen) + return NULL; + fname = "-"; + F = stdin; + } else { + ind = getvar_s(incvar(intvar[ARGIND])); + fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); + if (fname && *fname && !is_assignment(fname)) + F = xfopen_stdin(fname); + } + } while (!F); + + files_happen = TRUE; + setvar_s(intvar[FILENAME], fname); + rsm.F = F; + return &rsm; +#undef rsm +#undef files_happen +} + +int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int awk_main(int argc, char **argv) +{ + unsigned opt; + char *opt_F, *opt_W; + llist_t *list_v = NULL; + llist_t *list_f = NULL; + int i, j; + var *v; + var tv; + char **envp; + char *vnames = (char *)vNames; /* cheat */ + char *vvalues = (char *)vValues; + + INIT_G(); + + /* Undo busybox.c, or else 
strtod may eat ','! This breaks parsing: + * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */ + if (ENABLE_LOCALE_SUPPORT) + setlocale(LC_NUMERIC, "C"); + + zero_out_var(&tv); + + /* allocate global buffer */ + g_buf = xmalloc(MAXVARFMT + 1); + + vhash = hash_init(); + ahash = hash_init(); + fdhash = hash_init(); + fnhash = hash_init(); + + /* initialize variables */ + for (i = 0; *vnames; i++) { + intvar[i] = v = newvar(nextword(&vnames)); + if (*vvalues != '\377') + setvar_s(v, nextword(&vvalues)); + else + setvar_i(v, 0); + + if (*vnames == '*') { + v->type |= VF_SPECIAL; + vnames++; + } + } + + handle_special(intvar[FS]); + handle_special(intvar[RS]); + + newfile("/dev/stdin")->F = stdin; + newfile("/dev/stdout")->F = stdout; + newfile("/dev/stderr")->F = stderr; + + /* Huh, people report that sometimes environ is NULL. Oh well. */ + if (environ) for (envp = environ; *envp; envp++) { + /* environ is writable, thus we don't strdup it needlessly */ + char *s = *envp; + char *s1 = strchr(s, '='); + if (s1) { + *s1 = '\0'; + /* Both findvar and setvar_u take const char* + * as 2nd arg -> environment is not trashed */ + setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); + *s1 = '='; + } + } + opt_complementary = "v::f::"; /* -v and -f can occur multiple times */ + opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W); + argv += optind; + argc -= optind; + if (opt & 0x1) + setvar_s(intvar[FS], opt_F); // -F + while (list_v) { /* -v */ + if (!is_assignment(llist_pop(&list_v))) + bb_show_usage(); + } + if (list_f) { /* -f */ + do { + char *s = NULL; + FILE *from_file; + + g_progname = llist_pop(&list_f); + from_file = xfopen_stdin(g_progname); + /* one byte is reserved for some trick in next_token */ + for (i = j = 1; j > 0; i += j) { + s = xrealloc(s, i + 4096); + j = fread(s + i, 1, 4094, from_file); + } + s[i] = '\0'; + fclose(from_file); + parse_program(s + 1); + free(s); + } while (list_f); + } else { // no -f: take program from 1st parameter + if 
(!argc) + bb_show_usage(); + g_progname = "cmd. line"; + parse_program(*argv++); + argc--; + } + if (opt & 0x8) // -W + bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W); + + /* fill in ARGV array */ + setvar_i(intvar[ARGC], argc + 1); + setari_u(intvar[ARGV], 0, "awk"); + i = 0; + while (*argv) + setari_u(intvar[ARGV], ++i, *argv++); + + evaluate(beginseq.first, &tv); + if (!mainseq.first && !endseq.first) + awk_exit(EXIT_SUCCESS); + + /* input file could already be opened in BEGIN block */ + if (!iF) iF = next_input_file(); + + /* passing through input files */ + while (iF) { + nextfile = FALSE; + setvar_i(intvar[FNR], 0); + + while ((i = awk_getline(iF, intvar[F0])) > 0) { + nextrec = FALSE; + incvar(intvar[NR]); + incvar(intvar[FNR]); + evaluate(mainseq.first, &tv); + + if (nextfile) + break; + } + + if (i < 0) + syntax_error(strerror(errno)); + + iF = next_input_file(); + } + + awk_exit(EXIT_SUCCESS); + /*return 0;*/ +} diff --git a/editors/cmp.c b/editors/cmp.c new file mode 100644 index 0000000..2e98e6e --- /dev/null +++ b/editors/cmp.c @@ -0,0 +1,135 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini cmp implementation for busybox + * + * Copyright (C) 2000,2001 by Matt Kraai + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +/* BB_AUDIT SUSv3 (virtually) compliant -- uses nicer GNU format for -l. */ +/* http://www.opengroup.org/onlinepubs/007904975/utilities/cmp.html */ + +/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) + * + * Original version majorly reworked for SUSv3 compliance, bug fixes, and + * size optimizations. Changes include: + * 1) Now correctly distinguishes between errors and actual file differences. + * 2) Proper handling of '-' args. + * 3) Actual error checking of i/o. + * 4) Accept SUSv3 -l option. Note that we use the slightly nicer gnu format + * in the '-l' case. 
+ */ + +#include "libbb.h" + +static const char fmt_eof[] ALIGN1 = "cmp: EOF on %s\n"; +static const char fmt_differ[] ALIGN1 = "%s %s differ: char %"OFF_FMT"d, line %d\n"; +// This fmt_l_opt uses gnu-isms. SUSv3 would be "%.0s%.0s%"OFF_FMT"d %o %o\n" +static const char fmt_l_opt[] ALIGN1 = "%.0s%.0s%"OFF_FMT"d %3o %3o\n"; + +static const char opt_chars[] ALIGN1 = "sl"; +#define CMP_OPT_s (1<<0) +#define CMP_OPT_l (1<<1) + +int cmp_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int cmp_main(int argc UNUSED_PARAM, char **argv) +{ + FILE *fp1, *fp2, *outfile = stdout; + const char *filename1, *filename2 = "-"; + USE_DESKTOP(off_t skip1 = 0, skip2 = 0;) + off_t char_pos = 0; + int line_pos = 1; /* Hopefully won't overflow... */ + const char *fmt; + int c1, c2; + unsigned opt; + int retval = 0; + + xfunc_error_retval = 2; /* 1 is returned if files are different. */ + + opt_complementary = "-1" + USE_DESKTOP(":?4") + SKIP_DESKTOP(":?2") + ":l--s:s--l"; + opt = getopt32(argv, opt_chars); + argv += optind; + + filename1 = *argv; + fp1 = xfopen_stdin(filename1); + + if (*++argv) { + filename2 = *argv; +#if ENABLE_DESKTOP + if (*++argv) { + skip1 = XATOOFF(*argv); + if (*++argv) { + skip2 = XATOOFF(*argv); + } + } +#endif + } + + fp2 = xfopen_stdin(filename2); + if (fp1 == fp2) { /* Paranoia check... stdin == stdin? */ + /* Note that we don't bother reading stdin. Neither does gnu wc. + * But perhaps we should, so that other apps down the chain don't + * get the input. Consider 'echo hello | (cmp - - && cat -)'. + */ + return 0; + } + + if (opt & CMP_OPT_l) + fmt = fmt_l_opt; + else + fmt = fmt_differ; + +#if ENABLE_DESKTOP + while (skip1) { getc(fp1); skip1--; } + while (skip2) { getc(fp2); skip2--; } +#endif + do { + c1 = getc(fp1); + c2 = getc(fp2); + ++char_pos; + if (c1 != c2) { /* Remember: a read error may have occurred. */ + retval = 1; /* But assume the files are different for now. 
*/ + if (c2 == EOF) { + /* We know that fp1 isn't at EOF or in an error state. But to + * save space below, things are setup to expect an EOF in fp1 + * if an EOF occurred. So, swap things around. + */ + fp1 = fp2; + filename1 = filename2; + c1 = c2; + } + if (c1 == EOF) { + die_if_ferror(fp1, filename1); + fmt = fmt_eof; /* Well, no error, so it must really be EOF. */ + outfile = stderr; + /* There may have been output to stdout (option -l), so + * make sure we fflush before writing to stderr. */ + xfflush_stdout(); + } + if (!(opt & CMP_OPT_s)) { + if (opt & CMP_OPT_l) { + line_pos = c1; /* line_pos is unused in the -l case. */ + } + fprintf(outfile, fmt, filename1, filename2, char_pos, line_pos, c2); + if (opt) { /* This must be -l since not -s. */ + /* If we encountered an EOF, + * the while check will catch it. */ + continue; + } + } + break; + } + if (c1 == '\n') { + ++line_pos; + } + } while (c1 != EOF); + + die_if_ferror(fp1, filename1); + die_if_ferror(fp2, filename2); + + fflush_stdout_and_exit(retval); +} diff --git a/editors/diff.c b/editors/diff.c new file mode 100644 index 0000000..0e96c84 --- /dev/null +++ b/editors/diff.c @@ -0,0 +1,1344 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini diff implementation for busybox, adapted from OpenBSD diff. + * + * Copyright (C) 2006 by Robert Sullivan + * Copyright (c) 2003 Todd C. Miller + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" + +// #define FSIZE_MAX 32768 + +/* NOINLINEs added to prevent gcc from merging too much into diffreg() + * (it bites more than it can (efficiently) chew). 
*/ + +/* + * Output flags + */ +enum { + /* Print a header/footer between files */ + /* D_HEADER = 1, - unused */ + /* Treat file as empty (/dev/null) */ + D_EMPTY1 = 2 * ENABLE_FEATURE_DIFF_DIR, + D_EMPTY2 = 4 * ENABLE_FEATURE_DIFF_DIR, +}; + +/* + * Status values for print_status() and diffreg() return values + * Guide: + * D_SAME - files are the same + * D_DIFFER - files differ + * D_BINARY - binary files differ + * D_COMMON - subdirectory common to both dirs + * D_ONLY - file only exists in one dir + * D_ISDIR1 - path1 a dir, path2 a file + * D_ISDIR2 - path1 a file, path2 a dir + * D_ERROR - error occurred + * D_SKIPPED1 - skipped path1 as it is a special file + * D_SKIPPED2 - skipped path2 as it is a special file + */ +#define D_SAME 0 +#define D_DIFFER (1 << 0) +#define D_BINARY (1 << 1) +#define D_COMMON (1 << 2) +/*#define D_ONLY (1 << 3) - unused */ +#define D_ISDIR1 (1 << 4) +#define D_ISDIR2 (1 << 5) +#define D_ERROR (1 << 6) +#define D_SKIPPED1 (1 << 7) +#define D_SKIPPED2 (1 << 8) + +/* Command line options */ +#define FLAG_a (1 << 0) +#define FLAG_b (1 << 1) +#define FLAG_d (1 << 2) +#define FLAG_i (1 << 3) +#define FLAG_L (1 << 4) +#define FLAG_N (1 << 5) +#define FLAG_q (1 << 6) +#define FLAG_r (1 << 7) +#define FLAG_s (1 << 8) +#define FLAG_S (1 << 9) +#define FLAG_t (1 << 10) +#define FLAG_T (1 << 11) +#define FLAG_U (1 << 12) +#define FLAG_w (1 << 13) + + +struct cand { + int x; + int y; + int pred; +}; + +struct line { + int serial; + int value; +}; + +/* + * The following struct is used to record change information + * doing a "context" or "unified" diff. 
(see routine "change" to + * understand the highly mnemonic field names) + */ +struct context_vec { + int a; /* start line in old file */ + int b; /* end line in old file */ + int c; /* start line in new file */ + int d; /* end line in new file */ +}; + + +#define g_read_buf bb_common_bufsiz1 + +struct globals { + bool anychange; + smallint exit_status; + int opt_U_context; + size_t max_context; /* size of context_vec_start */ + USE_FEATURE_DIFF_DIR(int dl_count;) + USE_FEATURE_DIFF_DIR(char **dl;) + char *opt_S_start; + const char *label1; + const char *label2; + int *J; /* will be overlaid on class */ + int clen; + int pref, suff; /* length of prefix and suffix */ + int nlen[2]; + int slen[2]; + int clistlen; /* the length of clist */ + struct cand *clist; /* merely a free storage pot for candidates */ + long *ixnew; /* will be overlaid on nfile[1] */ + long *ixold; /* will be overlaid on klist */ + struct line *nfile[2]; + struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ + struct context_vec *context_vec_start; + struct context_vec *context_vec_end; + struct context_vec *context_vec_ptr; + char *tempname1, *tempname2; + struct stat stb1, stb2; +}; +#define G (*ptr_to_globals) +#define anychange (G.anychange ) +#define exit_status (G.exit_status ) +#define opt_U_context (G.opt_U_context ) +#define max_context (G.max_context ) +#define dl_count (G.dl_count ) +#define dl (G.dl ) +#define opt_S_start (G.opt_S_start ) +#define label1 (G.label1 ) +#define label2 (G.label2 ) +#define J (G.J ) +#define clen (G.clen ) +#define pref (G.pref ) +#define suff (G.suff ) +#define nlen (G.nlen ) +#define slen (G.slen ) +#define clistlen (G.clistlen ) +#define clist (G.clist ) +#define ixnew (G.ixnew ) +#define ixold (G.ixold ) +#define nfile (G.nfile ) +#define sfile (G.sfile ) +#define context_vec_start (G.context_vec_start ) +#define context_vec_end (G.context_vec_end ) +#define context_vec_ptr (G.context_vec_ptr ) +#define stb1 (G.stb1 ) +#define stb2 
(G.stb2 ) +#define tempname1 (G.tempname1 ) +#define tempname2 (G.tempname2 ) +#define INIT_G() do { \ + SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ + opt_U_context = 3; \ + max_context = 64; \ +} while (0) + + +#if ENABLE_FEATURE_DIFF_DIR +static void print_only(const char *path, const char *entry) +{ + printf("Only in %s: %s\n", path, entry); +} +#endif + + +static void print_status(int val, char *_path1, char *_path2) +{ + /*const char *const _entry = entry ? entry : "";*/ + /*char *const _path1 = entry ? concat_path_file(path1, _entry) : path1;*/ + /*char *const _path2 = entry ? concat_path_file(path2, _entry) : path2;*/ + + switch (val) { +/* case D_ONLY: + print_only(path1, entry); + break; +*/ + case D_COMMON: + printf("Common subdirectories: %s and %s\n", _path1, _path2); + break; + case D_BINARY: + printf("Binary files %s and %s differ\n", _path1, _path2); + break; + case D_DIFFER: + if (option_mask32 & FLAG_q) + printf("Files %s and %s differ\n", _path1, _path2); + break; + case D_SAME: + if (option_mask32 & FLAG_s) + printf("Files %s and %s are identical\n", _path1, _path2); + break; + case D_ISDIR1: + printf("File %s is a %s while file %s is a %s\n", + _path1, "directory", _path2, "regular file"); + break; + case D_ISDIR2: + printf("File %s is a %s while file %s is a %s\n", + _path1, "regular file", _path2, "directory"); + break; + case D_SKIPPED1: + printf("File %s is not a regular file or directory and was skipped\n", + _path1); + break; + case D_SKIPPED2: + printf("File %s is not a regular file or directory and was skipped\n", + _path2); + break; + } +/* + if (entry) { + free(_path1); + free(_path2); + } +*/ +} + + +/* Read line, return its nonzero hash. Return 0 if EOF. + * + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 
+ */ +static ALWAYS_INLINE int fiddle_sum(int sum, int t) +{ + return sum * 127 + t; +} +static int readhash(FILE *fp) +{ + int i, t, space; + int sum; + + sum = 1; + space = 0; + i = 0; + if (!(option_mask32 & (FLAG_b | FLAG_w))) { + while ((t = getc(fp)) != '\n') { + if (t == EOF) { + if (i == 0) + return 0; + break; + } + sum = fiddle_sum(sum, t); + i = 1; + } + } else { + while (1) { + switch (t = getc(fp)) { + case '\t': + case '\r': + case '\v': + case '\f': + case ' ': + space = 1; + continue; + default: + if (space && !(option_mask32 & FLAG_w)) { + i = 1; + space = 0; + } + sum = fiddle_sum(sum, t); + i = 1; + continue; + case EOF: + if (i == 0) + return 0; + /* FALLTHROUGH */ + case '\n': + break; + } + break; + } + } + /* + * There is a remote possibility that we end up with a zero sum. + * Zero is used as an EOF marker, so return 1 instead. + */ + return (sum == 0 ? 1 : sum); +} + + +/* Our diff implementation is using seek. + * When we meet non-seekable file, we must make a temp copy. + */ +static char *make_temp(FILE *f, struct stat *sb) +{ + char *name; + int fd; + + if (S_ISREG(sb->st_mode) || S_ISBLK(sb->st_mode)) + return NULL; + name = xstrdup("/tmp/difXXXXXX"); + fd = mkstemp(name); + if (fd < 0) + bb_perror_msg_and_die("mkstemp"); + if (bb_copyfd_eof(fileno(f), fd) < 0) { + clean_up: + unlink(name); + xfunc_die(); /* error message is printed by bb_copyfd_eof */ + } + fstat(fd, sb); + close(fd); + if (freopen(name, "r+", f) == NULL) { + bb_perror_msg("freopen"); + goto clean_up; + } + return name; +} + + +/* + * Check to see if the given files differ. + * Returns 0 if they are the same, 1 if different, and -1 on error. 
+ */ +static NOINLINE int files_differ(FILE *f1, FILE *f2) +{ + size_t i, j; + + /* Prevent making copies for "/dev/null" (too common) */ + /* Deal with input from pipes etc */ + tempname1 = make_temp(f1, &stb1); + tempname2 = make_temp(f2, &stb2); + if (stb1.st_size != stb2.st_size) { + return 1; + } + while (1) { + i = fread(g_read_buf, 1, COMMON_BUFSIZE/2, f1); + j = fread(g_read_buf + COMMON_BUFSIZE/2, 1, COMMON_BUFSIZE/2, f2); + if (i != j) + return 1; + if (i == 0) + return (ferror(f1) || ferror(f2)) ? -1 : 0; + if (memcmp(g_read_buf, + g_read_buf + COMMON_BUFSIZE/2, i) != 0) + return 1; + } +} + + +static void prepare(int i, FILE *fp /*, off_t filesize*/) +{ + struct line *p; + int h; + size_t j, sz; + + rewind(fp); + + /*sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;*/ + /*if (sz < 100)*/ + sz = 100; + + p = xmalloc((sz + 3) * sizeof(p[0])); + j = 0; + while ((h = readhash(fp)) != 0) { /* while not EOF */ + if (j == sz) { + sz = sz * 3 / 2; + p = xrealloc(p, (sz + 3) * sizeof(p[0])); + } + p[++j].value = h; + } + nlen[i] = j; + nfile[i] = p; +} + + +static void prune(void) +{ + int i, j; + + for (pref = 0; pref < nlen[0] && pref < nlen[1] && + nfile[0][pref + 1].value == nfile[1][pref + 1].value; pref++) + continue; + for (suff = 0; suff < nlen[0] - pref && suff < nlen[1] - pref && + nfile[0][nlen[0] - suff].value == nfile[1][nlen[1] - suff].value; + suff++) + continue; + for (j = 0; j < 2; j++) { + sfile[j] = nfile[j] + pref; + slen[j] = nlen[j] - pref - suff; + for (i = 0; i <= slen[j]; i++) + sfile[j][i].serial = i; + } +} + + +static void equiv(struct line *a, int n, struct line *b, int m, int *c) +{ + int i, j; + + i = j = 1; + while (i <= n && j <= m) { + if (a[i].value < b[j].value) + a[i++].value = 0; + else if (a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while (i <= n) + a[i++].value = 0; + b[m + 1].value = 0; + j = 0; + while (++j <= m) { + c[j] = -b[j].serial; + while (b[j + 1].value == b[j].value) { + j++; + c[j] 
= b[j].serial; + } + } + c[j] = -1; +} + + +static int isqrt(int n) +{ + int y, x; + + if (n == 0) + return 0; + x = 1; + do { + y = x; + x = n / x; + x += y; + x /= 2; + } while ((x - y) > 1 || (x - y) < -1); + + return x; +} + + +static int newcand(int x, int y, int pred) +{ + struct cand *q; + + if (clen == clistlen) { + clistlen = clistlen * 11 / 10; + clist = xrealloc(clist, clistlen * sizeof(struct cand)); + } + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return clen++; +} + + +static int search(int *c, int k, int y) +{ + int i, j, l, t; + + if (clist[c[k]].y < y) /* quick look for typical case */ + return k + 1; + i = 0; + j = k + 1; + while (1) { + l = i + j; + if ((l >>= 1) <= i) + break; + t = clist[c[l]].y; + if (t > y) + j = l; + else if (t < y) + i = l; + else + return l; + } + return l + 1; +} + + +static int stone(int *a, int n, int *b, int *c) +{ + int i, k, y, j, l; + int oldc, tc, oldl; + unsigned int numtries; +#if ENABLE_FEATURE_DIFF_MINIMAL + const unsigned int bound = + (option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n)); +#else + const unsigned int bound = MAX(256, isqrt(n)); +#endif + + k = 0; + c[0] = newcand(0, 0, 0); + for (i = 1; i <= n; i++) { + j = a[i]; + if (j == 0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + numtries = 0; + do { + if (y <= clist[oldc].y) + continue; + l = search(c, k, y); + if (l != oldl + 1) + oldc = c[l - 1]; + if (l <= k) { + if (clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i, y, oldc); + oldc = tc; + oldl = l; + numtries++; + } else { + c[l] = newcand(i, y, oldc); + k++; + break; + } + } while ((y = b[++j]) > 0 && numtries < bound); + } + return k; +} + + +static void unravel(int p) +{ + struct cand *q; + int i; + + for (i = 0; i <= nlen[0]; i++) + J[i] = i <= pref ? i : i > nlen[0] - suff ? 
i + nlen[1] - nlen[0] : 0; + for (q = clist + p; q->y != 0; q = clist + q->pred) + J[q->x + pref] = q->y + pref; +} + + +static void unsort(struct line *f, int l, int *b) +{ + int *a, i; + + a = xmalloc((l + 1) * sizeof(int)); + for (i = 1; i <= l; i++) + a[f[i].serial] = f[i].value; + for (i = 1; i <= l; i++) + b[i] = a[i]; + free(a); +} + + +static int skipline(FILE *f) +{ + int i, c; + + for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) + continue; + return i; +} + + +/* + * Check does double duty: + * 1. ferret out any fortuitous correspondences due + * to confounding by hashing (which result in "jackpot") + * 2. collect random access indexes to the two files + */ +static NOINLINE void check(FILE *f1, FILE *f2) +{ + int i, j, jackpot, c, d; + long ctold, ctnew; + + rewind(f1); + rewind(f2); + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for (i = 1; i <= nlen[0]; i++) { + if (J[i] == 0) { + ixold[i] = ctold += skipline(f1); + continue; + } + while (j < J[i]) { + ixnew[j] = ctnew += skipline(f2); + j++; + } + if (option_mask32 & (FLAG_b | FLAG_w | FLAG_i)) { + while (1) { + c = getc(f1); + d = getc(f2); + /* + * GNU diff ignores a missing newline + * in one file if bflag || wflag. 
+ */ + if ((option_mask32 & (FLAG_b | FLAG_w)) + && ((c == EOF && d == '\n') || (c == '\n' && d == EOF)) + ) { + break; + } + ctold++; + ctnew++; + if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) { + do { + if (c == '\n') + break; + ctold++; + c = getc(f1); + } while (isspace(c)); + do { + if (d == '\n') + break; + ctnew++; + d = getc(f2); + } while (isspace(d)); + } else if (option_mask32 & FLAG_w) { + while (isspace(c) && c != '\n') { + c = getc(f1); + ctold++; + } + while (isspace(d) && d != '\n') { + d = getc(f2); + ctnew++; + } + } + if (c != d) { + jackpot++; + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } else { + while (1) { + ctold++; + ctnew++; + c = getc(f1); + d = getc(f2); + if (c != d) { + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); +/* was buggy? "if (d != '\n' && c != EOF)" */ + if (d != '\n' && d != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for (; j <= nlen[1]; j++) + ixnew[j] = ctnew += skipline(f2); +} + + +/* shellsort CACM #201 */ +static void sort(struct line *a, int n) +{ + struct line *ai, *aim, w; + int j, m = 0, k; + + if (n == 0) + return; + for (j = 1; j <= n; j *= 2) + m = 2 * j - 1; + for (m /= 2; m != 0; m /= 2) { + k = n - m; + for (j = 1; j <= k; j++) { + for (ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if (aim < ai) + break; /* wraparound */ + if (aim->value > ai[0].value + || (aim->value == ai[0].value && aim->serial > ai[0].serial) + ) { + break; + } + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + + +static void uni_range(int a, int b) +{ + if (a < b) + printf("%d,%d", a, b - a + 1); + else if (a == b) + printf("%d", b); + else + 
printf("%d,0", b); +} + + +static void fetch(long *f, int a, int b, FILE *lb, int ch) +{ + int i, j, c, lastc, col, nc; + + if (a > b) + return; + for (i = a; i <= b; i++) { + fseek(lb, f[i - 1], SEEK_SET); + nc = f[i] - f[i - 1]; + if (ch != '\0') { + putchar(ch); + if (option_mask32 & FLAG_T) + putchar('\t'); + } + col = 0; + for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { + c = getc(lb); + if (c == EOF) { + printf("\n\\ No newline at end of file\n"); + return; + } + if (c == '\t' && (option_mask32 & FLAG_t)) { + do { + putchar(' '); + } while (++col & 7); + } else { + putchar(c); + col++; + } + } + } +} + + +#if ENABLE_FEATURE_DIFF_BINARY +static int asciifile(FILE *f) +{ + int i, cnt; + + if (option_mask32 & FLAG_a) + return 1; + rewind(f); + cnt = fread(g_read_buf, 1, COMMON_BUFSIZE, f); + for (i = 0; i < cnt; i++) { + if (!isprint(g_read_buf[i]) + && !isspace(g_read_buf[i]) + ) { + return 0; + } + } + return 1; +} +#else +#define asciifile(f) 1 +#endif + + +/* dump accumulated "unified" diff changes */ +static void dump_unified_vec(FILE *f1, FILE *f2) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd; + int a, b, c, d; + char ch; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAX(1, cvp->a - opt_U_context); + upb = MIN(nlen[0], context_vec_ptr->b + opt_U_context); + lowc = MAX(1, cvp->c - opt_U_context); + upd = MIN(nlen[1], context_vec_ptr->d + opt_U_context); + + printf("@@ -"); + uni_range(lowa, upb); + printf(" +"); + uni_range(lowc, upd); + printf(" @@\n"); + + /* + * Output changes in "unified" diff format--the old and new lines + * are printed together. + */ + for (; cvp <= context_vec_ptr; cvp++) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + /* + * c: both new and old changes + * d: only changes in the old file + * a: only changes in the new file + */ + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 
'd' : 'a'; +#if 0 + switch (ch) { + case 'c': +// fetch() seeks! + fetch(ixold, lowa, a - 1, f1, ' '); + fetch(ixold, a, b, f1, '-'); + fetch(ixnew, c, d, f2, '+'); + break; + case 'd': + fetch(ixold, lowa, a - 1, f1, ' '); + fetch(ixold, a, b, f1, '-'); + break; + case 'a': + fetch(ixnew, lowc, c - 1, f2, ' '); + fetch(ixnew, c, d, f2, '+'); + break; + } +#else + if (ch == 'c' || ch == 'd') { + fetch(ixold, lowa, a - 1, f1, ' '); + fetch(ixold, a, b, f1, '-'); + } + if (ch == 'a') + fetch(ixnew, lowc, c - 1, f2, ' '); + if (ch == 'c' || ch == 'a') + fetch(ixnew, c, d, f2, '+'); +#endif + lowa = b + 1; + lowc = d + 1; + } + fetch(ixnew, d + 1, upd, f2, ' '); + + context_vec_ptr = context_vec_start - 1; +} + + +static void print_header(const char *file1, const char *file2) +{ + if (label1) + printf("--- %s\n", label1); + else + printf("--- %s\t%s", file1, ctime(&stb1.st_mtime)); + if (label2) + printf("+++ %s\n", label2); + else + printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime)); +} + + +/* + * Indicate that there is a difference between lines a and b of the from file + * to get to lines c to d of the to file. If a is greater than b then there + * are no lines in the from file involved and this means that there were + * lines appended (beginning at b). If c is greater than d then there are + * lines missing from the to file. + */ +static void change(char *file1, FILE *f1, char *file2, FILE *f2, + int a, int b, int c, int d) +{ + if ((a > b && c > d) || (option_mask32 & FLAG_q)) { + anychange = 1; + return; + } + + /* + * Allocate change records as needed. 
+ */ + if (context_vec_ptr == context_vec_end - 1) { + ptrdiff_t offset = context_vec_ptr - context_vec_start; + + max_context <<= 1; + context_vec_start = xrealloc(context_vec_start, + max_context * sizeof(struct context_vec)); + context_vec_end = context_vec_start + max_context; + context_vec_ptr = context_vec_start + offset; + } + if (anychange == 0) { + /* + * Print the context/unidiff header first time through. + */ + print_header(file1, file2); + } else if (a > context_vec_ptr->b + (2 * opt_U_context) + 1 + && c > context_vec_ptr->d + (2 * opt_U_context) + 1 + ) { + /* + * If this change is more than 'context' lines from the + * previous change, dump the record and reset it. + */ +// dump_unified_vec() seeks! + dump_unified_vec(f1, f2); + } + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + anychange = 1; +} + + +static void output(char *file1, FILE *f1, char *file2, FILE *f2) +{ + /* Note that j0 and j1 can't be used as they are defined in math.h. + * This also allows the rather amusing variable 'j00'... */ + int m, i0, i1, j00, j01; + + rewind(f1); + rewind(f2); + m = nlen[0]; + J[0] = 0; + J[m + 1] = nlen[1] + 1; + for (i0 = 1; i0 <= m; i0 = i1 + 1) { + while (i0 <= m && J[i0] == J[i0 - 1] + 1) + i0++; + j00 = J[i0 - 1] + 1; + i1 = i0 - 1; + while (i1 < m && J[i1 + 1] == 0) + i1++; + j01 = J[i1 + 1] - 1; + J[i1] = j01; +// change() seeks! + change(file1, f1, file2, f2, i0, i1, j00, j01); + } + if (m == 0) { +// change() seeks! + change(file1, f1, file2, f2, 1, 0, 1, nlen[1]); + } + if (anychange != 0 && !(option_mask32 & FLAG_q)) { +// dump_unified_vec() seeks! + dump_unified_vec(f1, f2); + } +} + +/* + * The following code uses an algorithm due to Harold Stone, + * which finds a pair of longest identical subsequences in + * the two files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i in file0. 
J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called "value"). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. + * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0, y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. + * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * than the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel. + * + * With J in hand, the matches there recorded are + * checked against reality to assure that no spurious + * matches have crept in due to hashing. 
If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. + */ +/* NB: files can be not REGular. The only sure thing that they + * are not both DIRectories. */ +static unsigned diffreg(char *file1, char *file2, int flags) +{ + int *member; /* will be overlaid on nfile[1] */ + int *class; /* will be overlaid on nfile[0] */ + int *klist; /* will be overlaid on nfile[0] after class */ + FILE *f1; + FILE *f2; + unsigned rval; + int i; + + anychange = 0; + context_vec_ptr = context_vec_start - 1; + tempname1 = tempname2 = NULL; + + /* Is any of them a directory? Then it's simple */ + if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) + return (S_ISDIR(stb1.st_mode) ? D_ISDIR1 : D_ISDIR2); + + /* None of them are directories */ + rval = D_SAME; + + if (flags & D_EMPTY1) + /* can't be stdin, but xfopen_stdin() is smaller code */ + f1 = xfopen_stdin(bb_dev_null); + else + f1 = xfopen_stdin(file1); + if (flags & D_EMPTY2) + f2 = xfopen_stdin(bb_dev_null); + else + f2 = xfopen_stdin(file2); + + /* NB: if D_EMPTY1/2 is set, other file is always a regular file, + * not pipe/fifo/chardev/etc - D_EMPTY is used by "diff -r" only, + * and it never diffs non-ordinary files in subdirs. */ + if (!(flags & (D_EMPTY1 | D_EMPTY2))) { + /* Quick check whether they are different */ + /* NB: copies non-REG files to tempfiles and fills tempname1/2 */ + i = files_differ(f1, f2); + if (i != 1) { /* not different? */ + if (i != 0) /* error? 
*/ + exit_status |= 2; + goto closem; + } + } + + if (!asciifile(f1) || !asciifile(f2)) { + rval = D_BINARY; + exit_status |= 1; + goto closem; + } + +// Rewind inside! + prepare(0, f1 /*, stb1.st_size*/); + prepare(1, f2 /*, stb2.st_size*/); + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *) nfile[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); +//TODO: xrealloc_vector? + member = xrealloc(member, (slen[1] + 2) * sizeof(int)); + + class = (int *) nfile[0]; + unsort(sfile[0], slen[0], class); + class = xrealloc(class, (slen[0] + 2) * sizeof(int)); + + klist = xmalloc((slen[0] + 2) * sizeof(int)); + clen = 0; + clistlen = 100; + clist = xmalloc(clistlen * sizeof(struct cand)); + i = stone(class, slen[0], member, klist); + free(member); + free(class); + + J = xrealloc(J, (nlen[0] + 2) * sizeof(int)); + unravel(klist[i]); + free(clist); + free(klist); + + ixold = xrealloc(ixold, (nlen[0] + 2) * sizeof(long)); + ixnew = xrealloc(ixnew, (nlen[1] + 2) * sizeof(long)); +// Rewind inside! + check(f1, f2); +// Rewind inside! 
+ output(file1, f1, file2, f2); + + closem: + if (anychange) { + exit_status |= 1; + if (rval == D_SAME) + rval = D_DIFFER; + } + fclose_if_not_stdin(f1); + fclose_if_not_stdin(f2); + if (tempname1) { + unlink(tempname1); + free(tempname1); + } + if (tempname2) { + unlink(tempname2); + free(tempname2); + } + return rval; +} + + +#if ENABLE_FEATURE_DIFF_DIR +static void do_diff(char *dir1, char *path1, char *dir2, char *path2) +{ + int flags = 0; /*D_HEADER;*/ + int val; + char *fullpath1 = NULL; /* if -N */ + char *fullpath2 = NULL; + + if (path1) + fullpath1 = concat_path_file(dir1, path1); + if (path2) + fullpath2 = concat_path_file(dir2, path2); + + if (!fullpath1 || stat(fullpath1, &stb1) != 0) { + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + if (path2) { + free(fullpath1); + fullpath1 = concat_path_file(dir1, path2); + } + } + if (!fullpath2 || stat(fullpath2, &stb2) != 0) { + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + if (path1) { + free(fullpath2); + fullpath2 = concat_path_file(dir2, path1); + } + } + + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); + goto ret; + } + + if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) + val = D_SKIPPED1; + else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) + val = D_SKIPPED2; + else { + /* Both files are either REGular or DIRectories */ + val = diffreg(fullpath1, fullpath2, flags); + } + + print_status(val, fullpath1, fullpath2 /*, NULL*/); + ret: + free(fullpath1); + free(fullpath2); +} +#endif + + +#if ENABLE_FEATURE_DIFF_DIR +/* This function adds a filename to dl, the directory listing. 
*/ +static int FAST_FUNC add_to_dirlist(const char *filename, + struct stat *sb UNUSED_PARAM, + void *userdata, + int depth UNUSED_PARAM) +{ + dl = xrealloc_vector(dl, 5, dl_count); + dl[dl_count] = xstrdup(filename + (int)(ptrdiff_t)userdata); + dl_count++; + return TRUE; +} + + +/* This returns a sorted directory listing. */ +static char **get_recursive_dirlist(char *path) +{ + dl_count = 0; + dl = xzalloc(sizeof(dl[0])); + + /* We need to trim root directory prefix. + * Using void *userdata to specify its length, + * add_to_dirlist will remove it. */ + if (option_mask32 & FLAG_r) { + recursive_action(path, ACTION_RECURSE|ACTION_FOLLOWLINKS, + add_to_dirlist, /* file_action */ + NULL, /* dir_action */ + (void*)(ptrdiff_t)(strlen(path) + 1), + 0); + } else { + DIR *dp; + struct dirent *ep; + + dp = warn_opendir(path); + while ((ep = readdir(dp))) { + if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.')) + continue; + add_to_dirlist(ep->d_name, NULL, (void*)(int)0, 0); + } + closedir(dp); + } + + /* Sort dl alphabetically. */ + qsort_string_vector(dl, dl_count); + + dl[dl_count] = NULL; + return dl; +} + + +static void diffdir(char *p1, char *p2) +{ + char **dirlist1, **dirlist2; + char *dp1, *dp2; + int pos; + + /* Check for trailing slashes. */ + dp1 = last_char_is(p1, '/'); + if (dp1 != NULL) + *dp1 = '\0'; + dp2 = last_char_is(p2, '/'); + if (dp2 != NULL) + *dp2 = '\0'; + + /* Get directory listings for p1 and p2. */ + dirlist1 = get_recursive_dirlist(p1); + dirlist2 = get_recursive_dirlist(p2); + + /* If -S was set, find the starting point. */ + if (opt_S_start) { + while (*dirlist1 != NULL && strcmp(*dirlist1, opt_S_start) < 0) + dirlist1++; + while (*dirlist2 != NULL && strcmp(*dirlist2, opt_S_start) < 0) + dirlist2++; + if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) + bb_error_msg(bb_msg_invalid_arg, "NULL", "-S"); + } + + /* Now that both dirlist1 and dirlist2 contain sorted directory + * listings, we can start to go through dirlist1. 
If both listings + * contain the same file, then do a normal diff. Otherwise, behaviour + * is determined by whether the -N flag is set. */ + while (*dirlist1 != NULL || *dirlist2 != NULL) { + dp1 = *dirlist1; + dp2 = *dirlist2; + pos = dp1 == NULL ? 1 : (dp2 == NULL ? -1 : strcmp(dp1, dp2)); + if (pos == 0) { + do_diff(p1, dp1, p2, dp2); + dirlist1++; + dirlist2++; + } else if (pos < 0) { + if (option_mask32 & FLAG_N) + do_diff(p1, dp1, p2, NULL); + else + print_only(p1, dp1); + dirlist1++; + } else { + if (option_mask32 & FLAG_N) + do_diff(p1, NULL, p2, dp2); + else + print_only(p2, dp2); + dirlist2++; + } + } +} +#endif + + +int diff_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int diff_main(int argc UNUSED_PARAM, char **argv) +{ + int gotstdin = 0; + char *f1, *f2; + llist_t *L_arg = NULL; + + INIT_G(); + + /* exactly 2 params; collect multiple -L