[Toybox] [patch] add sed
Isaac
idunham at lavabit.com
Thu Aug 1 19:49:46 PDT 2013
On Wed, Jul 31, 2013 at 09:07:45PM -0500, Strake wrote:
> Not yet fully functional, but has s/// and some others.
>
One fix I'd suggest is s/E/r/g ;-) (that is, spell the extended-regex option -r rather than -E).
Besides that, there's -i (edit in place), which could be done easily by
open()ing a temporary file, writing to that, then renaming it over the original file.
Also, PLEASE KEEP ROB'S HELP MESSAGES!
They're the most useful I've seen.
Isaac Dunham
> From ded73ac6c00754ec9715ede475b328a48561ad86 Mon Sep 17 00:00:00 2001
> From: Strake <strake888 at gmail.com>
> Date: Wed, 31 Jul 2013 20:41:31 -0500
> Subject: add sed
>
> ---
> lib/lib.h | 7 +-
> lib/pending.c | 24 ++
> toys/pending/sed.c | 629 ++++++++++++++++++++++++++++++++++++++++++++---------
> 3 files changed, 555 insertions(+), 105 deletions(-)
>
> diff --git a/lib/lib.h b/lib/lib.h
> index 98f4aad..a6b185e 100644
> --- a/lib/lib.h
> +++ b/lib/lib.h
> @@ -107,6 +107,7 @@ int xopen(char *path, int flags);
> void xclose(int fd);
> int xdup(int fd);
> FILE *xfopen(char *path, char *mode);
> +FILE *xfmemopen(void *, size_t, char *);
> size_t xread(int fd, void *buf, size_t len);
> void xreadall(int fd, void *buf, size_t len);
> void xwrite(int fd, void *buf, size_t len);
> @@ -199,5 +200,7 @@ char* make_human_readable(unsigned long long size,
> unsigned long unit);
> unsigned long get_int_value(const char *numstr, unsigned lowrange,
> unsigned highrange);
>
> // grep helper functions
> -char *astrcat (char *, char *);
> -char *xastrcat (char *, char *);
> +char *astrcat (char *, char *);
> +char *xastrcat (char *, char *);
> +char *astrncat0 (char *, char *, size_t);
> +char *xastrncat0 (char *, char *, size_t);
> diff --git a/lib/pending.c b/lib/pending.c
> index fad1c65..099fee4 100644
> --- a/lib/pending.c
> +++ b/lib/pending.c
> @@ -102,3 +102,27 @@ char *xastrcat (char *x, char *y) {
> if (!x) error_exit ("xastrcat");
> return x;
> }
> +
> +char *astrncat0 (char *x, char *y, size_t n) {
> + char *z;
> + size_t m = x ? strlen (x) : 0;
> + z = x;
> + x = realloc (x, m + n + 1);
> + if (!x) return 0;
> + (z ? strncat : strncpy) (x, y, n);
> + x[m + n] = 0;
> + return x;
> +}
> +
> +char *xastrncat0 (char *x, char *y, size_t n) {
> + x = astrncat0 (x, y, n);
> + if (!x) error_exit ("xastrncat");
> + return x;
> +}
> +
> +FILE *xfmemopen (void *x, size_t n, char *mode) {
> + FILE *f;
> + f = fmemopen (x, n, mode);
> + if (!f) perror_exit ("xfmemopen");
> + return f;
> +}
> diff --git a/toys/pending/sed.c b/toys/pending/sed.c
> index 0ce25ac..e0787e4 100644
> --- a/toys/pending/sed.c
> +++ b/toys/pending/sed.c
> @@ -1,138 +1,561 @@
> -/* sed.c - Stream editor.
> +/* sed.c - stream editor
> *
> - * Copyright 2012 Rob Landley <rob at landley.net>
> + * Copyright 2013 CE Strake <strake888 at gmail.com>
> *
> - * See http://opengroup.org/onlinepubs/9699919799/utilities/sed.c
> + * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/
> + * See http://refspecs.linuxfoundation.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/cmdbehav.html
>
> -USE_SED(NEWTOY(sed, "irne*f*", TOYFLAG_BIN))
> +USE_SED(NEWTOY(sed, "Ene*f*", TOYFLAG_BIN))
>
> config SED
> bool "sed"
> default n
> help
> - usage: sed [-irn] {command | [-e command]...} [FILE...]
> -
> - Stream EDitor, transforms text by appling script of command to each line
> - of input.
> -
> - -e Add expression to the command script (if no -e, use first argument)
> - -i Modify file in place
> - -n No default output (p commands only)
> - -r Use extended regular expression syntex
> + usage: sed ...
> */
>
> #define FOR_sed
> #include "toys.h"
> #include "lib/xregcomp.h"
> +#include <regex.h>
>
> GLOBALS(
> - struct arg_list *files;
> - struct arg_list *scripts;
> -
> - void *commands;
> + struct arg_list *fArgu, *eArgu;
> + char *patternSpace, *holdSpace;
> + long n;
> )
>
> -// Digested version of what sed commands can actually tell use to do.
> +static wint_t xfgetwc (FILE *f) {
> + wint_t x = fgetwc (f);
> + if (x < 0) error_exit ("failed to read");
> + return x;
> +}
> +
> +static int xfgetc (FILE *f) {
> + int c = fgetc (f);
> + if (c < 0) error_exit ("failed to read");
> + return c;
> +}
> +
> +static wint_t fpeekwc (FILE *f) {
> + wint_t x = fgetwc (f);
> + ungetwc (x, f);
> + return x;
> +}
> +
> +static int fpeekc (FILE *f) {
> + int c = fgetc (f);
> + ungetc (c, f);
> + return c;
> +}
> +
> +static void skipSuch (FILE *f, int (*p) (wint_t)) {
> + wint_t x;
> + do x = fgetwc (f); while (p (x));
> + ungetwc (x, f);
> +}
> +
> +static int iswnspace (wint_t x) { return (iswspace (x) && x != '\n'); }
> +
> +static wchar_t *afgetwswde (FILE *f, wint_t d, wint_t e) {
> + wchar_t x, *xs;
> + size_t n = 0;
> +
> + xs = 0;
> + for (;;) {
> + x = fgetwc (f);
> + if (x < 0 || x == d) break;
> +
> + if (e && x == e) {
> + x = fgetwc (f);
> + if (x < 0) {
> + if (xs) free (xs);
> + return 0;
> + }
> + if (x != d) {
> + ungetwc (x, f);
> + x = e;
> + }
> + }
>
> + xs = xrealloc (xs, sizeof (wchar_t)*(++n + 1));
> + xs[n - 1] = x;
> + }
> +
> + if (xs) xs[n] = 0;
> + return xs;
> +}
>
> -struct sed_command {
> - // double_list compatibility (easier to create in-order)
> - struct sed_command *next, *prev;
> +static char *wcstoambs (wchar_t *xs) {
> + char *cs;
> + size_t n;
> + n = wcstombs (0, xs, 0);
> + cs = xmalloc (sizeof (char)*(n + 1));
> + wcstombs (cs, xs, n + 1);
> + return cs;
> +}
>
> - // data string for (saicytb)
> - char c, *data;
> - // Regexes for s/match/data/ and /begin/,/end/command
> - regex_t *rmatch, *rbegin, *rend;
> - // For numeric ranges ala 10,20command
> - long lstart, lstop;
> - // Which match to replace, 0 for all. s and w commands can write to a file
> - int which, outfd;
> +static char *afgetswde (FILE *f, wint_t d, wint_t e) {
> + wchar_t *xs;
> + char *cs;
> + cs = 0;
> + xs = afgetwswde (f, d, e);
> + if (xs) {
> + cs = wcstoambs (xs);
> + free (xs);
> + }
> + return cs;
> +}
> +
> +static char *xafgetswde (FILE *f, wint_t d, wint_t e) {
> + char *cs;
> + cs = afgetswde (f, d, e);
> + if (!cs) perror_exit ("failed to read");
> + return cs;
> +}
> +
> +typedef struct {
> + char type;
> + union {
> + regex_t re;
> + long n;
> + };
> +} Address;
> +
> +typedef struct Function {
> + char x;
> + union {
> + struct {
> + char *writ;
> + regex_t re;
> + int flags, fd;
> + long n, nMatch /* for s function, to pass to regexec */;
> + };
> + struct Function *fns;
> + };
> +} Function;
> +
> +typedef struct {
> + Address s, t;
> + Function fn;
> + int active;
> +} Command;
> +
> +enum {
> + sed_gFlag = 1,
> + sed_pFlag = 2,
> + sed_wFlag = 4,
> };
>
> -// Space. Space. Gotta get past space. Spaaaaaaaace! (But not newline.)
> -static void spaceorb(char **s)
> -{
> - while (**s == ' ' || **s == '\t') ++*s;
> -}
> -
> -// Parse sed commands
> -
> -static void parse_scripts(void)
> -{
> - struct arg_list *script;
> - int which = 0, i;
> -
> - // Loop through list of scripts collated from command line and/or files
> -
> - for (script = TT.scripts; script; script = script->next) {
> - char *str = script->arg;
> - struct sed_command *cmd;
> -
> - // we can get multiple commands from a string (semicolons and such)
> -
> - which++;
> - for (i=1;;) {
> - if (!*str) break;
> -
> - cmd = xzalloc(sizeof(struct sed_command));
> -
> - // Identify prefix
> - for (;;) {
> - spaceorb(&str);
> - if (*str == '^') {
> - if (cmd->lstart) goto parse_fail;
> - cmd->lstart = -1;
> - str++;
> - continue;
> - } else if (*str == '$') {
> - cmd->lstop = LONG_MAX;
> - str++;
> - break;
> - } else if (isdigit(*str)) {
> - long ll = strtol(str, &str, 10);
> -
> - if (ll<0) goto parse_fail;
> - if (cmd->lstart) {
> - cmd->lstop = ll;
> - break;
> - } else cmd->lstart = ll;
> - } else if (*str == '/' || *str == '\\') {
> - // set begin/end
> - printf("regex\n");
> - exit(1);
> - } else if (!cmd->lstart && !cmd->rbegin) break;
> - else goto parse_fail; // , with no range after it
> -
> - spaceorb(&str);
> - if (*str != ',') break;
> - str++;
> +static Command *cs;
> +
> +static long parseNumber (FILE *);
> +static Address parseAddress (FILE *);
> +static Function parseFunction (FILE *);
> +static Function *parseFunctions (FILE *);
> +static Command parseCommand (FILE *);
> +
> +/* parse natural number */
> +static long parseNumber (FILE *f) {
> + long n = 0;
> + int c;
> +
> + for (;;) {
> + c = fgetc (f);
> + if (!isdigit (c)) {
> + ungetc (c, f);
> + return n;
> + }
> + n *= 10;
> + n += c - '0';
> + }
> +}
> +
> +static Address parseAddress (FILE *f) {
> + Address a;
> + char *xs;
> + wint_t d;
> +
> + d = fgetwc (f);
> + switch (d) {
> + case '\\':
> + d = xfgetwc (f);
> + if (d == '\\' || d == 0) error_exit ("bad delimiter");
> + /* fall thru */
> + case '/':
> + a.type = '/';
> + xs = xafgetswde (f, d, '\\');
> + xregcomp (&a.re, xs, toys.optflags & FLAG_E ? REG_EXTENDED : 0);
> + free (xs);
> + break;
> + case '$':
> + a.type = '$';
> + break;
> + default:
> + ungetwc (d, f);
> + if (iswdigit (d)) {
> + a.type = 'n';
> + a.n = parseNumber (f);
> + }
> + else a.type = 0;
> + }
> +
> + return a;
> +}
> +
> +static Function parseFunction (FILE *f) {
> + Function fn;
> + char *xs;
> + wint_t d;
> +
> + fn.x = fgetc (f);
> + switch (fn.x) {
> + case '{':
> + fn.x = ';';
> + fn.fns = parseFunctions (f);
> + if (fpeekc (f) != '}') error_exit ("{} mismatch");
> + break;
> + case 'a':
> + case 'c':
> + case 'i':
> + while (fgetc (f) != '\n');
> + fn.writ = xafgetswde (f, '\n', '\\');
> + break;
> + case 'r':
> + case 'w':
> + skipSuch (f, iswnspace);
> + xs = xafgetswde (f, '\n', '\\');
> + fn.fd = xopen (xs, fn.x == 'r' ? O_RDONLY : (O_WRONLY | O_CREAT | O_TRUNC));
> + free (xs);
> + break;
> + case 's':
> + d = xfgetwc (f);
> + xs = xafgetswde (f, d, '\\');
> + fn.writ = xafgetswde (f, d, '\\');
> + xregcomp (&fn.re, xs, toys.optflags & FLAG_E ? REG_EXTENDED : 0);
> + fn.nMatch = 256; /* TO DO: unbreak */
> + free (xs);
> + for (;;) {
> + char c = fgetc (f);
> + if (c < 0) break;
> + if (isspace (c) && c != '\n') continue;
> + else switch (c) {
> + case '\n':
> + case ';':
> + goto s_end;
> + case 'g':
> + fn.flags |= sed_gFlag;
> + break;
> + case 'p':
> + fn.flags |= sed_pFlag;
> + break;
> + case 'w':
> + fn.flags |= sed_wFlag;
> + skipSuch (f, iswnspace);
> + xs = xafgetswde (f, '\n', '\\');
> + fn.fd = xopen (xs, O_APPEND);
> + free (xs);
> + goto s_end;
> + default:
> + error_exit ("bad flag: %c", c);
> }
> - i = stridx("{bcdDgGhHlnNpPstwxyrqia= \t#:}", *str);
> - if (i == -1) goto parse_fail;
> + }
> +s_end:
> + break;
> + }
> +
> + return fn;
> +}
> +
> +static Command parseCommand (FILE *f) {
> + Command c;
> +
> + skipSuch (f, iswnspace);
> +
> + {
> + c.s = parseAddress (f);
> + }
> + if (fpeekc (f) == ',') {
> + fgetc (f);
> + c.t = parseAddress (f);
> + if (!c.s.type || !c.t.type) error_exit ("bad address");
> + }
> + else {
> + c.t = (Address){ .type = 0 };
> + }
> +
> + skipSuch (f, iswnspace);
> +
> + c.fn = parseFunction (f);
> +
> + c.active = 0;
> +
> + return c;
> +}
> +
> +static Function *parseFunctions (FILE *f) {
> + Function *fns;
> + long n = 0;
> +
> + fns = 0;
> + for (;;) {
> + int c;
> +
> + skipSuch (f, iswspace);
> +
> + fns = xrealloc (fns, sizeof (Function)*(++n + 1));
>
> - dlist_add_nomalloc((struct double_list **)&TT.commands,
> - (struct double_list *)cmd);
> - exit(1);
> + c = fpeekc (f);
> + if (c == EOF || c == '}') break;
> +
> + fns[n - 1] = parseFunction (f);
> + }
> +
> + fns[n] = (Function){ .x = 0 };
> +
> + return fns;
> +}
> +
> +static Command *parseCommands (FILE *f) {
> + Command *cs;
> + long n = 0;
> +
> + cs = 0;
> + for (;;) {
> + int c;
> +
> + skipSuch (f, iswspace);
> +
> + cs = xrealloc (cs, sizeof (Command)*(++n + 1));
> +
> + c = fpeekc (f);
> + if (c == EOF || c == '}') break;
> +
> + cs[n - 1] = parseCommand (f);
> + }
> +
> + cs[n] = (Command){ .fn = (Function) { .x = 0 } };
> +
> + return cs;
> +}
> +
> +static int inRange (Command c) {
> + if (!c.t.type) switch (c.s.type) {
> + case 'n':
> + return (c.s.n == TT.n);
> + case '/':
> + return (regexec (&c.t.re, TT.patternSpace, 0, 0, 0) == 0);
> + case 0:
> + return 1;
> + }
> + if (c.active) {
> + switch (c.t.type) {
> + case 'n':
> + if (c.t.n >= TT.n) c.active = 0;
> + break;
> + case '/':
> + if (regexec (&c.t.re, TT.patternSpace, 0, 0, 0) == 0) c.active = 0;
> + break;
> + }
> + return 1;
> + }
> + else {
> + switch (c.s.type) {
> + case 'n':
> + if (c.s.n <= TT.n) c.active = 1;
> + break;
> + case '/':
> + if (regexec (&c.s.re, TT.patternSpace, 0, 0, 0) == 0) c.active = 1;
> + break;
> }
> + return c.active;
> }
> +}
> +
> +static void sFn (Function fn) {
> + char *xs, *ys, *p;
> + regmatch_t *ms;
> + long n;
> +
> + ms = xmalloc (sizeof (regmatch_t)*fn.nMatch);
> + ys = 0;
> + for (xs = TT.patternSpace;
> + (xs == TT.patternSpace || fn.flags & sed_gFlag) &&
> + regexec (&fn.re, xs, fn.nMatch, ms, 0) == 0;
> + xs += ms[0].rm_eo) {
> + long ii;
> + ys = xastrncat0 (ys, xs, ms[0].rm_so);
> + for (ii = 0; fn.writ[ii]; ii++) switch (fn.writ[ii]) {
> + case '&':
> + ys = xastrncat0 (ys, xs + ms[0].rm_so, ms[0].rm_eo - ms[0].rm_so);
> + break;
> + case '\\':
> + n = strtoul (fn.writ + ++ii, &p, 10);
> + if (p > fn.writ + ii) {
> + ii = p - fn.writ - 1;
> + if (n >= fn.nMatch || ms[n].rm_so < 0) error_msg ("bad backreference: %d", n);
> + else ys = xastrncat0 (ys, xs + ms[n].rm_so, ms[n].rm_eo - ms[n].rm_so);
> + break;
> + }
> + /* fall thru */
> + default:
> + ys = xastrcat (ys, (char []){ fn.writ[ii], 0 });
> + }
> + }
> + ys = xastrcat (ys, xs);
> + free (TT.patternSpace);
> + TT.patternSpace = ys;
> +}
>
> - return;
> +/* return whether to start next cycle */
> +static int doCommand (FILE *f, Command c) {
> + char *xs;
> + size_t _;
> +
> + if (inRange (c)) switch (c.fn.x) {
> + case 'c':
> + TT.patternSpace[0] = 0;
> + /* fall thru */
> + case 'a':
> + case 'i':
> + fputs (c.fn.writ, stdout);
> + return 0;
> + case 'D':
> + xs = strchr (TT.patternSpace, '\n');
> + if (xs) {
> + memmove (TT.patternSpace, xs + 1, strlen (xs + 1) + 1);
> + return 1;
> + }
> + /* fall thru */
> + case 'd':
> + TT.patternSpace[0] = 0;
> + return 1;
> + case 'g':
> + free (TT.patternSpace);
> + TT.patternSpace = xstrdup (TT.holdSpace);
> + return 0;
> + case 'G':
> + xastrcat (TT.patternSpace, "\n");
> + xastrcat (TT.patternSpace, TT.holdSpace);
> + return 0;
> + case 'h':
> + free (TT.holdSpace);
> + TT.holdSpace = xstrdup (TT.patternSpace);
> + return 0;
> + case 'H':
> + xastrcat (TT.holdSpace, "\n");
> + xastrcat (TT.holdSpace, TT.patternSpace);
> + return 0;
> + case 'n':
> + if (!(toys.optflags & FLAG_n)) fputs (TT.patternSpace, stdout);
> + free (TT.patternSpace);
> + TT.patternSpace = 0;
> + if (getline (&TT.patternSpace, &_, f) < 0) xexit ();
> + return 0;
> + case 'N':
> + xs = 0;
> + if (getline (&xs, &_, f) < 0) xexit ();
> + xastrcat (TT.patternSpace, "\n");
> + xastrcat (TT.patternSpace, xs);
> + free (xs);
> + TT.n++;
> + return 0;
> + case 'p':
> + fputs (TT.patternSpace, stdout);
> + return 0;
> + case 'q':
> + xexit ();
> + case 's':
> + sFn (c.fn);
> + break;
> + case 'x':
> + xs = TT.patternSpace;
> + TT.patternSpace = TT.holdSpace;
> + TT.holdSpace = xs;
> + return 0;
> + case '=':
> + printf ("%ld\n", TT.n);
> + return 0;
> + default:
> + error_exit ("%c function unimplete", c.fn.x);
> + }
> +}
>
> -parse_fail:
> - error_exit("bad expression %d@%d: %s", which, i, script->arg+i);
> +static void doPreCommands (FILE *f, Command *cs) {
> + long ii;
> + for (ii = 0; cs[ii].fn.x; ii++) if ('i' == cs[ii].fn.x && doCommand (f, cs[ii])) break;
> }
>
> -void sed_main(void)
> -{
> - char **files=toys.optargs;
> +static void doCommands (FILE *f, Command *cs) {
> + long ii;
> + for (ii = 0; cs[ii].fn.x; ii++) if ('i' != cs[ii].fn.x && doCommand (f, cs[ii])) break;
> +}
>
> - // If no -e, use first argument
> - if (!TT.scripts) {
> - if (!*files) error_exit("Need script");
> - (TT.scripts = xzalloc(sizeof(struct arg_list)))->arg = *(files++);
> +void do_sed (int fd, char *name) {
> + FILE *f;
> +
> + f = fdopen (fd, "r");
> + if (fd < 0) perror_exit ("failed to open %s", name);
> +
> + TT.patternSpace = 0;
> + for (;;) {
> + doPreCommands (f, cs);
> + TT.patternSpace = afgetswde (f, '\n', 0);
> + if (!TT.patternSpace) return;
> + TT.n++;
> + doCommands (f, cs);
> + printf ("%s\n", TT.patternSpace);
> + free (TT.patternSpace);
> }
> +}
> +
> +void addCommands (Command *ds) {
> + long m, n;
> + if (!cs) cs = xmalloc (0);
> + for (m = 0; cs[m].fn.x; m++);
> + for (n = 0; ds[n].fn.x; n++);
> + cs = xrealloc (cs, sizeof (Command)*(m + n + 1));
> + memmove (cs + m, ds, sizeof(Command)*(n + 1));
> +}
> +
> +void faddCommands (FILE *f) {
> + Command *ds;
> + ds = parseCommands (f);
> + if (fpeekc (f) == '}') error_exit ("{} mismatch");
> + addCommands (ds);
> + free (ds);
> +}
>
> - parse_scripts();
> +void buildScript (void) {
> + FILE *f;
> +
> + cs = 0;
> +
> + for (; TT.eArgu; TT.eArgu = TT.eArgu -> next) {
> + f = xfmemopen (TT.eArgu -> arg, strlen (TT.eArgu -> arg), "r");
> + faddCommands (f);
> + fclose (f);
> + }
> + for (; TT.fArgu; TT.fArgu = TT.fArgu -> next) {
> + f = xfopen (TT.fArgu -> arg, "r");
> + faddCommands (f);
> + fclose (f);
> + }
> +
> + if (!cs) {
> + if (toys.optc < 1) error_exit ("no script");
> + f = xfmemopen (toys.optargs[0], strlen (toys.optargs[0]), "r");
> + faddCommands (f);
> + fclose (f);
> + toys.optc--; toys.optargs++;
> + }
> +}
>
> - while (*files) dprintf(2,"file=%s\n", *(files++));
> +void sed_main (void) {
> + buildScript ();
> +
> + TT.patternSpace = xmalloc (0);
> + TT.holdSpace = xmalloc (0);
> +
> + loopfiles (toys.optargs, do_sed);
> }
> --
> 1.7.11.1
1375411786.0
More information about the Toybox
mailing list