[Toybox] [patch] add sed
Strake
strake888 at gmail.com
Wed Jul 31 19:07:45 PDT 2013
This is not yet fully functional, but it implements s/// and several other commands.
>From ded73ac6c00754ec9715ede475b328a48561ad86 Mon Sep 17 00:00:00 2001
From: Strake <strake888 at gmail.com>
Date: Wed, 31 Jul 2013 20:41:31 -0500
Subject: add sed
---
lib/lib.h | 7 +-
lib/pending.c | 24 ++
toys/pending/sed.c | 629 ++++++++++++++++++++++++++++++++++++++++++++---------
3 files changed, 555 insertions(+), 105 deletions(-)
diff --git a/lib/lib.h b/lib/lib.h
index 98f4aad..a6b185e 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -107,6 +107,7 @@ int xopen(char *path, int flags);
void xclose(int fd);
int xdup(int fd);
FILE *xfopen(char *path, char *mode);
+FILE *xfmemopen(void *, size_t, char *);
size_t xread(int fd, void *buf, size_t len);
void xreadall(int fd, void *buf, size_t len);
void xwrite(int fd, void *buf, size_t len);
@@ -199,5 +200,7 @@ char* make_human_readable(unsigned long long size,
unsigned long unit);
unsigned long get_int_value(const char *numstr, unsigned lowrange,
unsigned highrange);
// grep helper functions
-char *astrcat (char *, char *);
-char *xastrcat (char *, char *);
+char *astrcat (char *, char *);
+char *xastrcat (char *, char *);
+char *astrncat0 (char *, char *, size_t);
+char *xastrncat0 (char *, char *, size_t);
diff --git a/lib/pending.c b/lib/pending.c
index fad1c65..099fee4 100644
--- a/lib/pending.c
+++ b/lib/pending.c
@@ -102,3 +102,27 @@ char *xastrcat (char *x, char *y) {
if (!x) error_exit ("xastrcat");
return x;
}
+
+/* Append at most n bytes of y to the heap string x; a null x starts a new
+ * string. The buffer is resized with realloc to hold the old text, n more
+ * bytes and a terminator. Returns the resized string, or 0 if realloc
+ * fails. */
+char *astrncat0 (char *x, char *y, size_t n) {
+  size_t old = x ? strlen (x) : 0, take;
+  char *stop, *out;
+
+  out = realloc (x, old + n + 1);
+  if (!out) return 0;
+
+  /* copy up to the first NUL in y, but never more than n bytes */
+  stop = memchr (y, 0, n);
+  take = stop ? (size_t)(stop - y) : n;
+  memcpy (out + old, y, take);
+  out[old + take] = 0;
+  out[old + n] = 0;
+  return out;
+}
+
+/* astrncat0 that never returns null: calls error_exit when astrncat0
+ * reports failure. */
+char *xastrncat0 (char *x, char *y, size_t n) {
+  char *joined = astrncat0 (x, y, n);
+
+  /* report under this function's own name, as xastrcat does */
+  if (!joined) error_exit ("xastrncat0");
+  return joined;
+}
+
+/* fmemopen wrapper: opens the n-byte buffer x as a stream in the given
+ * mode and calls perror_exit if fmemopen returns null. */
+FILE *xfmemopen (void *x, size_t n, char *mode) {
+  FILE *stream = fmemopen (x, n, mode);
+
+  if (!stream) perror_exit ("xfmemopen");
+  return stream;
+}
diff --git a/toys/pending/sed.c b/toys/pending/sed.c
index 0ce25ac..e0787e4 100644
--- a/toys/pending/sed.c
+++ b/toys/pending/sed.c
@@ -1,138 +1,561 @@
-/* sed.c - Stream editor.
+/* sed.c - stream editor
*
- * Copyright 2012 Rob Landley <rob at landley.net>
+ * Copyright 2013 CE Strake <strake888 at gmail.com>
*
- * See http://opengroup.org/onlinepubs/9699919799/utilities/sed.c
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/
+ * See http://refspecs.linuxfoundation.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/cmdbehav.html
-USE_SED(NEWTOY(sed, "irne*f*", TOYFLAG_BIN))
+USE_SED(NEWTOY(sed, "Ene*f*", TOYFLAG_BIN))
config SED
bool "sed"
default n
help
- usage: sed [-irn] {command | [-e command]...} [FILE...]
-
- Stream EDitor, transforms text by appling script of command to each line
- of input.
-
- -e Add expression to the command script (if no -e, use first argument)
- -i Modify file in place
- -n No default output (p commands only)
- -r Use extended regular expression syntex
+ usage: sed ...
*/
#define FOR_sed
#include "toys.h"
#include "lib/xregcomp.h"
+#include <regex.h>
GLOBALS(
- struct arg_list *files;
- struct arg_list *scripts;
-
- void *commands;
+ struct arg_list *fArgu, *eArgu;
+ char *patternSpace, *holdSpace;
+ long n;
)
-// Digested version of what sed commands can actually tell use to do.
+/* Read one wide character from f; end of input or a read error is fatal.
+ * fgetwc reports failure as WEOF, and wint_t may be an unsigned type, so
+ * a sign test could never detect it. */
+static wint_t xfgetwc (FILE *f) {
+  wint_t x = fgetwc (f);
+  if (x == WEOF) error_exit ("failed to read");
+  return x;
+}
+
+/* Read one byte from f, calling error_exit if fgetc reports a negative
+ * value (end of input or error). */
+static int xfgetc (FILE *f) {
+  int ch = fgetc (f);
+
+  if (ch >= 0) return ch;
+  error_exit ("failed to read");
+  return ch;
+}
+
+/* Return the next wide character of f without consuming it (WEOF when
+ * nothing could be read). */
+static wint_t fpeekwc (FILE *f) {
+  wint_t next = fgetwc (f);
+
+  /* pushing back WEOF is a no-op, so it can be skipped */
+  if (next != WEOF) ungetwc (next, f);
+  return next;
+}
+
+/* Return the next byte of f without consuming it (EOF when nothing could
+ * be read). */
+static int fpeekc (FILE *f) {
+  int next = fgetc (f);
+
+  /* pushing back EOF is a no-op, so it can be skipped */
+  if (next != EOF) ungetc (next, f);
+  return next;
+}
+
+/* Consume wide characters from f for as long as predicate p accepts them;
+ * the first rejected character is pushed back onto the stream. */
+static void skipSuch (FILE *f, int (*p) (wint_t)) {
+  wint_t w = fgetwc (f);
+
+  while (p (w)) w = fgetwc (f);
+  ungetwc (w, f);
+}
+
+/* True for whitespace other than newline. */
+static int iswnspace (wint_t x) {
+  if (x == '\n') return 0;
+  return iswspace (x) != 0;
+}
+
+/* Read wide characters from f up to delimiter d (which is consumed) and
+ * return them as a newly allocated, NUL-terminated wide string.
+ *
+ * If e is nonzero it is an escape character: "e d" yields a literal d,
+ * while e before any other character is kept as e and that character is
+ * pushed back to be read normally.
+ *
+ * Returns 0 only when input ends before anything was read, or ends right
+ * after an escape character. A delimiter seen first gives an empty string,
+ * so callers can tell an empty field or line from end of input. */
+static wchar_t *afgetwswde (FILE *f, wint_t d, wint_t e) {
+  wchar_t *xs = 0;
+  wint_t x;
+  size_t n = 0;
+
+  for (;;) {
+    x = fgetwc (f);
+    if (x == WEOF) {
+      /* nothing read at all: report end of input */
+      if (!xs) return 0;
+      break;
+    }
+    if (x == d) break;
+
+    if (e && x == e) {
+      x = fgetwc (f);
+      if (x == WEOF) {
+        free (xs);
+        return 0;
+      }
+      if (x != d) {
+        ungetwc (x, f);
+        x = e;
+      }
+    }
+    xs = xrealloc (xs, sizeof (wchar_t)*(++n + 1));
+    xs[n - 1] = x;
+  }
+
+  /* delimiter came first: the field is empty, not missing */
+  if (!xs) xs = xrealloc (0, sizeof (wchar_t));
+  xs[n] = 0;
+  return xs;
+}
-struct sed_command {
- // double_list compatibility (easier to create in-order)
- struct sed_command *next, *prev;
+/* Convert the wide string xs to a newly allocated multibyte string.
+ * Exits via error_exit if xs cannot be converted. */
+static char *wcstoambs (wchar_t *xs) {
+  char *cs;
+  size_t n = wcstombs (0, xs, 0);
+
+  /* wcstombs returns (size_t)-1 for an unconvertible character; without
+   * this check n + 1 would wrap to 0 */
+  if (n == (size_t)-1) error_exit ("bad multibyte conversion");
+  cs = xmalloc (n + 1);
+  wcstombs (cs, xs, n + 1);
+  return cs;
+}
- // data string for (saicytb)
- char c, *data;
- // Regexes for s/match/data/ and /begin/,/end/command
- regex_t *rmatch, *rbegin, *rend;
- // For numeric ranges ala 10,20command
- long lstart, lstop;
- // Which match to replace, 0 for all. s and w commands can write to a file
- int which, outfd;
+/* Multibyte version of afgetwswde: reads a delimited field from f and
+ * returns it converted by wcstoambs, or 0 when afgetwswde returns 0. */
+static char *afgetswde (FILE *f, wint_t d, wint_t e) {
+  wchar_t *wide = afgetwswde (f, d, e);
+  char *narrow;
+
+  if (!wide) return 0;
+  narrow = wcstoambs (wide);
+  free (wide);
+  return narrow;
+}
+
+/* afgetswde that treats a null result as fatal (perror_exit). */
+static char *xafgetswde (FILE *f, wint_t d, wint_t e) {
+  char *field = afgetswde (f, d, e);
+
+  if (!field) perror_exit ("failed to read");
+  return field;
+}
+
+/* One sed address as produced by parseAddress. */
+typedef struct {
+ /* '/' = regular expression (re is set), 'n' = line number (n is set),
+  * '$' = dollar address, 0 = no address present */
+ char type;
+ union {
+ regex_t re;
+ long n;
+ };
+} Address;
+
+/* One sed function (command verb plus arguments) as built by
+ * parseFunction. */
+typedef struct Function {
+ /* the function character read from the script; parseFunction stores ';'
+  * here for a '{' group, and x == 0 terminates an array of functions */
+ char x;
+ union {
+ struct {
+ /* text for a/c/i, replacement text for s */
+ char *writ;
+ /* compiled pattern of s */
+ regex_t re;
+ /* flags: sed_*Flag bits set by s; fd: file opened for r/w or s's w flag */
+ int flags, fd;
+ long n, nMatch /* for s function, to pass to regexec */;
+ };
+ /* members of a '{' group, terminated by an entry with x == 0 */
+ struct Function *fns;
+ };
+} Function;
+
+/* A complete script command: up to two addresses and the function they
+ * select. */
+typedef struct {
+ /* s: first address; t: second address (type 0 when only one was given) */
+ Address s, t;
+ Function fn;
+ /* range state used by inRange; parseCommand initializes it to 0 */
+ int active;
+} Command;
+
+/* Bit flags stored in Function.flags for the s function. */
+enum {
+  sed_gFlag = 1 << 0,
+  sed_pFlag = 1 << 1,
+  sed_wFlag = 1 << 2,
 };
-// Space. Space. Gotta get past space. Spaaaaaaaace! (But not newline.)
-static void spaceorb(char **s)
-{
- while (**s == ' ' || **s == '\t') ++*s;
-}
-
-// Parse sed commands
-
-static void parse_scripts(void)
-{
- struct arg_list *script;
- int which = 0, i;
-
- // Loop through list of scripts collated from command line and/or files
-
- for (script = TT.scripts; script; script = script->next) {
- char *str = script->arg;
- struct sed_command *cmd;
-
- // we can get multiple commands from a string (semicolons and such)
-
- which++;
- for (i=1;;) {
- if (!*str) break;
-
- cmd = xzalloc(sizeof(struct sed_command));
-
- // Identify prefix
- for (;;) {
- spaceorb(&str);
- if (*str == '^') {
- if (cmd->lstart) goto parse_fail;
- cmd->lstart = -1;
- str++;
- continue;
- } else if (*str == '$') {
- cmd->lstop = LONG_MAX;
- str++;
- break;
- } else if (isdigit(*str)) {
- long ll = strtol(str, &str, 10);
-
- if (ll<0) goto parse_fail;
- if (cmd->lstart) {
- cmd->lstop = ll;
- break;
- } else cmd->lstart = ll;
- } else if (*str == '/' || *str == '\\') {
- // set begin/end
- printf("regex\n");
- exit(1);
- } else if (!cmd->lstart && !cmd->rbegin) break;
- else goto parse_fail; // , with no range after it
-
- spaceorb(&str);
- if (*str != ',') break;
- str++;
+static Command *cs;
+
+static long parseNumber (FILE *);
+static Address parseAddress (FILE *);
+static Function parseFunction (FILE *);
+static Function *parseFunctions (FILE *);
+static Command parseCommand (FILE *);
+
+/* Parse a run of decimal digits from f as a natural number. The first
+ * non-digit is pushed back; with no digits at all the result is 0. */
+static long parseNumber (FILE *f) {
+  long value = 0;
+  int ch;
+
+  while (isdigit (ch = fgetc (f))) value = value*10 + (ch - '0');
+  ungetc (ch, f);
+  return value;
+}
+
+/* Parse one optional address from f.
+ *
+ * "/re/" or "\cREc" (any delimiter c other than backslash or NUL) gives
+ * type '/', "$" gives type '$', a digit starts a line number (type 'n').
+ * Anything else is pushed back and reported as type 0, i.e. no address. */
+static Address parseAddress (FILE *f) {
+  Address addr;
+  char *pattern;
+  wint_t delim = fgetwc (f);
+
+  if (delim == '$') {
+    addr.type = '$';
+    return addr;
+  }
+
+  if (delim == '\\') {
+    /* custom delimiter follows the backslash */
+    delim = xfgetwc (f);
+    if (delim == '\\' || delim == 0) error_exit ("bad delimiter");
+  }
+  else if (delim != '/') {
+    ungetwc (delim, f);
+    if (iswdigit (delim)) {
+      addr.type = 'n';
+      addr.n = parseNumber (f);
+    }
+    else addr.type = 0;
+    return addr;
+  }
+
+  addr.type = '/';
+  pattern = xafgetswde (f, delim, '\\');
+  xregcomp (&addr.re, pattern, toys.optflags & FLAG_E ? REG_EXTENDED : 0);
+  free (pattern);
+  return addr;
+}
+
+/* Parse one function (command character plus its arguments) from f.
+ *
+ *   {        members are parsed by parseFunctions; stored with x == ';'
+ *   a c i    rest of the line is skipped, the following text goes to writ
+ *   r w      file name up to end of line is opened into fd
+ *   s        delimiter, pattern, replacement, then flags g, p and w file
+ *
+ * Any other character is returned in x with the rest zeroed. Malformed
+ * input is fatal via the x* helpers and error_exit. */
+static Function parseFunction (FILE *f) {
+  Function fn;
+  char *xs;
+  wint_t d;
+  int c;
+
+  /* start all-zero so flags, fd and the union are never read unset */
+  memset (&fn, 0, sizeof (fn));
+
+  fn.x = fgetc (f);
+  switch (fn.x) {
+  case '{':
+    fn.x = ';';
+    fn.fns = parseFunctions (f);
+    /* consume the closing brace so the caller does not see it again */
+    if (fgetc (f) != '}') error_exit ("{} mismatch");
+    break;
+  case 'a':
+  case 'c':
+  case 'i':
+    /* skip to end of line; stop at end of input rather than spin forever */
+    do c = fgetc (f); while (c != '\n' && c != EOF);
+    fn.writ = xafgetswde (f, '\n', '\\');
+    break;
+  case 'r':
+  case 'w':
+    skipSuch (f, iswnspace);
+    xs = xafgetswde (f, '\n', '\\');
+    fn.fd = xopen (xs, fn.x == 'r' ? O_RDONLY : (O_WRONLY | O_CREAT | O_TRUNC));
+    free (xs);
+    break;
+  case 's':
+    d = xfgetwc (f);
+    xs = xafgetswde (f, d, '\\');
+    fn.writ = xafgetswde (f, d, '\\');
+    xregcomp (&fn.re, xs, toys.optflags & FLAG_E ? REG_EXTENDED : 0);
+    fn.nMatch = 256; /* TO DO: unbreak */
+    free (xs);
+    for (;;) {
+      /* int, not char: EOF must stay distinguishable from data */
+      c = fgetc (f);
+      if (c == EOF) break;
+      if (isspace (c) && c != '\n') continue;
+      switch (c) {
+      case '\n':
+      case ';':
+        goto s_end;
+      case '}':
+        /* end of the enclosing group: leave the brace for the '{' case */
+        ungetc (c, f);
+        goto s_end;
+      case 'g':
+        fn.flags |= sed_gFlag;
+        break;
+      case 'p':
+        fn.flags |= sed_pFlag;
+        break;
+      case 'w':
+        fn.flags |= sed_wFlag;
+        skipSuch (f, iswnspace);
+        xs = xafgetswde (f, '\n', '\\');
+        /* O_APPEND alone carries no write access mode */
+        fn.fd = xopen (xs, O_WRONLY | O_CREAT | O_APPEND);
+        free (xs);
+        goto s_end;
+      default:
+        error_exit ("bad flag: %c", c);
 }
- i = stridx("{bcdDgGhHlnNpPstwxyrqia= \t#:}", *str);
- if (i == -1) goto parse_fail;
+    }
+s_end:
+    break;
+  }
+
+  return fn;
+}
+
+/* Parse one command: an optional address, an optional ",address", then
+ * the function. A comma requires both addresses to be present. The
+ * returned command starts with active == 0. */
+static Command parseCommand (FILE *f) {
+  Command cmd;
+
+  skipSuch (f, iswnspace);
+  cmd.s = parseAddress (f);
+
+  if (fpeekc (f) != ',') cmd.t = (Address){ .type = 0 };
+  else {
+    fgetc (f);
+    cmd.t = parseAddress (f);
+    if (!cmd.s.type || !cmd.t.type) error_exit ("bad address");
+  }
+
+  skipSuch (f, iswnspace);
+  cmd.fn = parseFunction (f);
+  cmd.active = 0;
+  return cmd;
+}
+
+/* Parse functions from f until end of input or a '}' (which is left
+ * unread). Returns a newly allocated array terminated by an entry with
+ * x == 0; every entry before the terminator is a parsed function. */
+static Function *parseFunctions (FILE *f) {
+  Function *fns = 0;
+  long n = 0;
+
+  for (;;) {
+    int c;
+
+    skipSuch (f, iswspace);
+
-  dlist_add_nomalloc((struct double_list **)&TT.commands,
-  (struct double_list *)cmd);
-  exit(1);
+    c = fpeekc (f);
+    if (c == EOF || c == '}') break;
+
+    /* grow only once an entry is known to follow, so no uninitialized
+     * slot is left in front of the terminator */
+    fns = xrealloc (fns, sizeof (Function)*(n + 2));
+    fns[n++] = parseFunction (f);
+  }
+
+  if (!fns) fns = xmalloc (sizeof (Function));
+  fns[n] = (Function){ .x = 0 };
+
+  return fns;
+}
+
+/* Parse commands from f until end of input or a '}' (which is left
+ * unread). Returns a newly allocated array terminated by an entry whose
+ * fn.x == 0; every entry before the terminator is a parsed command. */
+static Command *parseCommands (FILE *f) {
+  Command *cs = 0;
+  long n = 0;
+
+  for (;;) {
+    int c;
+
+    skipSuch (f, iswspace);
+
+    c = fpeekc (f);
+    if (c == EOF || c == '}') break;
+
+    /* grow only once an entry is known to follow, so no uninitialized
+     * slot is left in front of the terminator */
+    cs = xrealloc (cs, sizeof (Command)*(n + 2));
+    cs[n++] = parseCommand (f);
+  }
+
+  if (!cs) cs = xmalloc (sizeof (Command));
+  cs[n] = (Command){ .fn = (Function) { .x = 0 } };
+
+  return cs;
+}
+
+/* Decide whether command c applies to the current line (TT.n,
+ * TT.patternSpace). Returns nonzero if it does.
+ *
+ * With one address the line number or pattern is tested directly. With
+ * two addresses c.active records whether the range is open.
+ * NOTE(review): c is passed by value, so changes to c.active made here do
+ * not reach the caller's copy; regex-delimited ranges therefore cannot
+ * stay open across lines until callers pass a pointer. Numeric end
+ * addresses are checked statelessly below so they work regardless. */
+static int inRange (Command c) {
+  if (!c.t.type) switch (c.s.type) {
+  case 'n':
+    return (c.s.n == TT.n);
+  case '/':
+    /* single address: c.t.re is unset here, the pattern lives in c.s */
+    return (regexec (&c.s.re, TT.patternSpace, 0, 0, 0) == 0);
+  case 0:
+    return 1;
+  }
+  if (c.active) {
+    switch (c.t.type) {
+    case 'n':
+      /* the range closes once the end line has been reached */
+      if (TT.n >= c.t.n) c.active = 0;
+      break;
+    case '/':
+      if (regexec (&c.t.re, TT.patternSpace, 0, 0, 0) == 0) c.active = 0;
+      break;
+    }
+    return 1;
+  }
+  else {
+    switch (c.s.type) {
+    case 'n':
+      /* do not (re)open a numeric range that has already ended */
+      if (c.s.n <= TT.n && (c.t.type != 'n' || TT.n <= c.t.n)) c.active = 1;
+      break;
+    case '/':
+      if (regexec (&c.s.re, TT.patternSpace, 0, 0, 0) == 0) c.active = 1;
+      break;
 }
+    return c.active;
 }
+}
+
+/* Apply the s function fn to TT.patternSpace, replacing the pattern space
+ * with the substituted text.
+ *
+ * The first match is always replaced; further matches only when sed_gFlag
+ * is set. In the replacement, '&' inserts the whole match, "\N" inserts
+ * subexpression N, and a backslash before anything else yields that
+ * character literally. A bad backreference is reported with error_msg and
+ * contributes nothing. */
+static void sFn (Function fn) {
+  char *xs, *ys, *p;
+  regmatch_t *ms;
+  long n, ii;
+
+  ms = xmalloc (sizeof (regmatch_t)*fn.nMatch);
+  ys = 0;
+  xs = TT.patternSpace;
+  do {
+    /* past the first match xs is no longer the start of the line */
+    if (regexec (&fn.re, xs, fn.nMatch, ms,
+                 xs == TT.patternSpace ? 0 : REG_NOTBOL)) break;
+
+    ys = xastrncat0 (ys, xs, ms[0].rm_so);
+    for (ii = 0; fn.writ[ii]; ii++) switch (fn.writ[ii]) {
+    case '&':
+      ys = xastrncat0 (ys, xs + ms[0].rm_so, ms[0].rm_eo - ms[0].rm_so);
+      break;
+    case '\\':
+      ii++;
+      if (!fn.writ[ii]) {
+        /* trailing backslash: step back so the loop stops at the NUL */
+        ii--;
+        break;
+      }
+      n = strtoul (fn.writ + ii, &p, 10);
+      if (p > fn.writ + ii) {
+        ii = p - fn.writ - 1;
+        if (n >= fn.nMatch || ms[n].rm_so < 0) error_msg ("bad backreference: %ld", n);
+        else ys = xastrncat0 (ys, xs + ms[n].rm_so, ms[n].rm_eo - ms[n].rm_so);
+        break;
+      }
+      /* fall thru */
+    default:
+      ys = xastrcat (ys, (char []){ fn.writ[ii], 0 });
+    }
+
+    xs += ms[0].rm_eo;
+    if (ms[0].rm_so == ms[0].rm_eo) {
+      /* empty match: copy one character through so the scan advances */
+      if (!*xs) break;
+      ys = xastrncat0 (ys, xs, 1);
+      xs++;
+    }
+  } while (fn.flags & sed_gFlag);
+
+  ys = xastrcat (ys, xs);
+  free (ms);
+  free (TT.patternSpace);
+  TT.patternSpace = ys;
+}
- return;
+/* Run command c against the current pattern space if inRange selects it.
+ * f is the input stream, read by the n and N functions.
+ * Returns nonzero when the caller should stop running commands and start
+ * the next cycle, 0 otherwise (including when c does not apply). */
+static int doCommand (FILE *f, Command c) {
+  char *xs;
+  size_t len = 0;
+
+  if (!inRange (c)) return 0;
+
+  switch (c.fn.x) {
+  case 'c':
+    TT.patternSpace[0] = 0;
+    /* fall thru */
+  case 'a':
+  case 'i':
+    fputs (c.fn.writ, stdout);
+    return 0;
+  case 'D':
+    xs = strchr (TT.patternSpace, '\n');
+    if (xs) {
+      memmove (TT.patternSpace, xs + 1, strlen (xs + 1) + 1);
+      return 1;
+    }
+    /* fall thru */
+  case 'd':
+    TT.patternSpace[0] = 0;
+    return 1;
+  case 'g':
+    free (TT.patternSpace);
+    TT.patternSpace = xstrdup (TT.holdSpace);
+    return 0;
+  case 'G':
+    /* xastrcat returns the string to use from now on; keep it */
+    TT.patternSpace = xastrcat (TT.patternSpace, "\n");
+    TT.patternSpace = xastrcat (TT.patternSpace, TT.holdSpace);
+    return 0;
+  case 'h':
+    free (TT.holdSpace);
+    TT.holdSpace = xstrdup (TT.patternSpace);
+    return 0;
+  case 'H':
+    TT.holdSpace = xastrcat (TT.holdSpace, "\n");
+    TT.holdSpace = xastrcat (TT.holdSpace, TT.patternSpace);
+    return 0;
+  case 'n':
+    if (!(toys.optflags & FLAG_n)) fputs (TT.patternSpace, stdout);
+    free (TT.patternSpace);
+    TT.patternSpace = 0;
+    if (getline (&TT.patternSpace, &len, f) < 0) xexit ();
+    return 0;
+  case 'N':
+    xs = 0;
+    if (getline (&xs, &len, f) < 0) xexit ();
+    TT.patternSpace = xastrcat (TT.patternSpace, "\n");
+    TT.patternSpace = xastrcat (TT.patternSpace, xs);
+    free (xs);
+    TT.n++;
+    return 0;
+  case 'p':
+    fputs (TT.patternSpace, stdout);
+    return 0;
+  case 'q':
+    xexit ();
+  case 's':
+    sFn (c.fn);
+    return 0;
+  case 'x':
+    xs = TT.patternSpace;
+    TT.patternSpace = TT.holdSpace;
+    TT.holdSpace = xs;
+    return 0;
+  case '=':
+    printf ("%ld\n", TT.n);
+    return 0;
+  default:
+    error_exit ("%c function unimplete", c.fn.x);
+  }
+
+  return 0;
+}
-parse_fail:
- error_exit("bad expression %d@%d: %s", which, i, script->arg+i);
+/* Run only the 'i' commands of the script cs, stopping early if one of
+ * them asks for the next cycle. */
+static void doPreCommands (FILE *f, Command *cs) {
+  long ii = 0;
+
+  while (cs[ii].fn.x) {
+    if (cs[ii].fn.x == 'i' && doCommand (f, cs[ii])) break;
+    ii++;
+  }
 }
-void sed_main(void)
-{
- char **files=toys.optargs;
+/* Run every command of the script cs except the 'i' commands, stopping
+ * early if one of them asks for the next cycle. */
+static void doCommands (FILE *f, Command *cs) {
+  long ii = 0;
+
+  while (cs[ii].fn.x) {
+    if (cs[ii].fn.x != 'i' && doCommand (f, cs[ii])) break;
+    ii++;
+  }
+}
- // If no -e, use first argument
- if (!TT.scripts) {
- if (!*files) error_exit("Need script");
- (TT.scripts = xzalloc(sizeof(struct arg_list)))->arg = *(files++);
+/* Per-file callback passed to loopfiles: run the script cs over every
+ * line read from descriptor fd. name is used in the error message only.
+ * NOTE(review): the stream from fdopen is never fclosed here; whether the
+ * caller closes fd itself is not visible from this file. */
+void do_sed (int fd, char *name) {
+  FILE *f;
+
+  f = fdopen (fd, "r");
+  /* test the stream itself: a bad fd also makes fdopen return null */
+  if (!f) perror_exit ("failed to open %s", name);
+
+  TT.patternSpace = 0;
+  for (;;) {
+    doPreCommands (f, cs);
+    TT.patternSpace = afgetswde (f, '\n', 0);
+    if (!TT.patternSpace) return;
+    TT.n++;
+    doCommands (f, cs);
+    /* -n suppresses the automatic print at the end of each cycle */
+    if (!(toys.optflags & FLAG_n)) printf ("%s\n", TT.patternSpace);
+    free (TT.patternSpace);
 }
+}
+
+/* Append the fn.x == 0 terminated command array ds to the global script
+ * cs, keeping cs terminated the same way. ds itself is only copied from;
+ * the caller still owns it. */
+void addCommands (Command *ds) {
+  long m = 0, n;
+
+  /* cs is null until the first script is added: nothing to count, and
+   * there is no entry that could be read */
+  if (cs) while (cs[m].fn.x) m++;
+  for (n = 0; ds[n].fn.x; n++);
+  cs = xrealloc (cs, sizeof (Command)*(m + n + 1));
+  memmove (cs + m, ds, sizeof(Command)*(n + 1));
+}
+
+/* Parse all commands from f and append them to the global script. A '}'
+ * left unread after parsing is reported as a brace mismatch. */
+void faddCommands (FILE *f) {
+  Command *parsed = parseCommands (f);
+
+  if (fpeekc (f) == '}') error_exit ("{} mismatch");
+  addCommands (parsed);
+  free (parsed);
+}
- parse_scripts();
+/* Build the global script cs: first from every TT.eArgu string, then from
+ * every TT.fArgu file. If that leaves cs null, the first positional
+ * argument is taken as the script and removed from toys.optargs. */
+void buildScript (void) {
+  FILE *f;
+  char *text;
+
+  cs = 0;
+
+  while (TT.eArgu) {
+    text = TT.eArgu -> arg;
+    f = xfmemopen (text, strlen (text), "r");
+    faddCommands (f);
+    fclose (f);
+    TT.eArgu = TT.eArgu -> next;
+  }
+  while (TT.fArgu) {
+    f = xfopen (TT.fArgu -> arg, "r");
+    faddCommands (f);
+    fclose (f);
+    TT.fArgu = TT.fArgu -> next;
+  }
+
+  if (cs) return;
+
+  if (toys.optc < 1) error_exit ("no script");
+  text = toys.optargs[0];
+  f = xfmemopen (text, strlen (text), "r");
+  faddCommands (f);
+  fclose (f);
+  toys.optc--;
+  toys.optargs++;
+}
- while (*files) dprintf(2,"file=%s\n", *(files++));
+/* Entry point: build the script, then run do_sed over each input file. */
+void sed_main (void) {
+  buildScript ();
+
+  /* both spaces are read as C strings (e.g. by g, G and x), so they must
+   * start as valid empty strings rather than zero-byte allocations */
+  TT.patternSpace = xstrdup ("");
+  TT.holdSpace = xstrdup ("");
+
+  loopfiles (toys.optargs, do_sed);
 }
--
1.7.11.1
1375322865.0
More information about the Toybox
mailing list