[Toybox] tar --transform again

enh enh at google.com
Fri Feb 12 17:49:35 PST 2021


Attached is "just enough" tar --transform for the kernel use case. Adding
the missing FLAGS syntax checking is easy, and
sed's unescape_delimited_string() could easily be moved into lib and reused to
fix the PATTERN part of s/PATTERN/REPLACEMENT/FLAGS, but the REPLACEMENT
part seems hairy enough that I thought I'd stop and ask before ploughing on
in this direction...
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.landley.net/pipermail/toybox-landley.net/attachments/20210212/26bf232b/attachment-0001.html>
-------------- next part --------------
diff --git a/tests/tar.test b/tests/tar.test
index dbe28d78..d7d930a5 100644
--- a/tests/tar.test
+++ b/tests/tar.test
@@ -143,6 +143,16 @@ testing "pass broken absolute symlink" "$TAR dir/linkabsbrok | LST" \
   "lrwxrwxrwx root/root 0 2009-02-13 23:31 dir/linkabsbrok -> /does/not/exist\n" \
   "" ""
 
+testing "transform" \
+  "$TAR --transform='s:^:prefix/:' dir/linkabsbrok file | LST" \
+  "lrwxrwxrwx root/root 0 2009-02-13 23:31 prefix/dir/linkabsbrok -> prefix//does/not/exist\n-rw-rw-r-- root/root 0 2009-02-13 23:31 prefix/file\n" \
+  "" ""
+
+testing "transform S" \
+  "$TAR --transform='s:^:prefix/:S' dir/linkabsbrok file | LST" \
+  "lrwxrwxrwx root/root 0 2009-02-13 23:31 prefix/dir/linkabsbrok -> /does/not/exist\n-rw-rw-r-- root/root 0 2009-02-13 23:31 prefix/file\n" \
+  "" ""
+
 # this expects devtmpfs values
 
 testing "pass /dev/null" \
@@ -216,7 +226,7 @@ toyonly testing "cat tbz | extract dir/file (autodetect)" \
   "" ""
 
 yes | (dd bs=$((1<<16)) count=1; dd bs=8192 seek=14 count=1; dd bs=4096 seek=64 count=5) 2>/dev/null > fweep
-testing "sparse without overflow" "$TAR --sparse fweep | SUM 3" \
+toyonly testing "sparse without overflow" "$TAR --sparse fweep | SUM 3" \
   "e1560110293247934493626d564c8f03c357cec5\n" "" ""
 
 rm fweep
diff --git a/toys/posix/tar.c b/toys/posix/tar.c
index 66547613..fb26ac16 100644
--- a/toys/posix/tar.c
+++ b/toys/posix/tar.c
@@ -17,7 +17,7 @@
  * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
  *
 
-USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
+USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(transform)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
 
 config TAR
   bool "tar"
@@ -51,7 +51,7 @@ GLOBALS(
   char *f, *C;
   struct arg_list *T, *X;
   char *I, *to_command, *owner, *group, *mtime, *mode;
-  struct arg_list *exclude;
+  struct arg_list *transform, *exclude;
 
   struct double_list *incl, *excl, *seen;
   struct string_list *dirs;
@@ -77,6 +77,12 @@ GLOBALS(
     time_t mtime;
     dev_t device;
   } hdr;
+
+  struct transform_cmd {
+    struct transform_cmd *next;
+    regex_t re;
+    char *replacement, *flags;
+  } *transforms;
 )
 
 struct tar_hdr {
@@ -173,6 +179,32 @@ static void alloread(void *buf, int len)
   (*b)[len] = 0;
 }
 
+// Apply the first matching --transform to name. link_target is nonzero when
+// name is a link target (S/H flags), zero for a member name (R flag).
+// Returns a malloc()ed string the caller must free, or NULL if nothing matched.
+static char *transform(char *name, struct stat *st, int link_target)
+{
+  struct transform_cmd *tc = TT.transforms;
+  regmatch_t m[1];
+  char *tname, skip;
+
+  // GNU scope flags: R skips member names, S symlink targets, H hard links.
+  skip = !link_target ? 'R' : S_ISLNK(st->st_mode) ? 'S' : 'H';
+  for (; tc; tc = tc->next) {
+    if (strchr(tc->flags, skip)) continue;
+    // TODO: do multiple matches if the sed 's' g flag was supplied
+    // TODO: handle &
+    if (regexec(&tc->re, name, 1, m, 0)) continue;
+    // New length: name minus the matched span, plus replacement and NUL.
+    tname = xzalloc(strlen(name) - (m[0].rm_eo - m[0].rm_so)
+                    + strlen(tc->replacement) + 1);
+    memcpy(tname, name, m[0].rm_so);
+    strcat(stpcpy(tname + m[0].rm_so, tc->replacement), name + m[0].rm_eo);
+    return tname;
+  }
+  return NULL;
+}
+
 // callback from dirtree to create archive
 static int add_to_tar(struct dirtree *node)
 {
@@ -181,7 +213,7 @@ static int add_to_tar(struct dirtree *node)
   struct passwd *pw = pw;
   struct group *gr = gr;
   int i, fd = -1, norecurse = FLAG(no_recursion);
-  char *name, *lnk, *hname;
+  char *name, *lnk, *hname, *tname = 0, *lnktname;
 
   if (!dirtree_notdotdot(node)) return 0;
   if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
@@ -228,8 +260,10 @@ static int add_to_tar(struct dirtree *node)
   if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
   if (TT.mtime) st->st_mtime = TT.mtt;
 
+  tname = transform(hname, st, 0);
+
   memset(&hdr, 0, sizeof(hdr));
-  strncpy(hdr.name, hname, sizeof(hdr.name));
+  strncpy(hdr.name, tname ? tname : hname, sizeof(hdr.name));
   ITOO(hdr.mode, st->st_mode &07777);
   ITOO(hdr.uid, st->st_uid);
   ITOO(hdr.gid, st->st_gid);
@@ -268,9 +302,12 @@ static int add_to_tar(struct dirtree *node)
       perror_msg("readlink");
       goto done;
     }
-    if (strlen(lnk) > sizeof(hdr.link)) write_longname(lnk, 'K');
-    strncpy(hdr.link, lnk, sizeof(hdr.link));
+    lnktname = transform(lnk, st, 1);
+    if (strlen(lnktname ? lnktname : lnk) > sizeof(hdr.link))
+      write_longname(lnktname ? lnktname : lnk, 'K');
+    strncpy(hdr.link, lnktname ? lnktname : lnk, sizeof(hdr.link));
     if (!i) free(lnk);
+    free(lnktname);
   } else if (S_ISREG(st->st_mode)) {
     hdr.type = '0';
     ITOO(hdr.size, st->st_size);
@@ -285,7 +322,8 @@ static int add_to_tar(struct dirtree *node)
     goto done;
   }
 
-  if (strlen(hname) > sizeof(hdr.name)) write_longname(hname, 'L');
+  if (strlen(tname ? tname : hname) > sizeof(hdr.name))
+    write_longname(tname ? tname : hname, 'L');
 
   if (!FLAG(numeric_owner)) {
     if (TT.owner || (pw = bufgetpwuid(st->st_uid)))
@@ -373,6 +411,7 @@ static int add_to_tar(struct dirtree *node)
   }
 done:
   free(name);
+  free(tname);
 
   return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!norecurse;
 }
@@ -802,6 +841,33 @@ void tar_main(void)
   if (TT.owner) TT.ouid = xgetuid(TT.owner);
   if (TT.group) TT.ggid = xgetgid(TT.group);
   if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
+  if (TT.transform) {
+    struct arg_list *t = TT.transform;
+    struct transform_cmd *tc = NULL, *new;
+    char sep, *pattern, *replacement, *flags;
+
+    for (; t; t = t->next) {
+      // TODO: handle escaping like sed (move sed code to lib and share?)
+      if (*t->arg != 's' || !(sep = t->arg[1]) ||
+          !(replacement = strchr(pattern = t->arg+2, sep)) ||
+          !(flags = strchr(replacement+1, sep)))
+        error_exit("bad transform: %s", t->arg);
+      *replacement++ = 0;
+      *flags++ = 0;
+      if (!*flags) flags = "rsh";
+
+      new = xzalloc(sizeof(struct transform_cmd));
+      if (!TT.transforms) TT.transforms = tc = new;
+      else {
+        tc->next = new;
+        tc = new;
+      }
+      new->replacement = replacement;
+      new->flags = flags;
+      // TODO: extract sed 's' gix flags
+      xregcomp(&new->re, pattern, 0);
+    }
+  }
 
   // Collect file list.
   for (; TT.exclude; TT.exclude = TT.exclude->next)
@@ -947,6 +1013,13 @@ void tar_main(void)
   }
 
   if (CFG_TOYBOX_FREE) {
+    struct transform_cmd *tc;
+
+    while ((tc = TT.transforms)) {
+      TT.transforms = TT.transforms->next;
+      regfree(&tc->re);
+      free(tc);
+    }
     llist_traverse(TT.excl, llist_free_double);
     llist_traverse(TT.incl, llist_free_double);
     while(TT.hlc) free(TT.hlx[--TT.hlc].arg);


More information about the Toybox mailing list