[Toybox] tar --transform again
enh
enh at google.com
Fri Feb 12 17:49:35 PST 2021
Attached is "just enough" tar --transform for the kernel use case. Adding
the missing FLAGS syntax checking is easy, and
sed's unescape_delimited_string() is easily moved into lib for reuse in
fixing the PATTERN part of s/PATTERN/REPLACEMENT/FLAGS, but the REPLACEMENT
part seems hairy enough that I thought I'd stop and ask before ploughing on
in this direction...
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.landley.net/pipermail/toybox-landley.net/attachments/20210212/26bf232b/attachment-0001.html>
-------------- next part --------------
diff --git a/tests/tar.test b/tests/tar.test
index dbe28d78..d7d930a5 100644
--- a/tests/tar.test
+++ b/tests/tar.test
@@ -143,6 +143,16 @@ testing "pass broken absolute symlink" "$TAR dir/linkabsbrok | LST" \
"lrwxrwxrwx root/root 0 2009-02-13 23:31 dir/linkabsbrok -> /does/not/exist\n" \
"" ""
+testing "transform" \
+ "$TAR --transform='s:^:prefix/:' dir/linkabsbrok file | LST" \
+ "lrwxrwxrwx root/root 0 2009-02-13 23:31 prefix/dir/linkabsbrok -> prefix//does/not/exist\n-rw-rw-r-- root/root 0 2009-02-13 23:31 prefix/file\n" \
+ "" ""
+
+testing "transform S" \
+ "$TAR --transform='s:^:prefix/:S' dir/linkabsbrok file | LST" \
+ "lrwxrwxrwx root/root 0 2009-02-13 23:31 prefix/dir/linkabsbrok -> /does/not/exist\n-rw-rw-r-- root/root 0 2009-02-13 23:31 prefix/file\n" \
+ "" ""
+
# this expects devtmpfs values
testing "pass /dev/null" \
@@ -216,7 +226,7 @@ toyonly testing "cat tbz | extract dir/file (autodetect)" \
"" ""
yes | (dd bs=$((1<<16)) count=1; dd bs=8192 seek=14 count=1; dd bs=4096 seek=64 count=5) 2>/dev/null > fweep
-testing "sparse without overflow" "$TAR --sparse fweep | SUM 3" \
+toyonly testing "sparse without overflow" "$TAR --sparse fweep | SUM 3" \
"e1560110293247934493626d564c8f03c357cec5\n" "" ""
rm fweep
diff --git a/toys/posix/tar.c b/toys/posix/tar.c
index 66547613..fb26ac16 100644
--- a/toys/posix/tar.c
+++ b/toys/posix/tar.c
@@ -17,7 +17,7 @@
* Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
*
-USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
+USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(transform)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
config TAR
bool "tar"
@@ -51,7 +51,7 @@ GLOBALS(
char *f, *C;
struct arg_list *T, *X;
char *I, *to_command, *owner, *group, *mtime, *mode;
- struct arg_list *exclude;
+ struct arg_list *transform, *exclude;
struct double_list *incl, *excl, *seen;
struct string_list *dirs;
@@ -77,6 +77,12 @@ GLOBALS(
time_t mtime;
dev_t device;
} hdr;
+
+ struct transform_cmd {
+ struct transform_cmd *next;
+ regex_t re;
+ char *replacement, *flags;
+ } *transforms;
)
struct tar_hdr {
@@ -173,6 +179,32 @@ static void alloread(void *buf, int len)
(*b)[len] = 0;
}
+// Apply the first matching --transform rule to a member name or link target.
+//
+// name:        string to rewrite; it is read but never modified.
+// st:          stat of the entry being archived (only st_mode is consulted).
+// link_target: nonzero when name is a link target rather than the member's
+//              own name.
+//
+// Returns a newly allocated rewritten string that the caller must free(), or
+// NULL when no rule applies (the caller then uses name unchanged).
+static char *transform(char *name, struct stat *st, int link_target)
+{
+  struct transform_cmd *tc;
+  regmatch_t m[1];
+  char *tname;
+  size_t pre, len;
+  int apply;
+
+  for (tc = TT.transforms; tc; tc = tc->next) {
+    // Scope flags as in GNU tar: 'R' exempts member names, 'S' exempts
+    // symlink targets. Testing them per kind of string keeps 'R' from being
+    // silently ignored for member names.
+    // TODO: h/H - do/don't apply to hard link targets
+    if (link_target) apply = S_ISLNK(st->st_mode) && !strchr(tc->flags, 'S');
+    else apply = !strchr(tc->flags, 'R');
+    if (!apply) continue;
+
+    // TODO: do multiple matches if the sed 's' g flag was supplied
+    // TODO: handle &
+    if (regexec(&tc->re, name, 1, m, 0)) continue;
+
+    // Result is: text before the match + replacement + text after the match.
+    pre = m[0].rm_so;
+    len = pre + strlen(tc->replacement) + strlen(name + m[0].rm_eo) + 1;
+    tname = xzalloc(len);
+    memcpy(tname, name, pre);
+    strcpy(stpcpy(tname + pre, tc->replacement), name + m[0].rm_eo);
+
+    return tname;
+  }
+
+  return NULL;
+}
+
// callback from dirtree to create archive
static int add_to_tar(struct dirtree *node)
{
@@ -181,7 +213,7 @@ static int add_to_tar(struct dirtree *node)
struct passwd *pw = pw;
struct group *gr = gr;
int i, fd = -1, norecurse = FLAG(no_recursion);
- char *name, *lnk, *hname;
+ char *name, *lnk, *hname, *tname = 0, *lnktname;
if (!dirtree_notdotdot(node)) return 0;
if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
@@ -228,8 +260,10 @@ static int add_to_tar(struct dirtree *node)
if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
if (TT.mtime) st->st_mtime = TT.mtt;
+ tname = transform(hname, st, 0);
+
memset(&hdr, 0, sizeof(hdr));
- strncpy(hdr.name, hname, sizeof(hdr.name));
+ strncpy(hdr.name, tname ? tname : hname, sizeof(hdr.name));
ITOO(hdr.mode, st->st_mode &07777);
ITOO(hdr.uid, st->st_uid);
ITOO(hdr.gid, st->st_gid);
@@ -268,9 +302,12 @@ static int add_to_tar(struct dirtree *node)
perror_msg("readlink");
goto done;
}
- if (strlen(lnk) > sizeof(hdr.link)) write_longname(lnk, 'K');
- strncpy(hdr.link, lnk, sizeof(hdr.link));
+ lnktname = transform(lnk, st, 1);
+ if (strlen(lnktname ? lnktname : lnk) > sizeof(hdr.link))
+ write_longname(lnktname ? lnktname : lnk, 'K');
+ strncpy(hdr.link, lnktname ? lnktname : lnk, sizeof(hdr.link));
if (!i) free(lnk);
+ free(lnktname);
} else if (S_ISREG(st->st_mode)) {
hdr.type = '0';
ITOO(hdr.size, st->st_size);
@@ -285,7 +322,8 @@ static int add_to_tar(struct dirtree *node)
goto done;
}
- if (strlen(hname) > sizeof(hdr.name)) write_longname(hname, 'L');
+ if (strlen(tname ? tname : hname) > sizeof(hdr.name))
+ write_longname(tname ? tname : hname, 'L');
if (!FLAG(numeric_owner)) {
if (TT.owner || (pw = bufgetpwuid(st->st_uid)))
@@ -373,6 +411,7 @@ static int add_to_tar(struct dirtree *node)
}
done:
free(name);
+ free(tname);
return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!norecurse;
}
@@ -802,6 +841,33 @@ void tar_main(void)
if (TT.owner) TT.ouid = xgetuid(TT.owner);
if (TT.group) TT.ggid = xgetgid(TT.group);
if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
+ if (TT.transform) {
+ struct arg_list *t = TT.transform;
+ struct transform_cmd *tc = NULL, *new;
+ char sep, *pattern, *replacement, *flags;
+
+ for (; t; t = t->next) {
+ // TODO: handle escaping like sed (move sed code to lib and share?)
+ if (*t->arg != 's' || !(sep = t->arg[1]) ||
+ !(replacement = strchr(pattern = t->arg+2, sep)) ||
+ !(flags = strchr(replacement+1, sep)))
+ error_exit("bad transform: %s", t->arg);
+ *replacement++ = 0;
+ *flags++ = 0;
+ if (!*flags) flags = "rsh";
+
+ new = xzalloc(sizeof(struct transform_cmd));
+ if (!TT.transforms) TT.transforms = tc = new;
+ else {
+ tc->next = new;
+ tc = new;
+ }
+ new->replacement = replacement;
+ new->flags = flags;
+ // TODO: extract sed 's' gix flags
+ xregcomp(&new->re, pattern, 0);
+ }
+ }
// Collect file list.
for (; TT.exclude; TT.exclude = TT.exclude->next)
@@ -947,6 +1013,13 @@ void tar_main(void)
}
if (CFG_TOYBOX_FREE) {
+ struct transform_cmd *tc;
+
+ while ((tc = TT.transforms)) {
+ TT.transforms = TT.transforms->next;
+ regfree(&tc->re);
+ free(tc);
+ }
llist_traverse(TT.excl, llist_free_double);
llist_traverse(TT.incl, llist_free_double);
while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
More information about the Toybox
mailing list