[Toybox] Towards find cleanup

Tim Bird tim.bird at am.sony.com
Wed Apr 10 11:55:58 PDT 2013


On 04/10/2013 10:41 AM, Felix Janda wrote:
> Hello,
> 
> attached is some cleanup of the find toy inspired by Rob's (very cool)
> mails on how he proceeds when cleaning up toys. (and Isaac's recent
> partial cleanup of xzcat)


> # HG changeset patch
> # User Felix Janda <felix.janda at posteo.de>
> # Date 1365614706 -7200
> # Node ID fe09f47038ccf0c25f58ca00e68a4cee9b5994e5
> # Parent  44ed476d5c87cf70b9ae0ea5dde65f34e93ff5e2
> Partial cleanup of find
> 
> - Remove unnecessary headers
> - dump_node is not used anywhere
It used to be used in debug code that was recently removed
by Rob.  It's a bit of a pain to examine the expression
tree in a debugger, so an ascii dump routine is handy.
Because it is unused, it evaporates because of compiler
optimizations.  I don't know if Rob left it around intentionally
or not, but I hate to see it go while 'find' is still under
development.  (Once we're all happy with the find implementation,
of course, you can feel free to remove the lifeboats, just like
the Titanic. :-)

In general, I don't know if Rob has a policy for having debug
code available for use, but not currently used.

> - exec_buf is unused
I think I put this there in anticipation of the work required
to properly support exec.  Right now, the code doesn't
scan the args and replace '{}' with the file or directory
name being evaluated.  I think I planned to use exec_buf
for this substitution, rather than allocate something on
every evaluation.  But that's a detail that whoever finishes
the exec functionality can work out.

> - Replace SUCCESS with 1 and simplify code accordingly
> - a==0 -> !a
I'm OK with most of these, when 0 is being treated as
failure and !0 as success (all the string operations).
There is at least one place where I'm actually checking
that something is equal to the value 0 (that is, it's
a value comparison and not a boolean).  I know that's a
bit picky, and they compile the same, but mentally I
prefer to use numbers for number comparisons and ! for
boolan not.

> - Simplify an incremation pattern using pre-increments
> - Add static keyword to functions
> - Make error messages print to stderr
Good stuff.  Thanks.

> diff --git a/toys/pending/find.c b/toys/pending/find.c
> --- a/toys/pending/find.c
> +++ b/toys/pending/find.c
> @@ -27,13 +27,9 @@
>   */ 
>  
>  #include "toys.h"
> -#include <strings.h>
> -#include <time.h>
>  
>  #define SECONDS_PER_DAY (24*60*60)
>  
> -#define SUCCESS	1
> -

I think I defined this when the output from the dirtree* routines
was in flux, but I'm pretty sure it's vestigial now.

>  int have_action;
>  
>  struct filter_node {
> @@ -53,8 +49,6 @@
>  	} data;
>  };
>  
> -char exec_buf[1024];
> -
See note above.  It might have been intended for arg substitution
for -exec.

>  static struct filter_node *filter_root;
>  
>  /* filter operation types */
> @@ -78,80 +72,10 @@
>  #define TEST_EQ		1
>  #define TEST_GT		2
>  
> -void dump_node(struct filter_node *node)
> -{
> -	char c, **arg_array;
> -	int i;
> -
> -	switch(node->op) {
> -		case CHECK_NAME:
> -			printf("-name '%s' ", node->data.name_regex);
> -			break;
> -		case CHECK_MTIME:
> -			printf("-mtime %d '%s' ", node->data.t.time_op,
> -				ctime(&node->data.t.time));
> -			break;
> -		case CHECK_TYPE:
> -			switch(node->data.type) {
> -				case S_IFREG: c='f';
> -					break;
> -				case S_IFDIR: c='d';
> -					break;
> -				case S_IFCHR: c='c';
> -					break;
> -				case S_IFBLK: c='b';
> -					break;
> -				case S_IFLNK: c='l';
> -					break;
> -				case S_IFSOCK: c='s';
> -					break;
> -				case S_IFIFO: c='p';
> -					break;
> -				default: c='u';
> -					break;
> -			}
> -			
> -			printf("-type %c ", c);
> -			break;
> -		case OP_NOT:
> -			printf("! ");
> -			break;
> -		case OP_OR:
> -			printf("-o ");
> -			break;
> -		case OP_AND:
> -			printf("-a ");
> -			break;
> -		case LPAREN:
> -			printf("( ");
> -			break;
> -		case RPAREN:
> -			printf(") ");
> -			break;
> -		case ACTION_PRINT:
> -			printf("-print ");
> -			break;
> -		case ACTION_PRINT0:
> -			printf("-print0 ");
> -			break;
> -		case ACTION_EXEC:
> -			printf("-exec ");
> -			arg_array=node->data.e.exec_args;
> -			for(i=0; arg_array[i]; i++) {
> -				printf("%s ", arg_array[i]);
> -			}
> -			printf("; ");
> -			break;
> -		default:
> -			printf("unknown node type (%d) ", node->op);
> -			break;
> -	}
> -}
> -
See above.  It's going to be harder to debug filter expressions
without the ability to dump the filter nodes in ascii.

>  /* executes the command for a filter node
>   * return the program return value (0=success)
>   */
> -int do_exec(struct filter_node *filter, struct dirtree *node)
> +static int do_exec(struct filter_node *filter, struct dirtree *node)
>  {
>  	char *path;
>  	int plen;
> @@ -169,13 +93,9 @@
>  	}
>  	
>  	pid = fork();
> -	if (pid==0) {
> +	if (!pid) {
I prefer pid==0 here.  !pid has the same effect in the compiler,
but I'm not doing a boolean compare, I'm checking for the special value
0 from fork.  IMHO it's easier to read with the 0 present.
>  		/* child */
> -		ccode = execvp(arg_array[0], arg_array);
> -		if (ccode<0) {
> -			printf("Error: problem executing sub-program %s\n", arg_array[0]);
> -			exit(ccode);
> -		}
> +		xexec(arg_array);
>  	} else {
>  		/* parent */
>  		/* wait for child */
> @@ -188,11 +108,11 @@
>  }
>  
>  /* prefix evaluator */
> -/* returns 0 for failure or SUCCESS for success */
> -int evaluate(struct filter_node *filter, struct dirtree *node,
> +/* returns 0 for failure or 1 for success */
> +static int evaluate(struct filter_node *filter, struct dirtree *node,
>  	struct filter_node **fnext)
>  {
> -	int result, result2;
> +	int result;
>  	char *path;
>  	int plen = 0;
>  	struct stat st_buf;
> @@ -202,48 +122,23 @@
>  	/* if no filters, success */
>  	if (!filter) {
>  		*fnext = NULL;
> -		return SUCCESS;
> +		return 1;
>  	}
>  
>  	if (filter->op==OP_NOT) {
> -		result = evaluate(filter->next, node, fnext);
> -		if (result==0) {
> -			return SUCCESS;
> -		} else {
> -			return 0;
> -		}
> +		return !evaluate(filter->next, node, fnext);
>  	}
>  	if (filter->op==OP_OR) {
> -		result = evaluate(filter->next, node, fnext);
> -		result2 = evaluate(*fnext, node, fnext);
> -		if (result) {
> -			return result;
> -		} else {
> -			if (result2) {
> -				return result2;
> -			} else {
> -				return 0;
> -			}
> -		}
> +		return evaluate(filter->next, node, fnext) || evaluate(*fnext, node, fnext);
This will have different side effects than the original code.
The original code evaluates both sides, and then returns the 'or' operation
on the results.  The new one will (of course) do short-circuit evaluation of
the '||',  meaning that sometimes the second node won't be evaluated.
I believe that at one time, I actually had it coded that way, but for some
reason or other I went back to forcing the evaluation of both nodes.
Unfortunately, I can't remember the reason why I went back.  I would have expected
regular find to do short-circuit evaluation of expressions with 'or' operations,
so I'd be happy if this were correct.  I just have a nagging feeling that
there was some reason I couldn't do it this way.  I could be completely wrong,
though.  So unless testing reveals the need for evaluating both sides of the
'or', this should be fine.

Do you know if regular find does short-circuit evaluation?
What would regular find do with
  'find . -exec test1_with_side_effects {} \; -o -exec test2_with_side_effects {} \;'
?

>  	}
>  	if (filter->op==OP_AND) {
> -		result = evaluate(filter->next, node, fnext);
> -		if (result) {
> -			result2 = evaluate(*fnext, node, fnext);
> -			if (result && result2) {
> -				return SUCCESS;
> -			}
> -		}
> -		return 0;
> +		return evaluate(filter->next, node, fnext) && evaluate(*fnext, node, fnext);
>  	}
>  	/* we're down to single operations, mark our position */
>  	*fnext = filter->next;
>  	if (filter->op==CHECK_NAME) {
> -		//if (regex_cmp(fn->name_regex, node->name)==0)
> -		if (strcmp(filter->data.name_regex, node->name)==0)
> -			return SUCCESS;
> -		else
> -			return 0;
> +		// TODO: Do a regex comparison
> +		return !strcmp(filter->data.name_regex, node->name);
>  	}
>  	if (filter->op==CHECK_MTIME) {
>  		/* do mtime check */
> @@ -254,7 +149,7 @@
>  			return 0;
>  		}
>  		node_time = st_buf.st_mtime/SECONDS_PER_DAY;
> -		result = SUCCESS;
> +		result = 1;
>  		switch (filter->data.t.time_op) {
>  			/* do time compare here */
>  			case TEST_LT:
> @@ -285,11 +180,7 @@
>  		if (result<0) {
>  			return 0;
>  		}
> -		if ((st_buf.st_mode & S_IFMT) == filter->data.type) {
> -			return SUCCESS;
> -		} else {
> -			return 0;
> -		}
> +		return (st_buf.st_mode & S_IFMT) == filter->data.type;
>  	}
>  	
>  	
> @@ -302,21 +193,17 @@
>  		path = dirtree_path(node, &plen);
>  		printf("%s%c", path, terminator);
>  		free(path);
> -		return SUCCESS;
> +		return 1;
>  	}
>  
>  	if (filter->op==ACTION_EXEC) {
> -		if (do_exec(filter, node)==0) {
> -			return SUCCESS;
> -		} else {
> -			return 0;
> -		}
> +		return !do_exec(filter, node);
>  	}
> -	printf("ERROR: ran out of operations in filter list!!\n");
> -	return SUCCESS;
> +	error_msg("Ran out of operations in filter list!");
> +	return 1;
>  }
>  
> -int check_node_callback(struct dirtree *node)
> +static int check_node_callback(struct dirtree *node)
>  {
>  	char *path;
>  	int plen = 0;
> @@ -332,7 +219,7 @@
>  		return 0;
>  
>  	result = evaluate(filter_root, node, &junk);
> -	if (result & SUCCESS & !have_action) {
> +	if (result & !have_action) {
I have no explanation for my weird code.  I think it was
left over from when SUCCESS was something besides 1, and I had
to do an actual value compare here.  But the old code is
clearly just broken now.  Thanks for the fix.

>  		/* default action is just print the path */
>  		path = dirtree_path(node, &plen);
>  		printf("%s\n", path);
> @@ -342,7 +229,7 @@
>  }
>  
>  
> -void build_filter_list(void)
> +static void build_filter_list(void)
>  {
>  	struct filter_node *node_list;
>  	struct filter_node *op_stack;
> @@ -362,40 +249,33 @@
>  			xmalloc(sizeof(struct filter_node));
>  		node->op = OP_UNKNOWN;
>  		arg = toys.optargs[i];
> -		if (strcmp(arg, "-o") == 0) { 
> +		if (!strcmp(arg, "-o")) { 
>  			node->op = OP_OR;
>  		}
> -		if (strcmp(arg, "-a") == 0) { 
> +		if (!strcmp(arg, "-a")) { 
>  			node->op = OP_AND;
>  		}
> -		if (strcmp(arg, "!")==0) { 
> +		if (!strcmp(arg, "!")) { 
>  			node->op = OP_NOT;
>  		}
> -		if (strcmp(arg, "(") == 0) { 
> +		if (!strcmp(arg, "(")) { 
>  			node->op = LPAREN;
>  		}
> -		if (strcmp(arg, ")") == 0) { 
> +		if (!strcmp(arg, ")")) { 
>  			node->op = RPAREN;
>  		}
>  
> -		if (strcmp(arg, "-name") == 0) {
> +		if (!strcmp(arg, "-name")) {
>  			node->op = CHECK_NAME;
> -			arg = toys.optargs[i+1];
> -			if (!arg) {
> -				printf("Error: missing argument to -name\n");
> -				exit(1);
> -			}
> +			arg = toys.optargs[++i];
> +			if (!arg) error_exit("Missing argument to -name\n");
>  			node->data.name_regex = arg;
> -			i++;
>  		}
>  
> -		if (strcmp(arg, "-mtime") == 0) {
> +		if (!strcmp(arg, "-mtime")) {
>  			node->op = CHECK_MTIME;
> -			arg = toys.optargs[i+1];
> -			if (!arg) {
> -				printf("Error: missing argument to -mtime\n");
> -				exit(1);
> -			}
> +			arg = toys.optargs[++i];
> +			if (!arg) error_exit("Missing argument to -mtime\n");
>  			switch(arg[0]) {
>  				case '+':
>  					node->data.t.time_op=TEST_GT;
> @@ -411,16 +291,12 @@
>  			}
>  			/* convert to days (very crudely) */
>  			node->data.t.time = atoi(arg)/SECONDS_PER_DAY;
> -			i++;
>  		}
>  
> -		if (strcmp(arg, "-type") == 0) {
> +		if (!strcmp(arg, "-type")) {
>  			node->op = CHECK_TYPE;
> -			arg = toys.optargs[i+1];
> -			if (!arg) {
> -				printf("Error: missing argument to -type\n");
> -				exit(1);
> -			}
> +			arg = toys.optargs[++i];
> +			if (!arg) error_exit("Missing argument to -type\n");
>  			switch(arg[0]) {
>  				case 'f':
>  					node->data.type = S_IFREG;
> @@ -445,51 +321,39 @@
>  					node->data.type = S_IFIFO;
>  					break;
>  				default:
> -					printf("Error: unknown file type '%s'\n", arg);
> -					exit(1);
> +					error_exit("Unknown file type '%s'\n", arg);
>  			}
> -			i++;
>  		}
> -		if (strcmp(arg, "-print") == 0) {
> +		if (!strcmp(arg, "-print")) {
>  			node->op = ACTION_PRINT;
>  			have_action = 1;
>  		}
> -		if (strcmp(arg, "-print0") == 0) {
> +		if (!strcmp(arg, "-print0")) {
>  			node->op = ACTION_PRINT0;
>  			have_action = 1;
>  		}
> -		if (strcmp(arg, "-exec") == 0) {
> +		if (!strcmp(arg, "-exec")) {
>  			node->op = ACTION_EXEC;
>  			have_action = 1;
> -			exec_buf[0] =  0;
> -			j = 0;
> -			arg_array = xmalloc(sizeof(char *)*(j+1));
> -			i++;
> -			arg = toys.optargs[i];
> -			while (arg && (strcmp(arg, ";") != 0)) {
> +			arg_array = xmalloc(sizeof(char *));
> +			arg = toys.optargs[++i];
> +			for (j = 0; arg && strcmp(arg, ";"); j++) {
>  				/* new method */
>  				arg_array = xrealloc(arg_array, sizeof(char *) * (j+2));
>  				arg_array[j] = arg;
> -				if (strcmp(arg, "{}") == 0) {
> +				if (!strcmp(arg, "{}")) {
>  					node->data.e.arg_path_index = j;
>  				}
> -				j++;
>  
> -				i++;
> -				arg = toys.optargs[i];
> +				arg = toys.optargs[++i];
>  			}
> -			if (!arg) {
> -				printf("Error: missing ';' in exec command\n");
> -				exit(1);
> -			}
> +			if (!arg) error_exit("Missing ';' in exec command\n");
>  			arg_array[j] = 0;
>  			node->data.e.exec_args = arg_array;
>  		}
>  		if (node->op == OP_UNKNOWN) {
> -			if( arg[0]=='-') {
> -				printf("Error: unknown option '%s'\n", arg);
> -				exit(1);
> -			} else {
> +			if (arg[0] == '-') error_exit("Unknown option '%s'\n", arg);
> +			else {
>  				/* use toybuf as start directory */
>  				strcpy(toybuf, arg);
>  			}
> @@ -530,10 +394,7 @@
>  					op_node = op_stack;
>  				}
>  				/* rparen should be on op_stack */
> -				if (!op_stack) {
> -					printf("Error: missing right paren\n");
> -					exit(1);
> -				}
> +				if (!op_stack) error_exit("Missing right paren\n");
>  				/* remove rparen from op_stack */
>  				op_stack = op_stack->next;
>  				free(op_node);
> @@ -551,8 +412,7 @@
>  	op_node = op_stack;
>  	while (op_node) {
>  		/*if (op_node->op == RPAREN || op_node->op == LPAREN)  {
> -			printf("Error: extra paren found\n");
> -			exit(1);
> +			error_exit("Error: extra paren found\n");
>  		}
>  		*/
>  		op_stack = op_node->next;
> @@ -570,7 +430,7 @@
>  
>  	/* FIXTHIS - parse actions, if present */
>  
> -	if (toybuf[0]==0) {
> +	if (!toybuf[0]) {
I prefer the comparison with 0 here.  The 0 value
at the begging of the toybuf is not a boolean (in my mind anyway)
but a special value.  But that's just me.  It's all just
mental model, and if the '!' is easier to read for others
I'm fine with this change.

>  		strcpy(toybuf, ".");
>  	}
>  	dirtree_read(toybuf, check_node_callback);

Great stuff.  Thanks for working on this.
 -- Tim




More information about the Toybox mailing list