[Toybox] find: Improve operator handling
Felix Janda
felix.janda at posteo.de
Thu Apr 18 14:56:28 PDT 2013
Hello,
attached is a patch which improves how find processes composite
expressions like for example:
find . -name file -type f -o -name dir -type d
Below is a short description of some aspects of the toy which are
relevant for the patch or have changed with it.
The first step of build_filter_list() is to step through the command
line and fill the linked list node_list with one entry for each
expression and operator; e.g. in the above example we have four
expressions and one operator. I adapted this step to add the implicit
"-a"s where necessary; e.g. two would need to be added for the above
example. The condition when to add "-a" is unfortunately rather
complicated:
if ((o1>MAX_OP && o2>MAX_OP) ||
(o1==RPAREN && o2>=OP_NOT && o2!=RPAREN) ||
(o1>=OP_NOT && o1!=LPAREN && o2==LPAREN)) {
To understand this you need to know:
#define OP_UNKNOWN 0
#define OP_OR 2
#define OP_AND 3
#define OP_NOT 4
#define LPAREN 1
#define RPAREN 5
#define MAX_OP 5
#define CHECK_NAME 7
#define CHECK_MTIME 8
#define CHECK_TYPE 9
#define ACTION_PRINT 20
#define ACTION_PRINT0 21
#define ACTION_EXEC 22
In the example the resulting list (in words, read from tail to head) is:
-name file -a -type f -o -name dir -a -type d
Ok, after the command line has been processed into node_list it is
brought into a prefix form leaving the order of the expressions intact.
In the above example the prefix form would look like:
-o -a -name file -type f -a -name dir -type d
The algorithm is to walk through the list (which corresponds to walking
backwards through the command line) pushing expressions onto filter_root
and operators (which are not brackets) to op_stack. Depending on the
precendence of the operators, which is encoded in the OP_* and *PAREN
defines, at some points op_stack is emptied onto filter_root.
At the end of build_filter_list() filter_root contains the list of
expressions and operators in prefix form.
On to:
static int evaluate(struct filter_node *filter, struct dirtree *node,
struct filter_node **fnext)
After having parsed the command line find uses the dirtree functions to
walk the directory tree. For each node evaluate() is called to process
filter_root for the node. evaluate() is a recursive function. It reads
the first part of filter updates *fnext to point to the next part if
the first part was an expression and otherwise calls itself recursively.
"-a" and "-o" are here the most interesting since they will not evaluate
their second argument (with its side-effects), if their first argument
evaluated to 0 or 1 respectively. The same applies for conditional
expressions in C but we can't use that because *fnext still needs to be
updated. The relevant code is:
if (op==OP_OR || op==OP_AND) {
result = evaluate(filter->next, node, fnext);
if(!skip) {
if (op==OP_OR) {
skip = result;
result = evaluate(*fnext, node, fnext) || result;
} else {
skip = !result;
result = evaluate(*fnext, node, fnext) && result;
}
skip = 0;
}
else result = evaluate(*fnext, node, fnext);
return result;
}
Here skip is a static int intialized to 0. skip tells evaluate to return
after *fnext has been updated.
find now passes the tests I've written but these miss out still a lot of
stuff which can go wrong.
Cheers,
Felix
-------------- next part --------------
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1366317429 -7200
# Node ID 4985115191be966c915d8111da696b14495adc9f
# Parent a612857eb52d49922508911d2ba853fd8939d0bf
find: Improve operator processing
diff -r a612857eb52d -r 4985115191be toys/pending/find.c
--- a/toys/pending/find.c Tue Apr 16 22:45:47 2013 -0500
+++ b/toys/pending/find.c Thu Apr 18 22:37:09 2013 +0200
@@ -58,13 +58,15 @@
/* filter operation types */
#define OP_UNKNOWN 0
-#define OP_NOT 1
-#define OP_OR 2
-#define OP_AND 3
+#define OP_OR 2
+#define OP_AND 3
+#define OP_NOT 4
-#define LPAREN 4
+#define LPAREN 1
#define RPAREN 5
+#define MAX_OP 5
+
#define CHECK_NAME 7
#define CHECK_MTIME 8
#define CHECK_TYPE 9
@@ -111,30 +113,43 @@
int plen = 0;
struct stat st_buf;
time_t node_time;
- char terminator;
+ int op;
+ static int skip = 0;
/* if no filters, success */
if (!filter) {
*fnext = NULL;
return 1;
}
+ op = filter->op;
- if (filter->op==OP_NOT) return !evaluate(filter->next, node, fnext);
+ if (op==OP_NOT) return !evaluate(filter->next, node, fnext);
- if (filter->op==OP_OR)
- return evaluate(filter->next, node, fnext) || evaluate(*fnext, node, fnext);
-
- if (filter->op==OP_AND)
- return evaluate(filter->next, node, fnext) && evaluate(*fnext, node, fnext);
+ if (op==OP_OR || op==OP_AND) {
+ result = evaluate(filter->next, node, fnext);
+ if(!skip) {
+ if (op==OP_OR) {
+ skip = result;
+ result = evaluate(*fnext, node, fnext) || result;
+ } else {
+ skip = !result;
+ result = evaluate(*fnext, node, fnext) && result;
+ }
+ skip = 0;
+ }
+ else result = evaluate(*fnext, node, fnext);
+ return result;
+ }
// we're down to single operations, mark our position
*fnext = filter->next;
+ if(skip) return 0;
// TODO: Do a regex comparison
- if (filter->op==CHECK_NAME)
+ if (op==CHECK_NAME)
return !strcmp(filter->data.name_regex, node->name);
- if (filter->op==CHECK_MTIME) {
+ if (op==CHECK_MTIME) {
// do mtime check
path = dirtree_path(node, &plen);
result = stat(path, &st_buf);
@@ -163,7 +178,7 @@
return result;
}
- if (filter->op==CHECK_TYPE) {
+ if (op==CHECK_TYPE) {
path = dirtree_path(node, &plen);
result = lstat(path, &st_buf);
free(path);
@@ -172,8 +187,8 @@
}
- if (filter->op==ACTION_PRINT || filter->op==ACTION_PRINT0) {
- terminator = (filter->op==ACTION_PRINT)*'\n';
+ if (op==ACTION_PRINT || op==ACTION_PRINT0) {
+ char terminator = (op==ACTION_PRINT)*'\n';
path = dirtree_path(node, &plen);
printf("%s%c", path, terminator);
@@ -181,7 +196,7 @@
return 1;
}
- if (filter->op==ACTION_EXEC) return !do_exec(filter, node);
+ if (op==ACTION_EXEC) return !do_exec(filter, node);
error_msg("Ran out of operations in filter list!");
return 1;
@@ -218,6 +233,7 @@
struct filter_node *node_list, *op_stack, *node, *op_node, *next;
char *arg, **arg_array;
int i, j;
+ int prevop = 0;
/* part optargs here and build a filter list in prefix format */
@@ -303,6 +319,19 @@
if (arg[0] == '-') error_exit("bad option '%s'", arg);
else TT.dir = arg;
} else {
+ // add OP_AND where necessary
+ if (node_list) {
+ int o1 = node_list->op, o2 = node->op;
+ if ((o1>MAX_OP && o2>MAX_OP) ||
+ (o1==RPAREN && o2>=OP_NOT && o2!=RPAREN) ||
+ (o1>=OP_NOT && o1!=LPAREN && o2==LPAREN)) {
+ struct filter_node *n = (struct filter_node *)
+ xmalloc(sizeof(struct filter_node));
+ n->op = OP_AND;
+ n->next = node_list;
+ node_list = n;
+ }
+ }
/* push node */
node->next = node_list;;
node_list = node;
@@ -315,21 +344,17 @@
op_stack = NULL;
node = node_list;
while( node ) {
+ int op = node->op;
next = node->next;
- switch( node->op ) {
- case OP_AND:
- case OP_OR:
- case OP_NOT:
- case RPAREN:
- /* push to opstack */
- node->next = op_stack;
- op_stack = node;
- break;
- case LPAREN:
- free(node);
- /* pop opstack to output (up to rparen) */
+ if (op==LPAREN || op==RPAREN) {
+ free(node);
+ node = 0;
+ }
+ if (op<=MAX_OP) {
+ if (prevop > op) {
+ /* pop opstack to output */
op_node = op_stack;
- while (op_node && op_node->op != RPAREN) {
+ while (op_node) {
/* remove from op_stack */
op_stack = op_node->next;
/* push to output */
@@ -338,17 +363,18 @@
/* get next node */
op_node = op_stack;
}
- /* rparen should be on op_stack */
- if (!op_stack) error_exit("need ')'");
- /* remove rparen from op_stack */
- op_stack = op_stack->next;
- free(op_node);
- break;
- default:
+ }
+ if (node) {
+ /* push to opstack */
+ node->next = op_stack;
+ op_stack = node;
+ }
+ prevop = op*(op!=RPAREN);
+ }
+ else {
/* push to output */
node->next = filter_root;
filter_root = node;
- break;
}
node = next;
}
@@ -356,10 +382,6 @@
/* pop opstack to output till empty */
op_node = op_stack;
while (op_node) {
- /*if (op_node->op == RPAREN || op_node->op == LPAREN) {
- error_exit("Error: extra paren found\n");
- }
- */
op_stack = op_node->next;
op_node->next = filter_root;
filter_root = op_node;
More information about the Toybox
mailing list