[Toybox] find: Improve operator handling

Felix Janda felix.janda at posteo.de
Thu Apr 18 14:56:28 PDT 2013


Hello,

attached is a patch which improves how find processes composite
expressions like for example:

find . -name file -type f -o -name dir -type d


Below is a short description of some aspects of the toy which are
relevant for the patch or have changed with it.

The first step of build_filter_list() is to step through the command
line and fill the linked list node_list with one entry for each
expression and operator; e.g. in the above example we have four
expressions and one operator. I adapted this step to add the implicit
"-a"s where necessary; e.g. two would need to be added for the above
example. The condition when to add "-a" is unfortunately rather
complicated:

        if ((o1>MAX_OP && o2>MAX_OP) ||
           (o1==RPAREN && o2>=OP_NOT && o2!=RPAREN) ||
           (o1>=OP_NOT && o1!=LPAREN && o2==LPAREN)) {

To understand this you need to know:

#define OP_UNKNOWN      0
#define OP_OR           2
#define OP_AND          3
#define OP_NOT          4

#define LPAREN          1
#define RPAREN          5

#define MAX_OP          5

#define CHECK_NAME      7
#define CHECK_MTIME     8
#define CHECK_TYPE      9

#define ACTION_PRINT    20
#define ACTION_PRINT0   21
#define ACTION_EXEC     22

In the example the resulting list (in words, read from tail to head) is:

-name file -a -type f -o -name dir -a -type d


Ok, after the command line has been processed into node_list it is
brought into a prefix form leaving the order of the expressions intact.
In the above example the prefix form would look like:

-o -a -name file -type f -a -name dir -type d

The algorithm is to walk through the list (which corresponds to walking
backwards through the command line) pushing expressions onto filter_root
and operators (which are not brackets) to op_stack. Depending on the
precendence of the operators, which is encoded in the OP_* and *PAREN
defines, at some points op_stack is emptied onto filter_root.

At the end of build_filter_list() filter_root contains the list of
expressions and operators in prefix form.


On to:

static int evaluate(struct filter_node *filter, struct dirtree *node,
  struct filter_node **fnext)

After having parsed the command line find uses the dirtree functions to
walk the directory tree. For each node evaluate() is called to process
filter_root for the node. evaluate() is a recursive function. It reads
the first part of filter updates *fnext to point to the next part if
the first part was an expression and otherwise calls itself recursively.

"-a" and "-o" are here the most interesting since they will not evaluate
their second argument (with its side-effects), if their first argument
evaluated to 0 or 1 respectively. The same applies for conditional
expressions in C but we can't use that because *fnext still needs to be
updated. The relevant code is:

  if (op==OP_OR || op==OP_AND) {
    result = evaluate(filter->next, node, fnext);
    if(!skip) {
      if (op==OP_OR) {
        skip = result;
        result = evaluate(*fnext, node, fnext) || result;
      } else {
        skip = !result;
        result = evaluate(*fnext, node, fnext) && result;
      }
      skip = 0;
    }
    else result = evaluate(*fnext, node, fnext);
    return result;
  }

Here skip is a static int intialized to 0. skip tells evaluate to return
after *fnext has been updated.


find now passes the tests I've written but these miss out still a lot of
stuff which can go wrong.

Cheers,
Felix
-------------- next part --------------
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1366317429 -7200
# Node ID 4985115191be966c915d8111da696b14495adc9f
# Parent  a612857eb52d49922508911d2ba853fd8939d0bf
find: Improve operator processing

diff -r a612857eb52d -r 4985115191be toys/pending/find.c
--- a/toys/pending/find.c	Tue Apr 16 22:45:47 2013 -0500
+++ b/toys/pending/find.c	Thu Apr 18 22:37:09 2013 +0200
@@ -58,13 +58,15 @@
 
 /* filter operation types */
 #define OP_UNKNOWN	0
-#define OP_NOT		1	
-#define OP_OR		2	
-#define OP_AND		3	
+#define OP_OR		2
+#define OP_AND		3
+#define OP_NOT		4
 
-#define LPAREN		4 
+#define LPAREN		1 
 #define RPAREN		5 
 
+#define MAX_OP		5
+
 #define CHECK_NAME	7
 #define CHECK_MTIME	8
 #define CHECK_TYPE	9
@@ -111,30 +113,43 @@
   int plen = 0;
   struct stat st_buf;
   time_t node_time;
-  char terminator;
+  int op;
+  static int skip = 0;
 
   /* if no filters, success */
   if (!filter) {
     *fnext = NULL;
     return 1;
   }
+  op = filter->op;
 
-  if (filter->op==OP_NOT) return !evaluate(filter->next, node, fnext);
+  if (op==OP_NOT) return !evaluate(filter->next, node, fnext);
 
-  if (filter->op==OP_OR)
-    return evaluate(filter->next, node, fnext) || evaluate(*fnext, node, fnext);
-
-  if (filter->op==OP_AND)
-    return evaluate(filter->next, node, fnext) && evaluate(*fnext, node, fnext);
+  if (op==OP_OR || op==OP_AND) {
+    result = evaluate(filter->next, node, fnext);
+    if(!skip) {
+      if (op==OP_OR) {
+        skip = result;
+        result = evaluate(*fnext, node, fnext) || result;
+      } else {
+        skip = !result;
+        result = evaluate(*fnext, node, fnext) && result;
+      }
+      skip = 0;
+    }
+    else result = evaluate(*fnext, node, fnext);
+    return result;
+  }
 
   // we're down to single operations, mark our position
   *fnext = filter->next;
+  if(skip) return 0;
 
   // TODO: Do a regex comparison
-  if (filter->op==CHECK_NAME)
+  if (op==CHECK_NAME)
     return !strcmp(filter->data.name_regex, node->name);
 
-  if (filter->op==CHECK_MTIME) {
+  if (op==CHECK_MTIME) {
     // do mtime check
     path = dirtree_path(node, &plen);
     result = stat(path,  &st_buf);
@@ -163,7 +178,7 @@
     return result;
 
   }
-  if (filter->op==CHECK_TYPE) {
+  if (op==CHECK_TYPE) {
     path = dirtree_path(node, &plen);
     result = lstat(path,  &st_buf);
     free(path);
@@ -172,8 +187,8 @@
   }
 
 
-  if (filter->op==ACTION_PRINT || filter->op==ACTION_PRINT0) {
-    terminator = (filter->op==ACTION_PRINT)*'\n';
+  if (op==ACTION_PRINT || op==ACTION_PRINT0) {
+    char terminator = (op==ACTION_PRINT)*'\n';
 
     path = dirtree_path(node, &plen);
     printf("%s%c", path, terminator);
@@ -181,7 +196,7 @@
     return 1;
   }
 
-  if (filter->op==ACTION_EXEC) return !do_exec(filter, node);
+  if (op==ACTION_EXEC) return !do_exec(filter, node);
 
   error_msg("Ran out of operations in filter list!");
   return 1;
@@ -218,6 +233,7 @@
   struct filter_node *node_list, *op_stack, *node, *op_node, *next;
   char *arg, **arg_array;
   int i, j;
+  int prevop = 0;
 
   /* part optargs here and build a filter list in prefix format */
 
@@ -303,6 +319,19 @@
       if (arg[0] == '-') error_exit("bad option '%s'", arg);
       else TT.dir = arg;
     }  else {
+      // add OP_AND where necessary
+      if (node_list) {
+        int o1 = node_list->op, o2 = node->op;
+        if ((o1>MAX_OP && o2>MAX_OP) ||
+           (o1==RPAREN && o2>=OP_NOT && o2!=RPAREN) ||
+           (o1>=OP_NOT && o1!=LPAREN && o2==LPAREN)) {
+          struct filter_node *n = (struct filter_node *)
+                                  xmalloc(sizeof(struct filter_node));
+          n->op = OP_AND;
+          n->next = node_list;
+          node_list = n;
+        }
+      }
       /* push node */
       node->next = node_list;;
       node_list = node;
@@ -315,21 +344,17 @@
   op_stack = NULL;
   node = node_list;
   while( node ) {
+    int op = node->op;
     next = node->next;
-    switch( node->op ) {
-      case OP_AND:
-      case OP_OR:
-      case OP_NOT:
-      case RPAREN:
-        /* push to opstack */
-        node->next = op_stack;
-        op_stack = node;
-        break;
-      case LPAREN:
-        free(node);
-        /* pop opstack to output (up to rparen) */
+    if (op==LPAREN || op==RPAREN) {
+      free(node);
+      node = 0;
+    }
+    if (op<=MAX_OP) {
+      if (prevop > op) {
+        /* pop opstack to output */
         op_node = op_stack;
-        while (op_node && op_node->op != RPAREN) {
+        while (op_node) {
           /* remove from op_stack */
           op_stack = op_node->next;
           /* push to output */
@@ -338,17 +363,18 @@
           /* get next node */
           op_node = op_stack;
         }
-        /* rparen should be on op_stack */
-        if (!op_stack) error_exit("need ')'");
-        /* remove rparen from op_stack */
-        op_stack = op_stack->next;
-        free(op_node);
-        break;
-      default:
+      }
+      if (node) {
+        /* push to opstack */
+        node->next = op_stack;
+        op_stack = node;
+      }
+      prevop = op*(op!=RPAREN);
+    }
+    else {
         /* push to output */
         node->next = filter_root;
         filter_root = node;
-        break;
     }
     node = next;
   }
@@ -356,10 +382,6 @@
   /* pop opstack to output till empty */
   op_node = op_stack;
   while (op_node) {
-    /*if (op_node->op == RPAREN || op_node->op == LPAREN)  {
-      error_exit("Error: extra paren found\n");
-    }
-    */
     op_stack = op_node->next;
     op_node->next = filter_root;
     filter_root = op_node;


More information about the Toybox mailing list