[Toybox] Add new toy - uniq

Georgi Chorbadzhiyski gf at unixsol.org
Wed Mar 14 08:45:58 PDT 2012


Attached is a new toy: uniq. It implements every option of the coreutils
uniq except -D.

-- 
Georgi Chorbadzhiyski
http://georgi.unixsol.org/
-------------- next part --------------
/* vi: set sw=4 ts=4:
 *
 * uniq.c - report or filter out repeated lines in a file
 *
 * Copyright 2012 Georgi Chorbadzhiyski <georgi at unixsol.org>
 *
 * See http://www.opengroup.org/onlinepubs/009695399/utilities/uniq.html

USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_BIN))

config UNIQ
	bool "uniq"
	default y
	help
	  usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]

	  Report or filter out repeated lines in a file

	  -c	show counts before each line
	  -d	show only lines that are repeated
	  -u	show only lines that are unique
	  -i	ignore case when comparing lines
	  -z	lines end with \0 not \n
	  -w	compare maximum X chars per line
	  -f	ignore first X fields
	  -s	ignore first X chars
*/

#include "toys.h"

// Per-command state for uniq, reached through the TT shorthand below.
// NOTE(review): the three option values are presumably filled in by the
// toybox option parser from the "f#s#w#" part of the NEWTOY() option
// string - confirm the expected field order against the toybox docs.
DEFINE_GLOBALS(
	// -w: upper bound handed to strncmp()/strncasecmp(); 0 selects the
	// unbounded strcmp()/strcasecmp() comparison instead
	long maxchars;
	// -s: number of characters skip() steps over after the fields
	long nchars;
	// -f: number of fields skip() steps over
	long nfields;
	// How many consecutive lines compared equal to the line currently
	// held back for printing (0 means it has been seen once so far)
	long repeats;
)

// Shorthand for this command's globals
#define TT this.uniq

// Bit masks tested against toys.optflags; one per argument-less option
// (-z, -i, -c, -d, -u).
// NOTE(review): the values look like bit positions counted from the right
// end of the "zicdu" option string - confirm before reordering options.
#define FLAG_z 16
#define FLAG_i 8
#define FLAG_c 4
#define FLAG_d 2
#define FLAG_u 1

/*
 * Return the position inside str at which line comparison should start,
 * after honouring -f (TT.nfields) and -s (TT.nchars).
 *
 * A field is a run of whitespace followed by a run of non-whitespace, so
 * after skipping fields the result points at the whitespace that follows
 * the last skipped field (or at the terminating NUL). The -s characters
 * are then counted from that position, as POSIX specifies; the separator
 * after the last field is therefore one of the characters -s can skip.
 *
 * The returned pointer always lies within str; skipping never runs past
 * the terminating NUL.
 */
static char *skip(char *str)
{
	long nfields = TT.nfields;
	long nchars = TT.nchars;

	// Skip fields first: leading whitespace, then the field body
	for (; nfields && *str; nfields--) {
		while (*str && isspace((unsigned char)*str))
			str++;
		while (*str && !isspace((unsigned char)*str))
			str++;
	}

	// Then skip individual characters
	for (; nchars && *str; nchars--)
		str++;

	return str;
}

/*
 * Write one output record to f, subject to the -d / -u filters.
 *
 * line is printed as a C string, so in newline mode it carries its own
 * trailing '\n' (when the input had one), while in -z mode the NUL
 * delimiter terminates the string and has to be emitted separately.
 * TT.repeats holds the number of extra occurrences of line, so the count
 * shown for -c is TT.repeats + 1.
 */
static void print_line(FILE *f, char *line)
{
	// -d: only lines seen more than once
	if ((toys.optflags & FLAG_d) && TT.repeats == 0)
		return;
	// -u: only lines seen exactly once
	if ((toys.optflags & FLAG_u) && TT.repeats > 0)
		return;

	// TT.repeats is a signed long, so the conversion must be %ld
	if (toys.optflags & FLAG_c)
		fprintf(f, "%7ld ", TT.repeats + 1);
	fputs(line, f);

	if (toys.optflags & FLAG_z)
		fputc('\0', f);
}

/*
 * Entry point: read records from the input (first optional argument,
 * default stdin), collapse runs of adjacent equal records and write the
 * survivors to the output (second optional argument, default stdout).
 *
 * Records are delimited by '\n', or by NUL with -z. The delimiter read by
 * getdelim() stays in the buffer, so in newline mode it takes part in the
 * comparison: a final record without a trailing newline compares unequal
 * to an otherwise identical record that has one.
 */
void uniq_main(void)
{
	FILE *infile = stdin;
	FILE *outfile = stdout;
	char *thisline = NULL;
	char *prevline = NULL;
	// Both sizes start at 0 so getdelim() never sees an indeterminate value
	size_t thissize = 0, prevsize = 0;
	char eol = '\n';

	if (toys.optc >= 1)
		infile = xfopen(toys.optargs[0], "r");

	if (toys.optc >= 2)
		outfile = xfopen(toys.optargs[1], "w");

	if (toys.optflags & FLAG_z)
		eol = '\0';

	// Nothing is printed when not even a first record can be read
	if (getdelim(&prevline, &prevsize, eol, infile) >= 0) {
		while (getdelim(&thisline, &thissize, eol, infile) > 0) {
			int diff;
			char *t1 = thisline;
			char *t2 = prevline;

			// If requested get the chosen fields + character offsets.
			if (TT.nfields || TT.nchars) {
				t1 = skip(thisline);
				t2 = skip(prevline);
			}

			// maxchars == 0 means no -w limit was given
			if (TT.maxchars == 0) {
				diff = (toys.optflags & FLAG_i)
				        ? strcasecmp(t1, t2)
				        : strcmp(t1, t2);
			} else {
				diff = (toys.optflags & FLAG_i)
				        ? strncasecmp(t1, t2, TT.maxchars)
				        : strncmp(t1, t2, TT.maxchars);
			}

			if (diff == 0) { // same
				TT.repeats++;
			} else {
				char *tmpline;
				size_t tmpsize;

				print_line(outfile, prevline);

				TT.repeats = 0;

				// Swap the two buffers (and their capacities) so the
				// allocation of the old record is reused for the next read
				tmpline = prevline;
				prevline = thisline;
				thisline = tmpline;

				tmpsize = prevsize;
				prevsize = thissize;
				thissize = tmpsize;
			}
		}

		// Flush the record that is still being held back
		print_line(outfile, prevline);
	}

	// getdelim() may have allocated a buffer even when it reported failure,
	// so the cleanup also runs for an empty input
	if (CFG_TOYBOX_FREE) {
		free(prevline);
		free(thisline);
		if (infile != stdin)
			fclose(infile);
		if (outfile != stdout)
			fclose(outfile);
	}
}
-------------- next part --------------
#!/bin/sh

# Run the system uniq with the given options, then print its exit status
# on a line of its own. Defined with the POSIX "name()" form because the
# script runs under #!/bin/sh, where the "function" keyword is not portable.
t_ext() {
	# $@ is intentionally unquoted: callers pass all options as one string
	# that has to be split into separate words here
	/usr/bin/uniq $@
	echo $?
}

# Run the toybox uniq from the current directory with the given options,
# then print its exit status on a line of its own. Defined with the POSIX
# "name()" form because the script runs under #!/bin/sh, where the
# "function" keyword is not portable.
t_toybox() {
	# $@ is intentionally unquoted: callers pass all options as one string
	# that has to be split into separate words here
	./toybox uniq $@
	echo $?
}

# Run one test case: echo the options being tested, then dispatch to the
# reference implementation (TOYBOX=0) or to toybox (TOYBOX=1).
# Defined with the POSIX "name()" form because the script runs under
# #!/bin/sh, where the "function" keyword is not portable.
t() {
	echo "PARAMS: $*"
	# TOYBOX is quoted so an unset value fails the comparison instead of
	# producing a "[" syntax error; $@ stays unquoted so the single option
	# string is split into words
	[ "$TOYBOX" = 0 ] && t_ext $@
	[ "$TOYBOX" = 1 ] && t_toybox $@
}

# Run the whole list of test cases against whichever implementation the
# TOYBOX variable selects. Everything is written to stdout so the caller
# can capture one transcript per implementation.
# Defined with the POSIX "name()" form because the script runs under
# #!/bin/sh, where "function name {" is not portable.
do_test() {
	# Input file plus output file argument; show what was written
	t "uniq_test.txt uniq_test.txt-copy"
	cat uniq_test.txt-copy
	# Filtering and counting flags, alone and combined
	t "-u uniq_test.txt"
	t "-c uniq_test.txt"
	t "-cu uniq_test.txt"
	t "-cd uniq_test.txt"
	t "-d uniq_test.txt"
	t "-i uniq_test.txt"
	t "-ic uniq_test.txt"
	# Comparison window: -w limit, -s char skip, -f field skip
	t "-c -w 6 uniq_test.txt"
	t "-c -s 9 uniq_test.txt"
	t "-f 2 uniq_test.txt"
	t "-f 1 uniq_test.txt"
	t "-f 3 uniq_test.txt"
	t "-f 1 -s 4 uniq_test.txt"
	# NUL-delimited input
	t "-z -c uniq_test.txt-zero"
	t "-z -u uniq_test.txt-zero"
	t "-z -d uniq_test.txt-zero"
}

# First pass: transcript of the reference implementation
TOYBOX=0
do_test > uniq_test.ext

# Second pass: transcript of the toybox implementation
TOYBOX=1
do_test > uniq_test.toybox

# The two transcripts must match, ignoring whitespace differences
if diff -uw uniq_test.ext uniq_test.toybox; then
	echo "OK"
else
	echo "ERROR"
fi
-------------- next part --------------
aaaaaaa
AAAAAAA
aAaAaAA
bbbbbbb aaaaaaa
ccccccc bbbbbbb aaaaaaa
ddddddd
ddddddd aaaaaaa
ddddddd bbbbbbb aaaaaaa
-------------- next part --------------
A non-text attachment was scrubbed...
Name: uniq_test.txt-zero
Type: application/octet-stream
Size: 168 bytes
Desc: not available
URL: <http://lists.landley.net/pipermail/toybox-landley.net/attachments/20120314/771c9a4f/attachment-0003.obj>


More information about the Toybox mailing list