PERFORCE change 144019 for review

Gabor Kovesdan gabor at FreeBSD.org
Tue Jun 24 11:52:48 UTC 2008


http://perforce.freebsd.org/chv.cgi?CH=144019

Change 144019 by gabor at gabor_server on 2008/06/24 11:51:51

	- Remove the gzip and bzip2 support. I'll pre-extract these files to
	  /temp, using mktemp, which will get rid of some limitations,
	  simplify the implementation of wide character support, and make
	  the code much cleaner.
	- Rename grep_fdopen to grep_stdin_open and simplify the cases where
	  it is involved. That function is only used to open the standard
	  input, so it doesn't need to be so complicated.

Affected files ...

.. //depot/projects/soc2008/gabor_textproc/grep/binary.c#9 edit
.. //depot/projects/soc2008/gabor_textproc/grep/file.c#7 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.c#41 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.h#23 edit
.. //depot/projects/soc2008/gabor_textproc/grep/util.c#36 edit

Differences ...

==== //depot/projects/soc2008/gabor_textproc/grep/binary.c#9 (text+ko) ====

@@ -34,70 +34,57 @@
 #endif
 #endif /* not lint */
 
-#include <bzlib.h>
 #include <ctype.h>
 #include <err.h>
 #include <stdio.h>
-#include <zlib.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "grep.h"
 
-#define	isbinary(ch)	(!isspace((ch)) && iscntrl((ch)))
+#define	iswbinary(ch)	(!iswspace((ch)) && iswcntrl((ch)))
 
 int
 bin_file(FILE *f)
 {
-	char	 buf[BUFSIZ];
-	size_t	 i, m;
+	wint_t	 ch = L'\0';
+	size_t	 i;
 	int	 ret = 0;
 
 	if (fseek(f, 0L, SEEK_SET) == -1)
 		return (0);
 
-	if ((m = fread(buf, 1, BUFSIZ, f)) == 0)
-		return (0);
-
-	for (i = 0; i < m; i++)
-		if (isbinary(buf[i])) {
+	for (i = 0; (i <= BUFSIZ) && (ch != WEOF); i++) {
+		ch = fgetwc(f);
+		if (iswbinary(ch)) {
 			ret = 1;
 			break;
 		}
+	}
 
 	rewind(f);
 	return (ret);
 }
 
 int
-gzbin_file(gzFile *f)
+mmbin_file(struct mmfile *f)
 {
-	char	 buf[BUFSIZ];
-	int	 i, m, ret = 0;
-
-	if (gzseek(f, 0L, SEEK_SET) == -1)
+	int	 i;
+	wchar_t	*wbuf;
+	size_t	 s;
+	
+	if ((s = mbstowcs(NULL, f->base, 0)) == -1)
 		return (0);
 
-	if ((m = gzread(f, buf, BUFSIZ)) <= 0)
-		return (0);
+	if ((wbuf = malloc((s + 1) * sizeof(wchar_t))) == NULL)
+		err(2, NULL);
 
-	for (i = 0; i < m; i++)
-		if (isbinary(buf[i])) {
-			ret = 1;
-			break;
-		}
+	mbstowcs(wbuf, f->base, s);
 
-	if (gzrewind(f) != 0)
-		err(1, "gzbin_file");
-	return (ret);
-}
-
-int
-mmbin_file(struct mmfile *f)
-{
-	int	 i;
-
 	/* XXX knows too much about mmf internals */
 	for (i = 0; i < BUFSIZ && i < f->len; i++)
-		if (isbinary(f->base[i]))
+		if (iswbinary(wbuf[i]))
 			return (1);
 	return (0);
 }

==== //depot/projects/soc2008/gabor_textproc/grep/file.c#7 (text+ko) ====

@@ -46,8 +46,6 @@
 #include "grep.h"
 
 static char	 fname[MAXPATHLEN];
-static char	*lnbuf;
-static size_t	 lnbuflen;
 
 #define FILE_STDIO	0
 #define FILE_MMAP	1
@@ -59,106 +57,21 @@
 	int		 noseek;
 	FILE		*f;
 	struct mmfile	*mmf;
-	gzFile		*gzf;
-	BZFILE		*bzf;
 };
 
-static char *
-bzfgetln(BZFILE *f, size_t *len)
-{
-	size_t	 n;
-	char	 c;
-	int	 bzerr = 0;
-
-	for (n = 0; ; ++n) {
-		BZ2_bzRead(&bzerr, f, &c, 1);
-
-		if ((bzerr == BZ_STREAM_END) || (bzerr == BZ_SEQUENCE_ERROR)) {
-			if (n == 0)
-				return (NULL);
-			else
-				break;
-		} else if (bzerr != BZ_OK)
-			err(2, NULL);
-
-		if (n >= lnbuflen) {
-			lnbuflen *= 2;
-			lnbuf = grep_realloc(lnbuf, ++lnbuflen);
-		}
-		if (c == '\n')
-			break;
-		lnbuf[n] = c;
-	}
-
-        *len = n;
-        return (lnbuf);
-}
-
-static char *
-gzfgetln(gzFile *f, size_t *len)
-{
-	size_t	 n;
-	int	 c;
-
-	for (n = 0; ; ++n) {
-		c = gzgetc(f);
-		if (c == -1) {
-			const char	*gzerrstr;
-			int		 gzerr;
-
-			if (gzeof(f))
-				break;
-
-			gzerrstr = gzerror(f, &gzerr);
-			if (gzerr == Z_ERRNO)
-				err(2, "%s", fname);
-			else
-				errx(2, "%s: %s", fname, gzerrstr);
-		}
-		if (n >= lnbuflen) {
-			lnbuflen *= 2;
-			lnbuf = grep_realloc(lnbuf, ++lnbuflen);
-		}
-		if (c == '\n')
-			break;
-		lnbuf[n] = c;
-	}
-
-	if (gzeof(f) && n == 0)
-		return (NULL);
-	*len = n;
-	return (lnbuf);
-}
-
 struct file *
-grep_fdopen(int fd, char *mode)
+grep_stdin_open(char *mode)
 {
 	struct file	*f;
 
-	if (fd == STDIN_FILENO)
-		snprintf(fname, sizeof fname, getstr(1));
-	else
-		snprintf(fname, sizeof fname, getstr(2), fd);
+	snprintf(fname, sizeof fname, getstr(1));
 
 	f = grep_malloc(sizeof *f);
 
-	if (Zflag) {
-		f->type = FILE_GZIP;
-		f->noseek = lseek(fd, 0L, SEEK_SET) == -1;
-		if ((f->gzf = gzdopen(fd, mode)) != NULL)
-			return (f);
-	} else if (Jflag) {
-		f->type = FILE_BZIP;
-		f->noseek = lseek(fd, 0L, SEEK_SET) == -1;
-		if ((f->bzf = BZ2_bzdopen(fd, mode)) != NULL)
-			return (f);
-	} else
-	{
-		f->type = FILE_STDIO;
-		f->noseek = isatty(fd);
-		if ((f->f = fdopen(fd, mode)) != NULL)
-			return (f);
-	}
+	f->type = FILE_STDIO;
+	f->noseek = isatty(FILE_STDIO);
+	if ((f->f = fdopen(FILE_STDIO, mode)) != NULL)
+		return (f);
 
 	free(f);
 	return (NULL);
@@ -174,24 +87,16 @@
 	f = grep_malloc(sizeof *f);
 	f->noseek = 0;
 
-	if (Zflag) {
-		f->type = FILE_GZIP;
-		if ((f->gzf = gzopen(fname, mode)) != NULL)
-			return (f);
-	} else if (Jflag) {
-		f->type = FILE_BZIP;
-		if ((f->bzf = BZ2_bzopen(fname, mode)) != NULL)
-			return (f);
-	} else {
-		/* try mmap first; if it fails, try stdio */
-		if ((f->mmf = mmopen(fname, mode)) != NULL) {
-			f->type = FILE_MMAP;
-			return (f);
-		}
-		f->type = FILE_STDIO;
-		if ((f->f = fopen(path, mode)) != NULL)
-			return (f);
+/* XXX: pre-extract gzip and bzip2 files */
+
+	/* try mmap first; if it fails, try stdio */
+	if ((f->mmf = mmopen(fname, mode)) != NULL) {
+		f->type = FILE_MMAP;
+		return (f);
 	}
+	f->type = FILE_STDIO;
+	if ((f->f = fopen(path, mode)) != NULL)
+		return (f);
 
 	free(f);
 	return (NULL);
@@ -208,12 +113,6 @@
 		return (bin_file(f->f));
 	case FILE_MMAP:
 		return (mmbin_file(f->mmf));
-	case FILE_GZIP:
-		return (gzbin_file(f->gzf));
-	case FILE_BZIP:
-/* XXX: we cannot seek in bzip2 files, just suppose that it is not binary
-		return (bzbin_file(f->bzf)); */
-		return (0);
 	default:
 		/* NOTREACHED */
 		errx(2, getstr(3));
@@ -228,10 +127,6 @@
 		return (fgetln(f->f, l));
 	case FILE_MMAP:
 		return (mmfgetln(f->mmf, l));
-	case FILE_GZIP:
-		return (gzfgetln(f->gzf, l));
-	case FILE_BZIP:
-		return (bzfgetln(f->bzf, l));
 	default:
 		/* NOTREACHED */
 		errx(2, getstr(3));
@@ -248,12 +143,6 @@
 	case FILE_MMAP:
 		mmclose(f->mmf);
 		break;
-	case FILE_GZIP:
-		gzclose(f->gzf);
-		break;
-	case FILE_BZIP:
-		BZ2_bzclose(f->bzf);
-		break;
 	default:
 		/* NOTREACHED */
 		errx(2, getstr(3));

==== //depot/projects/soc2008/gabor_textproc/grep/grep.c#41 (text+ko) ====

@@ -553,6 +553,7 @@
 			xflag = 1;
 			break;
 		case 'Z':
+			Jflag = 0;
 			Zflag++;
 			break;
 		case BIN_OPT:

==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#23 (text+ko) ====

@@ -129,7 +129,7 @@
 /* file.c */
 struct file;
 
-struct file	*grep_fdopen(int fd, char *mode);
+struct file	*grep_stdin_open(char *mode);
 struct file	*grep_open(char *path, char *mode);
 int		 grep_bin_file(struct file *f);
 char		*grep_fgetln(struct file *f, size_t *l);
@@ -137,5 +137,4 @@
 
 /* binary.c */
 int	 bin_file(FILE * f);
-int	 gzbin_file(gzFile * f);
 int	 mmbin_file(struct mmfile *f);

==== //depot/projects/soc2008/gabor_textproc/grep/util.c#36 (text+ko) ====

@@ -136,7 +136,7 @@
 			fn = label;
 		else
 			fn = getstr(1);
-		f = grep_fdopen(STDIN_FILENO, "r");
+		f = grep_stdin_open("r");
 	} else {
 		f = grep_open(fn, "r");
 	}


More information about the p4-projects mailing list