PERFORCE change 144019 for review
Gabor Kovesdan
gabor at FreeBSD.org
Tue Jun 24 11:52:48 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=144019
Change 144019 by gabor at gabor_server on 2008/06/24 11:51:51
- Remove the gzip and bzip2 support. I'll pre-extract these files to
/temp, using mktemp, which will get rid of some limitations,
simplify the implementation of the wide character set, and make the
code much cleaner
- Rename grep_fdopen to grep_stdin_open and simplify the cases where it
is involved. That function is only used to open the standard input,
so it doesn't need to be so complicated.
Affected files ...
.. //depot/projects/soc2008/gabor_textproc/grep/binary.c#9 edit
.. //depot/projects/soc2008/gabor_textproc/grep/file.c#7 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.c#41 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.h#23 edit
.. //depot/projects/soc2008/gabor_textproc/grep/util.c#36 edit
Differences ...
==== //depot/projects/soc2008/gabor_textproc/grep/binary.c#9 (text+ko) ====
@@ -34,70 +34,57 @@
#endif
#endif /* not lint */
-#include <bzlib.h>
#include <ctype.h>
#include <err.h>
#include <stdio.h>
-#include <zlib.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
#include "grep.h"
-#define isbinary(ch) (!isspace((ch)) && iscntrl((ch)))
+#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)))
int
bin_file(FILE *f)
{
- char buf[BUFSIZ];
- size_t i, m;
+ wint_t ch = L'\0';
+ size_t i;
int ret = 0;
if (fseek(f, 0L, SEEK_SET) == -1)
return (0);
- if ((m = fread(buf, 1, BUFSIZ, f)) == 0)
- return (0);
-
- for (i = 0; i < m; i++)
- if (isbinary(buf[i])) {
+ for (i = 0; (i <= BUFSIZ) && (ch != WEOF); i++) {
+ ch = fgetwc(f);
+ if (iswbinary(ch)) {
ret = 1;
break;
}
+ }
rewind(f);
return (ret);
}
int
-gzbin_file(gzFile *f)
+mmbin_file(struct mmfile *f)
{
- char buf[BUFSIZ];
- int i, m, ret = 0;
-
- if (gzseek(f, 0L, SEEK_SET) == -1)
+ int i;
+ wchar_t *wbuf;
+ size_t s;
+
+ if ((s = mbstowcs(NULL, f->base, 0)) == -1)
return (0);
- if ((m = gzread(f, buf, BUFSIZ)) <= 0)
- return (0);
+ if ((wbuf = malloc((s + 1) * sizeof(wchar_t))) == NULL)
+ err(2, NULL);
- for (i = 0; i < m; i++)
- if (isbinary(buf[i])) {
- ret = 1;
- break;
- }
+ mbstowcs(wbuf, f->base, s);
- if (gzrewind(f) != 0)
- err(1, "gzbin_file");
- return (ret);
-}
-
-int
-mmbin_file(struct mmfile *f)
-{
- int i;
-
/* XXX knows too much about mmf internals */
for (i = 0; i < BUFSIZ && i < f->len; i++)
- if (isbinary(f->base[i]))
+ if (iswbinary(wbuf[i]))
return (1);
return (0);
}
==== //depot/projects/soc2008/gabor_textproc/grep/file.c#7 (text+ko) ====
@@ -46,8 +46,6 @@
#include "grep.h"
static char fname[MAXPATHLEN];
-static char *lnbuf;
-static size_t lnbuflen;
#define FILE_STDIO 0
#define FILE_MMAP 1
@@ -59,106 +57,21 @@
int noseek;
FILE *f;
struct mmfile *mmf;
- gzFile *gzf;
- BZFILE *bzf;
};
-static char *
-bzfgetln(BZFILE *f, size_t *len)
-{
- size_t n;
- char c;
- int bzerr = 0;
-
- for (n = 0; ; ++n) {
- BZ2_bzRead(&bzerr, f, &c, 1);
-
- if ((bzerr == BZ_STREAM_END) || (bzerr == BZ_SEQUENCE_ERROR)) {
- if (n == 0)
- return (NULL);
- else
- break;
- } else if (bzerr != BZ_OK)
- err(2, NULL);
-
- if (n >= lnbuflen) {
- lnbuflen *= 2;
- lnbuf = grep_realloc(lnbuf, ++lnbuflen);
- }
- if (c == '\n')
- break;
- lnbuf[n] = c;
- }
-
- *len = n;
- return (lnbuf);
-}
-
-static char *
-gzfgetln(gzFile *f, size_t *len)
-{
- size_t n;
- int c;
-
- for (n = 0; ; ++n) {
- c = gzgetc(f);
- if (c == -1) {
- const char *gzerrstr;
- int gzerr;
-
- if (gzeof(f))
- break;
-
- gzerrstr = gzerror(f, &gzerr);
- if (gzerr == Z_ERRNO)
- err(2, "%s", fname);
- else
- errx(2, "%s: %s", fname, gzerrstr);
- }
- if (n >= lnbuflen) {
- lnbuflen *= 2;
- lnbuf = grep_realloc(lnbuf, ++lnbuflen);
- }
- if (c == '\n')
- break;
- lnbuf[n] = c;
- }
-
- if (gzeof(f) && n == 0)
- return (NULL);
- *len = n;
- return (lnbuf);
-}
-
struct file *
-grep_fdopen(int fd, char *mode)
+grep_stdin_open(char *mode)
{
struct file *f;
- if (fd == STDIN_FILENO)
- snprintf(fname, sizeof fname, getstr(1));
- else
- snprintf(fname, sizeof fname, getstr(2), fd);
+ snprintf(fname, sizeof fname, getstr(1));
f = grep_malloc(sizeof *f);
- if (Zflag) {
- f->type = FILE_GZIP;
- f->noseek = lseek(fd, 0L, SEEK_SET) == -1;
- if ((f->gzf = gzdopen(fd, mode)) != NULL)
- return (f);
- } else if (Jflag) {
- f->type = FILE_BZIP;
- f->noseek = lseek(fd, 0L, SEEK_SET) == -1;
- if ((f->bzf = BZ2_bzdopen(fd, mode)) != NULL)
- return (f);
- } else
- {
- f->type = FILE_STDIO;
- f->noseek = isatty(fd);
- if ((f->f = fdopen(fd, mode)) != NULL)
- return (f);
- }
+ f->type = FILE_STDIO;
+ f->noseek = isatty(FILE_STDIO);
+ if ((f->f = fdopen(FILE_STDIO, mode)) != NULL)
+ return (f);
free(f);
return (NULL);
@@ -174,24 +87,16 @@
f = grep_malloc(sizeof *f);
f->noseek = 0;
- if (Zflag) {
- f->type = FILE_GZIP;
- if ((f->gzf = gzopen(fname, mode)) != NULL)
- return (f);
- } else if (Jflag) {
- f->type = FILE_BZIP;
- if ((f->bzf = BZ2_bzopen(fname, mode)) != NULL)
- return (f);
- } else {
- /* try mmap first; if it fails, try stdio */
- if ((f->mmf = mmopen(fname, mode)) != NULL) {
- f->type = FILE_MMAP;
- return (f);
- }
- f->type = FILE_STDIO;
- if ((f->f = fopen(path, mode)) != NULL)
- return (f);
+/* XXX: pre-extract gzip and bzip2 files */
+
+ /* try mmap first; if it fails, try stdio */
+ if ((f->mmf = mmopen(fname, mode)) != NULL) {
+ f->type = FILE_MMAP;
+ return (f);
}
+ f->type = FILE_STDIO;
+ if ((f->f = fopen(path, mode)) != NULL)
+ return (f);
free(f);
return (NULL);
@@ -208,12 +113,6 @@
return (bin_file(f->f));
case FILE_MMAP:
return (mmbin_file(f->mmf));
- case FILE_GZIP:
- return (gzbin_file(f->gzf));
- case FILE_BZIP:
-/* XXX: we cannot seek in bzip2 files, just suppose that it is not binary
- return (bzbin_file(f->bzf)); */
- return (0);
default:
/* NOTREACHED */
errx(2, getstr(3));
@@ -228,10 +127,6 @@
return (fgetln(f->f, l));
case FILE_MMAP:
return (mmfgetln(f->mmf, l));
- case FILE_GZIP:
- return (gzfgetln(f->gzf, l));
- case FILE_BZIP:
- return (bzfgetln(f->bzf, l));
default:
/* NOTREACHED */
errx(2, getstr(3));
@@ -248,12 +143,6 @@
case FILE_MMAP:
mmclose(f->mmf);
break;
- case FILE_GZIP:
- gzclose(f->gzf);
- break;
- case FILE_BZIP:
- BZ2_bzclose(f->bzf);
- break;
default:
/* NOTREACHED */
errx(2, getstr(3));
==== //depot/projects/soc2008/gabor_textproc/grep/grep.c#41 (text+ko) ====
@@ -553,6 +553,7 @@
xflag = 1;
break;
case 'Z':
+ Jflag = 0;
Zflag++;
break;
case BIN_OPT:
==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#23 (text+ko) ====
@@ -129,7 +129,7 @@
/* file.c */
struct file;
-struct file *grep_fdopen(int fd, char *mode);
+struct file *grep_stdin_open(char *mode);
struct file *grep_open(char *path, char *mode);
int grep_bin_file(struct file *f);
char *grep_fgetln(struct file *f, size_t *l);
@@ -137,5 +137,4 @@
/* binary.c */
int bin_file(FILE * f);
-int gzbin_file(gzFile * f);
int mmbin_file(struct mmfile *f);
==== //depot/projects/soc2008/gabor_textproc/grep/util.c#36 (text+ko) ====
@@ -136,7 +136,7 @@
fn = label;
else
fn = getstr(1);
- f = grep_fdopen(STDIN_FILENO, "r");
+ f = grep_stdin_open("r");
} else {
f = grep_open(fn, "r");
}
More information about the p4-projects
mailing list