Sub-optimal libc's read-ahead buffering behaviour
Maxim Sobolev
sobomax at portaone.com
Wed Aug 3 14:03:37 GMT 2005
Hi,
I have found a scenario in which our libc behaves utterly
suboptimally. Consider the following piece of code, which reads and
processes every other 512-byte block in a file (error handling
intentionally omitted):
/* Stream handle, loop counter and a buffer holding one 512-byte block. */
FILE *f;
int i;
char buf[512];
f = fopen(...);
/* Iterate for as long as the stream's end-of-file indicator is not set. */
for (i = 0; feof(f) == 0; i++) {
/* Read one block into buf and hand it to the processing routine. */
fread(buf, sizeof(buf), 1, f);
do_process(buf);
/*
 * Reposition the stream to the absolute offset i * 2 * sizeof(buf).
 * NOTE(review): for i == 0 this seeks back to offset 0, so the next
 * fread() starts at the beginning of the file again.
 * NOTE(review): fseek(3) is specified to clear the end-of-file
 * indicator, so the feof() test above may never become true - confirm.
 */
fseek(f, i * 2 * sizeof(buf), SEEK_SET);
}
What I have discovered in this case is that libc reads 4096 bytes from
the file for *each* fread(3) call, despite the fact that it could do
just one actual read(2) for every four fread(3) calls and satisfy the
rest from the internal buffer (4096 bytes). However, if I replace
fseek(3) with just another dummy fread(3) everything works as expected -
libc does only one read for every 8 fread(3) calls (4 dummy and 4 real).
Is it something which should be fixed or are there some subtle reasons
for the current behaviour?
The following piece of code illustrates the problem:
#include <stdio.h>
#include <stdlib.h>
/*
 * Demonstrates how stdio read buffering interacts with fseek(3).
 *
 * Runs 16 iterations, each starting with a 512-byte fread(3) from
 * /dev/zero.  With no command-line arguments (argc == 1) every iteration
 * then issues a second fread(3); with any argument it instead calls
 * fseek(3) to the absolute offset i * 2 * sizeof(buf).
 *
 * Exits with status 0 on completion, or 1 if /dev/zero cannot be opened.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char buf[512];

	f = fopen("/dev/zero", "r");
	if (f == NULL) {
		/* Passing a NULL stream to fread()/fseek() is undefined behaviour. */
		perror("/dev/zero");
		exit(1);
	}

	for (i = 0; i < 16; i++) {
		/* Results are ignored on purpose: only the I/O pattern matters. */
		fread(buf, sizeof(buf), 1, f);
		if (argc == 1) {
			fread(buf, sizeof(buf), 1, f);
		} else {
			fseek(f, i * 2 * sizeof(buf), SEEK_SET);
		}
	}

	/* No fclose() here, so the exit path stays the same as before. */
	exit(0);
}
When run with no arguments, the relevant truss output looks like:
open("/dev/zero",0x0,0666) = 3 (0x3)
fstat(3,0xbfbfe900) = 0 (0x0)
readlink("/etc/malloc.conf",0xbfbfe8c0,63) ERR#2 'No such file or
directory'
issetugid() = 0 (0x0)
mmap(0x0,4096,(0x3)PROT_READ|PROT_WRITE,(0x1002)MAP_ANON|MAP_PRIVATE,-1,0x0)
= 1209335808 (0x48150000)
break(0x804b000) = 0 (0x0)
break(0x804c000) = 0 (0x0)
ioctl(3,TIOCGETA,0xbfbfe940) ERR#19 'Operation not
supported by device'
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
exit(0x0)
Whereas when I specify an argument, it becomes:
open("/dev/zero",0x0,0666) = 3 (0x3)
fstat(3,0xbfbfe900) = 0 (0x0)
readlink("/etc/malloc.conf",0xbfbfe8c0,63) ERR#2 'No such file or
directory'
issetugid() = 0 (0x0)
mmap(0x0,4096,(0x3)PROT_READ|PROT_WRITE,(0x1002)MAP_ANON|MAP_PRIVATE,-1,0x0)
= 1209335808 (0x48150000)
break(0x804b000) = 0 (0x0)
break(0x804c000) = 0 (0x0)
ioctl(3,TIOCGETA,0xbfbfe940) ERR#19 'Operation not
supported by device'
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x0,SEEK_SET) = 0 (0x0)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x400,SEEK_SET) = 1024 (0x400)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x800,SEEK_SET) = 2048 (0x800)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0xc00,SEEK_SET) = 3072 (0xc00)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x1000,SEEK_SET) = 4096 (0x1000)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x1400,SEEK_SET) = 5120 (0x1400)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x1800,SEEK_SET) = 6144 (0x1800)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x1c00,SEEK_SET) = 7168 (0x1c00)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x2000,SEEK_SET) = 8192 (0x2000)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x2400,SEEK_SET) = 9216 (0x2400)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x2800,SEEK_SET) = 10240 (0x2800)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x2c00,SEEK_SET) = 11264 (0x2c00)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x3000,SEEK_SET) = 12288 (0x3000)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x3400,SEEK_SET) = 13312 (0x3400)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x3800,SEEK_SET) = 14336 (0x3800)
read(0x3,0x804b000,0x1000) = 4096 (0x1000)
lseek(3,0x3c00,SEEK_SET) = 15360 (0x3c00)
exit(0x0)
The output speaks for itself (32 syscalls instead of 4)!
-Maxim
More information about the freebsd-hackers
mailing list