New NFS server stress test hang
John
jwd at slowblink.com
Thu Jun 9 02:03:04 UTC 2011
Hi,
We've been running some stress tests of the new nfs server.
The system is at r222531 (head), 9 clients, two mounts each
to the server:
mount_nfs -o udp,nfsv3,rsize=32768,wsize=32768,noatime,nolockd,acregmin=1,acregmax=2,acdirmin=1,acdirmax=2,negnametimeo=2 ${servera}:/vol/datsrc /c/$servera/vol/datsrc
mount_nfs -o udp,nfsv3,rsize=32768,wsize=32768,noatime,nolockd,acregmin=1,acregmax=2,acdirmin=1,acdirmax=2,negnametimeo=0 ${servera}:/vol/datgen /c/$servera/vol/datgen
The system is still up & responsive, simply no nfs services
are working. All (200) threads appear to be active, but not
doing anything. The debugger is not compiled into this kernel.
We can run any other tracing commands desired. We can also
rebuild the kernel with the debugger enabled for any kernel
debugging needed.
While things are running correctly, sysctl & top will for
instance show the following for nfsd (threads collapsed):
vfs.nfsd.minthreads: 4
vfs.nfsd.maxthreads: 200
vfs.nfsd.threads: 60
vfs.nfsrv.minthreads: 1
vfs.nfsrv.maxthreads: 200
vfs.nfsrv.threads: 0
last pid: 35073; load averages: 6.74, 4.94, 4.56 up 6+22:17:25 16:16:25
111 processes: 13 running, 98 sleeping
Mem: 18M Active, 1048M Inact, 64G Wired, 8652K Cache, 9837M Buf, 28G Free
PID USERNAME THR PRI NICE SIZE RES STATE C TIME WCPU COMMAND
2049 root 61 49 0 10052K 1608K CPU2 0 49:43 1116.70% nfsd
Please let us know what we can do to help debug this.
Thanks!
John
The output of the following commands is below:
uname -a
top -d 1 -b
head -n 7 /usr/src/.svn/entries
sysctl -a | grep nfsd
sysctl -a | grep nfs | grep -v nfsd
nfsstat -sW
ps -auxww
netstat -i # All nfs data traffic is via 10G chelsio cards.
Amusing thing to note is the negative numbers in the nfsstat
output :-)
FreeBSD bb99za2a.unx.sas.com 9.0-CURRENT FreeBSD 9.0-CURRENT #6: Wed Jun 1 14:50:21 EDT 2011 maint1 at bb99za2a.unx.sas.com:/usr/obj/usr/src/sys/ZFS amd64
last pid: 53625; load averages: 0.15, 0.07, 0.02 up 7+22:02:05 16:01:05
251 processes: 1 running, 250 sleeping
Mem: 3584K Active, 1066M Inact, 87G Wired, 5844K Cache, 9837M Buf, 5426M Free
Swap:
PID USERNAME THR PRI NICE SIZE RES STATE C TIME WCPU COMMAND
2049 root 200 52 0 10052K 3472K nfsrc 1 102:27 0.00% nfsd
22696 root 1 20 0 18660K 1260K select 0 2:21 0.00% bwm-ng
2373 maint1 1 20 0 68140K 3776K select 1 0:29 0.00% sshd
22683 root 1 20 0 12184K 736K select 6 0:13 0.00% rlogind
16215 maint1 1 20 0 68140K 3296K select 11 0:08 0.00% sshd
2219 root 1 20 0 20508K 1732K select 6 0:05 0.00% sendmail
2230 root 1 20 0 14260K 672K nanslp 6 0:02 0.00% cron
1919 root 1 20 0 12312K 680K select 8 0:02 0.00% syslogd
1680 root 1 20 0 6276K 360K select 2 0:01 0.00% devd
2039 root 1 20 0 12308K 728K select 8 0:01 0.00% mountd
1943 root 1 20 0 14392K 724K select 0 0:00 0.00% rpcbind
2448 maint1 1 20 0 68140K 2200K select 3 0:00 0.00% sshd
2223 smmsp 1 20 0 20508K 1388K pause 3 0:00 0.00% sendmail
16220 root 1 20 0 17664K 3004K pause 1 0:00 0.00% csh
2378 root 1 20 0 17664K 1376K ttyin 2 0:00 0.00% csh
16219 maint1 1 27 0 41428K 1176K wait 1 0:00 0.00% su
2283 root 1 20 0 16344K 644K select 7 0:00 0.00% inetd
17046 root 1 20 0 17664K 2076K ttyin 7 0:00 0.00% csh
10
dir
222531
svn://svn.freebsd.org/base/head
svn://svn.freebsd.org/base
kern.features.nfsd: 1
vfs.nfsd.server_max_nfsvers: 4
vfs.nfsd.server_min_nfsvers: 2
vfs.nfsd.nfs_privport: 0
vfs.nfsd.enable_locallocks: 0
vfs.nfsd.issue_delegations: 0
vfs.nfsd.commit_miss: 0
vfs.nfsd.commit_blks: 17396119
vfs.nfsd.mirrormnt: 1
vfs.nfsd.minthreads: 4
vfs.nfsd.maxthreads: 200
vfs.nfsd.threads: 200
vfs.nfsd.request_space_used: 632932
vfs.nfsd.request_space_used_highest: 1044128
vfs.nfsd.request_space_high: 47185920
vfs.nfsd.request_space_low: 31457280
vfs.nfsd.request_space_throttled: 0
vfs.nfsd.request_space_throttle_count: 0
vfs.nfsrv.fha.max_nfsds_per_fh: 8
vfs.nfsrv.fha.max_reqs_per_nfsd: 4
kern.features.nfscl: 1
kern.features.nfsserver: 1
vfs.nfs.downdelayinitial: 12
vfs.nfs.downdelayinterval: 30
vfs.nfs.keytab_enctype: 1
vfs.nfs.skip_wcc_data_onerr: 1
vfs.nfs.nfs3_jukebox_delay: 10
vfs.nfs.reconnects: 0
vfs.nfs.bufpackets: 4
vfs.nfs.callback_addr:
vfs.nfs.realign_count: 0
vfs.nfs.realign_test: 0
vfs.nfs.nfs_directio_allow_mmap: 1
vfs.nfs.nfs_directio_enable: 0
vfs.nfs.clean_pages_on_close: 1
vfs.nfs.commit_on_close: 0
vfs.nfs.prime_access_cache: 0
vfs.nfs.access_cache_timeout: 60
vfs.nfs.diskless_rootpath:
vfs.nfs.diskless_valid: 0
vfs.nfs.nfs_ip_paranoia: 1
vfs.nfs.defect: 0
vfs.nfs.iodmax: 20
vfs.nfs.iodmin: 0
vfs.nfs.iodmaxidle: 120
vfs.acl_nfs4_old_semantics: 0
vfs.nfs_common.realign_count: 0
vfs.nfs_common.realign_test: 0
vfs.nfsrv.nfs_privport: 0
vfs.nfsrv.fha.bin_shift: 18
vfs.nfsrv.fha.fhe_stats: No file handle entries.
vfs.nfsrv.commit_miss: 0
vfs.nfsrv.commit_blks: 0
vfs.nfsrv.async: 0
vfs.nfsrv.gatherdelay_v3: 0
vfs.nfsrv.gatherdelay: 10000
vfs.nfsrv.minthreads: 1
vfs.nfsrv.maxthreads: 200
vfs.nfsrv.threads: 0
vfs.nfsrv.request_space_used: 0
vfs.nfsrv.request_space_used_highest: 0
vfs.nfsrv.request_space_high: 47185920
vfs.nfsrv.request_space_low: 31457280
vfs.nfsrv.request_space_throttled: 0
vfs.nfsrv.request_space_throttle_count: 0
Server Info:
Getattr Setattr Lookup Readlink Read Write Create Remove
0 0 4859875 16546194 0 0 0 0
Rename Link Symlink Mkdir Rmdir Readdir RdirPlus Access
0 -1523364522 0 990131252 0 0 0 0
Mknod Fsstat Fsinfo PathConf Commit
0 0 0 0 0
Server Ret-Failed
0
Server Faults
0
Server Cache Stats:
Inprog Idem Non-idem Misses
189710 0 154619 -14704992
Server Write Gathering:
WriteOps WriteRPC Opsaved
0 0 0
USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND
root 11 1180.6 0.0 0 192 ?? RL 1Jun11 130918:59.20 [idle]
root 0 0.0 0.0 0 5488 ?? DLs 1Jun11 476:54.70 [kernel]
root 1 0.0 0.0 6276 136 ?? ILs 1Jun11 0:00.03 /sbin/init --
root 2 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [ciss_notify0]
root 3 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [ciss_notify1]
root 4 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [ciss_notify2]
root 5 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [sctp_iterator]
root 6 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [xpt_thrd]
root 7 0.0 0.0 0 16 ?? DL 1Jun11 0:12.17 [g_mp_kt]
root 8 0.0 0.0 0 16 ?? DL 1Jun11 0:22.25 [pagedaemon]
root 9 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [vmdaemon]
root 10 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [audit]
root 12 0.0 0.0 0 656 ?? WL 1Jun11 208:26.93 [intr]
root 13 0.0 0.0 0 48 ?? DL 1Jun11 35:45.18 [geom]
root 14 0.0 0.0 0 16 ?? DL 1Jun11 2:29.63 [yarrow]
root 15 0.0 0.0 0 384 ?? DL 1Jun11 0:12.44 [usb]
root 16 0.0 0.0 0 16 ?? DL 1Jun11 0:02.43 [acpi_thermal]
root 17 0.0 0.0 0 16 ?? DL 1Jun11 0:00.25 [acpi_cooling0]
root 18 0.0 0.0 0 16 ?? DL 1Jun11 0:00.00 [pagezero]
root 19 0.0 0.0 0 16 ?? DL 1Jun11 0:01.48 [bufdaemon]
root 20 0.0 0.0 0 16 ?? DL 1Jun11 51:24.22 [syncer]
root 21 0.0 0.0 0 16 ?? DL 1Jun11 0:02.15 [vnlru]
root 22 0.0 0.0 0 16 ?? DL 1Jun11 0:31.61 [softdepflush]
root 1624 0.0 0.0 14364 324 ?? Is 1Jun11 0:00.00 /usr/sbin/moused -p /dev/ums0 -t auto -I /var/run/moused.ums0.pid
root 1648 0.0 0.0 14364 512 ?? Is 1Jun11 0:00.00 /usr/sbin/moused -p /dev/ums1 -t auto -I /var/run/moused.ums1.pid
root 1680 0.0 0.0 6276 360 ?? Is 1Jun11 0:00.90 /sbin/devd
root 1919 0.0 0.0 12312 680 ?? Is 1Jun11 0:01.56 /usr/sbin/syslogd -s
root 1943 0.0 0.0 14392 724 ?? Is 1Jun11 0:00.32 /usr/sbin/rpcbind
root 2039 0.0 0.0 12308 728 ?? Is 1Jun11 0:00.58 /usr/sbin/mountd /etc/exports /etc/zfs/exports
root 2048 0.0 0.0 10052 340 ?? Is 1Jun11 0:00.02 nfsd: master (nfsd)
root 2049 0.0 0.0 10052 3472 ?? D 1Jun11 4953:44.73 nfsd: server (nfsd)
root 2211 0.0 0.0 47000 1600 ?? Is 1Jun11 0:00.00 /usr/sbin/sshd
root 2219 0.0 0.0 20508 1732 ?? Ss 1Jun11 0:05.04 sendmail: accepting connections (sendmail)
smmsp 2223 0.0 0.0 20508 1388 ?? Is 1Jun11 0:00.12 sendmail: Queue runner at 00:30:00 for /var/spool/clientmqueue (sendmail)
root 2230 0.0 0.0 14260 672 ?? Ss 1Jun11 0:02.44 /usr/sbin/cron -s
root 2283 0.0 0.0 16344 644 ?? Is 1Jun11 0:00.03 /usr/sbin/inetd -wW -C 60
root 2371 0.0 0.0 68140 1444 ?? Is 1Jun11 0:00.02 sshd: maint1 [priv] (sshd)
maint1 2373 0.0 0.0 68140 3776 ?? I 1Jun11 0:29.10 sshd: maint1 at pts/0 (sshd)
root 2383 0.0 0.0 0 128 ?? DL 1Jun11 60:18.89 [zfskern]
root 2446 0.0 0.0 68140 1460 ?? Is 1Jun11 0:00.01 sshd: maint1 [priv] (sshd)
maint1 2448 0.0 0.0 68140 2200 ?? I 1Jun11 0:00.25 sshd: maint1 at pts/2 (sshd)
root 16213 0.0 0.0 68140 2900 ?? Is Thu04PM 0:00.01 sshd: maint1 [priv] (sshd)
maint1 16215 0.0 0.0 68140 3296 ?? S Thu04PM 0:07.96 sshd: maint1 at pts/1 (sshd)
root 22683 0.0 0.0 12184 736 ?? Ss Sat05PM 0:13.37 rlogind
root 33240 0.0 0.0 68140 2740 ?? Is Wed12PM 0:00.01 sshd: maint1 [priv] (sshd)
maint1 33242 0.0 0.0 68140 2780 ?? I Wed12PM 0:00.00 sshd: maint1 at pts/4 (sshd)
root 33279 0.0 0.0 0 16 ?? DL Wed12PM 36:13.14 [fct0-worker]
root 33281 0.0 0.0 0 16 ?? DL Wed12PM 2:09.48 [fct1-worker]
root 33283 0.0 0.0 0 16 ?? DL Wed12PM 2:05.68 [fioa-data-groom]
root 33284 0.0 0.0 0 16 ?? DL Wed12PM 10:48.29 [fio0-bio-submit]
root 33285 0.0 0.0 0 16 ?? DL Wed12PM 0:27.01 [fiob-data-groom]
root 33286 0.0 0.0 0 16 ?? DL Wed12PM 0:03.72 [fio1-bio-submit]
root 33689 0.0 0.0 0 16 ?? DL Wed12PM 0:00.00 [md0]
root 33691 0.0 0.0 0 16 ?? DL Wed12PM 0:00.00 [md1]
root 33693 0.0 0.0 0 16 ?? DL Wed12PM 0:00.00 [md2]
root 33695 0.0 0.0 0 16 ?? DL Wed12PM 0:00.00 [md3]
root 35749 0.0 0.0 12184 572 ?? Is 5:05PM 0:00.01 rlogind
root 52810 0.0 0.0 12184 724 ?? Is 1:18PM 0:00.00 rlogind
root 2326 0.0 0.0 41300 984 v0 Is 1Jun11 0:00.01 login [pam] (login)
root 34215 0.0 0.0 17664 2076 v0 I+ Wed01PM 0:00.01 -csh (csh)
root 2327 0.0 0.0 12184 300 v1 Is+ 1Jun11 0:00.00 /usr/libexec/getty Pc ttyv1
root 2328 0.0 0.0 12184 300 v2 Is+ 1Jun11 0:00.00 /usr/libexec/getty Pc ttyv2
root 2329 0.0 0.0 12184 300 v3 Is+ 1Jun11 0:00.00 /usr/libexec/getty Pc ttyv3
root 2330 0.0 0.0 12184 300 v4 Is+ 1Jun11 0:00.00 /usr/libexec/getty Pc ttyv4
root 2331 0.0 0.0 12184 300 v5 Is+ 1Jun11 0:00.00 /usr/libexec/getty Pc ttyv5
root 2332 0.0 0.0 12184 300 v6 Is+ 1Jun11 0:00.00 /usr/libexec/getty Pc ttyv6
root 2333 0.0 0.0 12184 300 v7 Is+ 1Jun11 0:00.00 /usr/libexec/getty Pc ttyv7
maint1 2374 0.0 0.0 14636 384 0 Is 1Jun11 0:00.00 -sh (sh)
root 2377 0.0 0.0 41428 568 0 I 1Jun11 0:00.00 su
root 2378 0.0 0.0 17664 1376 0 I+ 1Jun11 0:00.04 _su (csh)
maint1 16216 0.0 0.0 14636 888 1 Is Thu04PM 0:00.00 -sh (sh)
root 16219 0.0 0.0 41428 1176 1 I Thu04PM 0:00.04 su
root 16220 0.0 0.0 17664 3004 1 S Thu04PM 0:00.09 _su (csh)
root 53623 0.0 0.0 14636 1640 1 S+ 4:01PM 0:00.00 /bin/sh ./nfsdebug.sh
root 53633 0.0 0.0 14328 1304 1 R+ 4:01PM 0:00.00 ps -auxww
maint1 2449 0.0 0.0 14636 636 2 Is 1Jun11 0:00.01 -sh (sh)
root 17045 0.0 0.0 41428 1172 2 I Thu05PM 0:00.00 su
root 17046 0.0 0.0 17664 2076 2 I+ Thu05PM 0:00.03 _su (csh)
root 22684 0.0 0.0 41428 1240 3 Is Sat05PM 0:00.00 login [pam] (login)
root 22685 0.0 0.0 17664 1420 3 I Sat05PM 0:00.02 -csh (csh)
root 22696 0.0 0.0 18660 1260 3 S+ Sat05PM 2:20.85 bwm-ng
maint1 33243 0.0 0.0 14636 880 4 Is+ Wed12PM 0:00.00 -sh (sh)
root 35750 0.0 0.0 41428 984 5 Is 5:05PM 0:00.00 login [pam] (login)
root 35751 0.0 0.0 17664 1320 5 I+ 5:05PM 0:00.01 -csh (csh)
root 52811 0.0 0.0 41428 1152 6 Is 1:18PM 0:00.00 login [pam] (login)
root 52812 0.0 0.0 17664 1820 6 I+ 1:18PM 0:00.01 -csh (csh)
# netstat -i
Name Mtu Network Address Ipkts Ierrs Idrop Opkts Oerrs Coll
bce0 1500 <Link#1> 00:10:18:8d:d0:a4 18340277 26 0 2512640 0 0
bce0 1500 10.24.0.0 bb99za2a 12939843 - - 2511543 - -
bce0 1500 fe80::210:18f fe80::210:18ff:fe 0 - - 3 - -
bce1* 1500 <Link#2> 00:10:18:8d:d0:a6 0 0 0 0 0 0
cxgb0 9000 <Link#3> 00:07:43:07:33:f8 4464851870 0 0 4378199683 0 0
cxgb0 9000 172.21.21.0 172.21.21.83 4464472961 - - 4378064187 - -
cxgb0 9000 fe80::207:43f fe80::207:43ff:fe 0 - - 3 - -
cxgb1 1500 <Link#4> 00:07:43:07:33:f9 0 0 0 0 0 0
usbus 0 <Link#5> 0 0 0 0 0 0
usbus 0 <Link#6> 0 0 0 0 0 0
usbus 0 <Link#7> 0 0 0 0 0 0
usbus 0 <Link#8> 0 0 0 0 0 0
usbus 0 <Link#9> 0 0 0 0 0 0
usbus 0 <Link#10> 0 0 0 0 0 0
lo0 16384 <Link#11> 701 0 0 701 0 0
lo0 16384 your-net localhost 645 - - 645 - -
lo0 16384 localhost ::1 56 - - 56 - -
lo0 16384 fe80::1%lo0 fe80::1 0 - - 0 - -
More information about the freebsd-fs
mailing list