[squid-users] Squid suddenly stops

From: dtom <[email protected]>
Date: Tue, 09 Mar 2004 00:20:01 +0900

My previous post was sent with an incorrect Content-Type.
I apologize for posting a message that was hard to read, and for posting the same message again.

---- here is the original mail -----
Hi squid users,

I hope someone can help with this problem.

Occasionally, my Squid suddenly stops. The squid process still exists but does not respond.
No more entries are written to access.log, and the squid process consumes about 50% of CPU
(30% is the usual CPU load).

While Squid was in this state, I collected the following data.
Do I need to gather more data to analyze the problem?
Is this problem a bug in Squid, or is it related to an OS setting?

==== environment ====
Host Type: FR/280
CPU: UltraSPARC III(1.015GHz) * 2
Memory: 2GB
Disc: 36GB * 2(mirroring)
OS: Sun Solaris8 2/02
Squid Cache: Version 2.5.STABLE4-20031230
Compiler: gcc-3.2.3
configure option: --enable-underscores --enable-storeio=diskd,aufs,ufs,null --enable-removal-policies=heap,lru

==== squid.conf ====
http_port 18080
icp_port 0
cache_peer M.M.M.M parent 10080 7 no-query
cache_peer O.O.O.O parent 8080 7 no-query
cache_peer P.P.P.P parent 10080 7 no-query
cache_peer Q.Q.Q.Q parent 8080 7 no-query
acl QUERY urlpath_regex cgi-bin \? \.asp$ \.cgi$
acl CHU port port1
no_cache deny QUERY
no_cache deny CHU
cache_mem 768 MB
cache_swap_low 93
cache_swap_high 95
maximum_object_size 16384 KB
maximum_object_size_in_memory 32 KB
ipcache_size 16384
ipcache_low 93
ipcache_high 95
memory_replacement_policy heap GDSF
cache_dir diskd /var/squid/var/cache 4096 16 256 Q1=72 Q2=64
cache_access_log /var/squid/var/logs/access.log
cache_log /var/squid/var/logs/cache.log
cache_store_log none
pid_filename /var/squid/var/logs/squid.pid
ftp_user master@sample.com
dns_retransmit_interval 3 seconds
dns_nameservers A.A.A.A B.B.B.B C.C.C.C
auth_param basic children 5
auth_param basic realm Squid proxy-caching web server
auth_param basic credentialsttl 2 hours
refresh_pattern ^ftp: 1440 20% 10080
refresh_pattern ^gopher: 1440 0% 1440
refresh_pattern . 0 20% 4320
acl all src 0.0.0.0/0.0.0.0
acl manager proto cache_object
acl localhost src 127.0.0.1/255.255.255.255
acl to_localhost dst 127.0.0.0/8
acl SSL_ports port 443 563
acl Safe_ports port 80 # http
acl Safe_ports port 81 # http81
acl Safe_ports port 21 # ftp
acl Safe_ports port 443 563 # https, snews
acl Safe_ports port 70 # gopher
acl Safe_ports port 210 # wais
acl Safe_ports port 1025-65535 # unregistered ports
acl Safe_ports port 280 # http-mgmt
acl Safe_ports port 488 # gss-http
acl Safe_ports port 591 # filemaker
acl Safe_ports port 777 # multiling http
acl CONNECT method CONNECT
acl EADDR dst D.D.D.D/12 E.E.E.E/16
acl ADOM dstdomain .sample1.com
acl BDOM dstdomain .sample2.com
acl AADDR dst F.F.F.F/21 G.G.G.G/21
acl BADDR dst H.H.H.H/24
acl CADDR dst I.I.I.I/32
acl DADDR dst J.J.J.J/24 K.K.K.K/24 L.L.L.L/24
http_access allow manager localhost
http_access deny manager
http_access deny !Safe_ports
http_access deny CONNECT !SSL_ports
http_access deny to_localhost
http_access deny DADDR
http_access allow localhost
http_access deny all
http_reply_access allow all
icp_access allow all
cache_peer_access M.M.M.M allow ADOM SSL_ports
cache_peer_access M.M.M.M allow BDOM SSL_ports
cache_peer_access M.M.M.M allow AADDR SSL_ports
cache_peer_access M.M.M.M allow BADDR SSL_ports
cache_peer_access M.M.M.M deny all
cache_peer_access O.O.O.O allow ADOM !SSL_ports
cache_peer_access O.O.O.O allow BDOM !SSL_ports
cache_peer_access O.O.O.O allow AADDR !SSL_ports
cache_peer_access O.O.O.O allow BADDR !SSL_ports
cache_peer_access O.O.O.O deny all
cache_peer_access P.P.P.P allow !ADOM !AADDR !BDOM !BADDR !EADDR !CADDR SSL_ports
cache_peer_access P.P.P.P deny all
cache_peer_access Q.Q.Q.Q allow !ADOM !AADDR !BDOM !BADDR !EADDR !CADDR !SSL_ports
cache_peer_access Q.Q.Q.Q deny all
cache_effective_user nobody
cache_effective_group nogroup
visible_hostname sample
logfile_rotate 1
always_direct allow EADDR
always_direct allow CADDR
never_direct allow all
error_directory /usr/local/squid/share/errors/Japanese
coredump_dir /var/squid/var/cache
ignore_unknown_nameservers off

# truss -o /truss.txt -faeld -wall -rall -vall -p 23692
Base time stamp: 1076991269.7364 [ Tue Feb 17 13:14:29 JST 2004 ]
23692/1: psargs: (squid) -D
23692/3: 9.5866 lwp_cond_wait(0xFF075548, 0xFF075558, 0xFF06EDB0) Err#62 ETIME
23692/3: condvar type: USYNC_THREAD
23692/3: mutex type: USYNC_THREAD
23692/3: timeout: 300.000000000 sec

This is all that truss shows.

# prstat
Please wait... PID USERNAME SIZE RSS STATE PRI NICE TIME CPU PROCESS/NLWP
 23692 root 1135M 1135M run 0 0 12:11.57 50% squid/4
 23693 nobody 1240K 944K sleep 59 0 0:00.00 0.0% unlinkd/4
 
The diskd process was not listed in the prstat output.

# ps -Al
 F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
 8 O 60001 23692 23690 50 99 20 ? 145304 ? 716:59 squid
 8 S 60001 23693 23692 0 40 20 ? 155 ? ? 0:00 unlinkd
 8 S 60001 23694 23692 0 40 20 ? 336 ? ? 59:16 diskd

# vmstat 3
 procs memory page disk faults cpu
 r b w swap free re mf pi po fr de sr m0 m1 m3 m5 in sy cs us sy id
 0 0 0 2119616 459016 38 149 105 42 35 0 0 0 0 49 1 653 1014 992 7 5 88
 0 0 0 729896 270160 8 53 0 2 2 0 0 0 0 0 1 262 277 326 50 1 50
 0 0 0 729896 270168 8 51 0 0 0 0 0 0 0 0 0 263 681 330 50 0 50
 0 0 0 729896 270168 66 187 0 0 0 0 0 0 0 0 0 231 1488 321 50 1 49
 0 0 0 729896 270168 8 51 0 0 0 0 0 0 0 0 0 234 385 313 50 1 49
 0 0 0 729896 270168 8 51 0 2 2 0 0 0 0 0 0 258 411 331 50 0 50
 0 0 0 729896 270168 14 88 0 2 2 0 0 0 0 0 1 262 529 341 50 1 50
 0 0 0 729896 270168 66 187 0 0 0 0 0 0 0 2 5 320 1686 394 50 3 47
 0 0 0 729896 270160 8 51 0 0 0 0 0 0 0 0 0 248 300 319 50 1 49
 0 0 0 729896 270160 8 51 0 0 0 0 0 0 0 0 0 255 289 327 50 0 50
 0 0 0 729896 270160 66 187 0 0 0 0 0 0 0 0 0 307 1528 347 50 1 49
 0 0 0 729896 270160 8 51 0 2 2 0 0 0 0 0 1 230 330 306 50 1 50
 0 0 0 729896 270168 8 51 0 0 0 0 0 0 0 0 0 246 424 324 50 0 50
 0 0 0 729896 270168 8 51 0 0 0 0 0 0 0 0 0 227 347 315 50 0 50
 0 0 0 729896 270168 66 187 0 2 2 0 0 0 0 0 1 236 1517 314 50 1 49
 0 0 0 729896 270168 8 51 0 0 0 0 0 0 0 0 0 290 515 356 50 0 50
 0 0 0 729896 270168 8 51 0 0 0 0 0 0 0 0 0 277 369 347 50 1 49
 0 0 0 729656 269616 66 187 0 0 0 0 0 0 0 3 1 298 1579 369 50 4 45
 0 0 0 729896 270160 8 51 0 0 0 0 0 0 0 0 0 264 325 329 50 0 50

# gcore -o gcore 23692
# pstack gcore
core 'gcore.20040217.23692' of 23692: (squid) -D
----------------- lwp# 1 / thread# 1 --------------------
 0005e9d0 toMB (307f8708, 0, 1, a57d0, 205, 11f) + 34
 0003ed54 eventRun (138800, c8000, c8000, 179594, 179400, 9e800) + 64
 0005d6e0 main (c8400, ffbefe14, ffbefe20, 13800c, 0, 0) + 3ac
 0001cee8 _start (0, 0, 0, 0, 0, 0) + 58
----------------- lwp# 2 / thread# 2 --------------------
 ff11ea68 signotifywait ()
 ff04ed54 _dynamiclwps (ff06e000, 0, 0, 0, 0, 0) + 1c
 ff052030 thr_yield (0, 0, 0, 0, 0, 0) + 8c
----------------- lwp# 3 --------------------------------
 ff059770 lwp_cond_wait (ff075548, ff075558, ff06edb0)
 ff0490ac _age (3e, ff06ed9c, ff06e000, 0, 0, 4) + 74
 ff11c668 _door_return (ff175cb0, ff04a740, 0, 0, 0, 0) + 68
----------------- lwp# 4 --------------------------------
 ff11c60c door (0, 0, 0, 0, ff165d10, 4)
 ff056ba4 _sc_door_func (7, ff06f688, ff06f6a0, 3, ff06e000, 1) + 54
 ff04a740 _lwp_start (ff165d70, 0, 6000, ff175b74, 0, 0) + 18
 ff052030 thr_yield (0, 0, 0, 0, 0, 0) + 8c
-------------------------- thread# 3 --------------------
 ff04ddbc _reap_wait (ff0729e0, 20520, 0, ff06e000, 0, 0) + 38
 ff04db14 _reaper (ff06ee30, ff074740, ff0729e0, ff06ee08, 1, fe400000) + 38
 ff05b728 _thread_start (0, 0, 0, 0, 0, 0) + 40

# pldd gcore
core 'gcore.20040217.23692' of 23692: (squid) -D
/usr/lib/libcrypt_i.so.1
/usr/lib/libpthread.so.1
/usr/lib/libm.so.1
/usr/lib/libresolv.so.2
/usr/lib/libsocket.so.1
/usr/lib/libnsl.so.1
/usr/lib/libc.so.1
/usr/lib/libgen.so.1
/usr/lib/libdl.so.1
/usr/lib/libmp.so.2
/usr/platform/sun4u-us3/lib/libc_psr.so.1
/usr/lib/libthread.so.1
/usr/lib/nss_files.so.1

# pmap gcore
core 'gcore.20040217.23692' of 23692: (squid) -D
00010000 672K read/exec /usr/local/squid/sbin/squid
000C6000 16K read/write/exec /usr/local/squid/sbin/squid
000CA000 1158944K read/write/exec [ heap ]
FE790000 384K read/write/exec
FE802000 8K read/write
FE904000 8K read/write
FEA06000 8K read/write
FEB08000 8K read/write
FEC0A000 8K read/write
FED0C000 8K read/write
FEE0E000 8K read/write
FEF0C000 8K read/write
FEF10000 8K read/write
FF00E000 8K read/write
FF020000 24K read/exec /usr/lib/nss_files.so.1
FF036000 8K read/write/exec /usr/lib/nss_files.so.1
FF040000 120K read/exec /usr/lib/libthread.so.1
FF06E000 8K read/write/exec /usr/lib/libthread.so.1
FF070000 48K read/write/exec /usr/lib/libthread.so.1
FF080000 688K read/exec /usr/lib/libc.so.1
FF13C000 32K read/write/exec /usr/lib/libc.so.1
FF150000 8K read/write/exec
FF164000 8K read/write
FF174000 8K read/write
FF180000 568K read/exec /usr/lib/libnsl.so.1
FF21E000 40K read/write/exec /usr/lib/libnsl.so.1
FF228000 24K read/write/exec /usr/lib/libnsl.so.1
FF240000 8K read/write/exec
FF250000 16K read/exec /usr/lib/libmp.so.2
FF264000 8K read/write/exec /usr/lib/libmp.so.2
FF270000 8K read/exec /usr/platform/sun4u-us3/lib/libc_psr.so.1
FF280000 8K read/write/exec
FF290000 24K read/exec /usr/lib/libgen.so.1
FF2A6000 8K read/write/exec /usr/lib/libgen.so.1
FF2B0000 40K read/exec /usr/lib/libsocket.so.1
FF2CA000 8K read/write/exec /usr/lib/libsocket.so.1
FF2D0000 208K read/exec /usr/lib/libresolv.so.2
FF314000 16K read/write/exec /usr/lib/libresolv.so.2
FF318000 8K read/write/exec /usr/lib/libresolv.so.2
FF320000 88K read/exec /usr/lib/libm.so.1
FF344000 8K read/write/exec /usr/lib/libm.so.1
FF350000 24K read/exec /usr/lib/libpthread.so.1
FF366000 8K read/write/exec /usr/lib/libpthread.so.1
FF370000 8K read/exec /usr/lib/libcrypt_i.so.1
FF382000 8K read/write/exec /usr/lib/libcrypt_i.so.1
FF390000 8K read/exec /usr/lib/libdl.so.1
FF3A0000 8K read/write/exec
FF3B0000 152K read/exec /usr/lib/ld.so.1
FF3E6000 8K read/write/exec /usr/lib/ld.so.1
FFBDE000 72K read/write [ stack ]
 total 1162432K

# pflags gcore
core 'gcore.20040217.23692' of 23692: (squid) -D
        data model = _ILP32 flags = PR_RLC
  /1: flags = PR_STOPPED|PR_ISTOP
  why = PR_REQUESTED
  /2: flags = PR_STOPPED|PR_ISTOP|PR_ASLWP|PR_ASLEEP [ signotifywait() ]
  why = PR_REQUESTED
  /3: flags = PR_STOPPED|PR_ISTOP|PR_ASLEEP [ lwp_cond_wait(0xff075548,0xff075558,0xff06edb0) ]
  why = PR_REQUESTED
  /4: flags = PR_STOPPED|PR_ISTOP|PR_ASLEEP [ door(0x0,0x0,0x0,0x0,0xff165d10,0x4) ]
  why = PR_REQUESTED

Thanks in advance
Tomi
Received on Mon Mar 08 2004 - 08:17:39 MST

This archive was generated by hypermail pre-2.1.9 : Thu Apr 01 2004 - 12:00:01 MST