------------------------------------------------------------ revno: 13026 revision-id: squid3@treenet.co.nz-20131110230135-b24u084oxhugyfxk parent: squid3@treenet.co.nz-20131110225516-3iaf83q0y23ugpc9 author: Alex Rousskov committer: Amos Jeffries branch nick: 3.4 timestamp: Sun 2013-11-10 16:01:35 -0700 message: Replace blocking sleep(3) and close UDS socket on failures. The two addressed XXX were not causing any known serious bugs on their own, but the blocking sleep was ugly and possibly in the way of further kid registration fixes/improvements. ------------------------------------------------------------ # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: squid3@treenet.co.nz-20131110230135-b24u084oxhugyfxk # target_branch: http://bzr.squid-cache.org/bzr/squid3/3.4 # testament_sha1: a4b716ba297824d39d5ebed0ccf90ce050a5e624 # timestamp: 2013-11-10 23:57:57 +0000 # source_branch: http://bzr.squid-cache.org/bzr/squid3/3.4 # base_revision_id: squid3@treenet.co.nz-20131110225516-\ # 3iaf83q0y23ugpc9 # # Begin patch === modified file 'src/ipc/UdsOp.cc' --- src/ipc/UdsOp.cc 2013-06-03 14:05:16 +0000 +++ src/ipc/UdsOp.cc 2013-11-10 23:01:35 +0000 @@ -81,11 +81,21 @@ message(aMessage), retries(10), // TODO: make configurable? timeout(10), // TODO: make configurable? + sleeping(false), writing(false) { message.address(address); } +void Ipc::UdsSender::swanSong() +{ + // did we abort while waiting between retries? + if (sleeping) + cancelSleep(); + + UdsOp::swanSong(); +} + void Ipc::UdsSender::start() { UdsOp::start(); @@ -96,7 +106,7 @@ bool Ipc::UdsSender::doneAll() const { - return !writing && UdsOp::doneAll(); + return !writing && !sleeping && UdsOp::doneAll(); } void Ipc::UdsSender::write() @@ -114,8 +124,53 @@ debugs(54, 5, HERE << params.conn << " flag " << params.flag << " retries " << retries << " [" << this << ']'); writing = false; if (params.flag != COMM_OK && retries-- > 0) { - sleep(1); // do not spend all tries at once; XXX: use an async timed event instead of blocking here; store the time when we started writing so that we do not sleep if not needed? - write(); // XXX: should we close on error so that conn() reopens? + // perhaps a fresh connection and more time will help? + conn()->close(); + sleep(); + } +} + +/// pause for a while before resending the message +void Ipc::UdsSender::sleep() +{ + Must(!sleeping); + sleeping = true; + eventAdd("Ipc::UdsSender::DelayedRetry", + Ipc::UdsSender::DelayedRetry, + new Pointer(this), 1, 0, false); // TODO: Use Fibonacci increments +} + +/// stop sleeping (or do nothing if we were not) +void Ipc::UdsSender::cancelSleep() +{ + if (sleeping) { + // Why not delete the event? See Comm::ConnOpener::cancelSleep(). + sleeping = false; + debugs(54, 9, "stops sleeping"); + } +} + +/// legacy wrapper for Ipc::UdsSender::delayedRetry() +void Ipc::UdsSender::DelayedRetry(void *data) +{ + Pointer *ptr = static_cast(data); + assert(ptr); + if (UdsSender *us = dynamic_cast(ptr->valid())) { + // get back inside AsyncJob protection by scheduling an async job call + typedef NullaryMemFunT Dialer; + AsyncCall::Pointer call = JobCallback(54, 4, Dialer, us, Ipc::UdsSender::delayedRetry); + ScheduleCallHere(call); + } + delete ptr; +} + +/// make another sending attempt after a pause +void Ipc::UdsSender::delayedRetry() +{ + debugs(54, 5, HERE << sleeping); + if (sleeping) { + sleeping = false; + write(); // reopens the connection if needed } } === modified file 'src/ipc/UdsOp.h' --- src/ipc/UdsOp.h 2012-10-04 09:14:06 +0000 +++ src/ipc/UdsOp.h 2013-11-10 23:01:35 +0000 @@ -65,11 +65,17 @@ UdsSender(const String& pathAddr, const TypedMsgHdr& aMessage); protected: + virtual void swanSong(); // UdsOp (AsyncJob) API virtual void start(); // UdsOp (AsyncJob) API virtual bool doneAll() const; // UdsOp (AsyncJob) API virtual void timedout(); // UdsOp API private: + void sleep(); + void cancelSleep(); + static void DelayedRetry(void *data); + void delayedRetry(); + void write(); ///< schedule writing void wrote(const CommIoCbParams& params); ///< done writing or error @@ -77,6 +83,7 @@ TypedMsgHdr message; ///< what to send int retries; ///< how many times to try after a write error int timeout; ///< total time to send the message + bool sleeping; ///< whether we are waiting to retry a failed write bool writing; ///< whether Comm started and did not finish writing private: