------------------------------------------------------------
revno: 13201 [merge]
revision-id: rousskov@measurement-factory.com-20140101201323-jm7t64zx1l0lnbh2
parent: rousskov@measurement-factory.com-20140101201116-8x39qph73c0rgcoi
parent: rousskov@measurement-factory.com-20140101192049-xhaxfg4lmbd1i99a
committer: Alex Rousskov
branch nick: trunk
timestamp: Wed 2014-01-01 13:13:23 -0700
message:
  Initial Large Rock and Collapsed Forwarding support.

  Large Rock: Support disk (and shared memory) caching of responses exceeding
  one db slot (or one shared memory page) in size. A single db slot/page size
  is still limited to 32KB (smaller values can be configured for disk caches
  using the newly added cache_dir slot-size option). Removal of the old rock
  cache_dir (followed by squid -z) is required -- the on-disk db structure
  has changed.

  Collapsed Forwarding: Optionally merge concurrent cachable requests for the
  same URI earlier: after the request headers have been parsed (as before),
  but now _before_ the response headers have been received. Merging of
  requests received by different SMP workers is supported. Controlled by the
  new collapsed_forwarding directive in squid.conf. Disabled by default
  because all but one of the merged requests have to be delayed (until the
  response headers are received) for the merging to work, which may be worse
  than forwarding all concurrent requests immediately. The overall feature
  idea and request eligibility conditions are based on Collapsed Forwarding
  in Squid2.

  Summary of other important changes (the merged branch log contains the
  details):

  * Tightened StoreEntry locking. Split StoreEntry::lock() into "just lock"
    and "update entry reference time" interfaces, addressing an old XXX.
    Improved entry lock/unlock debugging. Needs more work.

  * Adjusted the StoreIOState::write() API to allow callers to detect write
    errors.

  * Simplified the MemObject::write() API to remove an essentially unused
    callback.

  * Mark client streams that sent everything as STREAM_COMPLETE. The old code
    used STREAM_UNPLANNED_COMPLETE if the completed stream was associated
    with a non-persistent connection, which did not make sense to me and,
    IIRC, led to store entry aborts even though the entries were not damaged
    in any way.

  * mem_hdr::hasContigousContentRange() now returns true for empty ranges.

  * Support an "appending" ReadWriteLock state that can be shared by readers
    and the writer. The writer promises not to update key metadata (except
    for growing the object size and next pointers), and readers promise to be
    careful when reading growing slices.

  * Fixed StoreEntry::mayStartSwapOut() logic to handle terminated swapouts.

  * Improved STORE_MEM_CLIENT detection and documented known (and mostly old)
    StoreEntry::storeClientType() problems.

  * Removed the StoreEntry::hidden_mem_obj hack.

  * Polished StoreEntry debugging to report more info with less noise, using
    an "e:" prefix.

  * Added a script to extract store entry(ies) debugging from cache.log.
------------------------------------------------------------
Use --include-merges or -n0 to see merged revisions.
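For reference, a minimal squid.conf sketch exercising the two directives
introduced above. The cache_dir path, cache size, and the 16 KB slot size
are illustrative assumptions (any slot size up to the 32KB limit mentioned
in the log should do), not values taken from this patch:

  # merge concurrent cachable requests for the same URI before the
  # response headers arrive; disabled by default (see trade-off above)
  collapsed_forwarding on

  # rock store with a smaller-than-default slot size; responses larger
  # than one slot now span multiple slots (Large Rock)
  cache_dir rock /var/cache/squid/rock 1024 slot-size=16384

As the log notes, an existing rock database from an older Squid must be
removed and recreated (squid -z) because the on-disk structure has changed.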
------------------------------------------------------------ # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: rousskov@measurement-factory.com-20140101201323-\ # jm7t64zx1l0lnbh2 # target_branch: http://bzr.squid-cache.org/bzr/squid3/trunk/ # testament_sha1: 821c70cc98b7c46474e449e9c0b723f0fe6f497b # timestamp: 2014-01-01 20:53:48 +0000 # source_branch: http://bzr.squid-cache.org/bzr/squid3/trunk/ # base_revision_id: rousskov@measurement-factory.com-20140101201116-\ # 8x39qph73c0rgcoi # # Begin patch === modified file 'include/snmp_impl.h' --- include/snmp_impl.h 2012-10-04 11:10:17 +0000 +++ include/snmp_impl.h 2013-03-21 21:06:48 +0000 @@ -65,7 +65,4 @@ struct trapVar *next; }; -/* from snmp.c */ -extern u_char sid[]; /* size SID_MAX_LEN */ - #endif /* SQUID_SNMP_IMPL_H */ === modified file 'scripts/find-alive.pl' --- scripts/find-alive.pl 2013-05-10 22:08:04 +0000 +++ scripts/find-alive.pl 2013-08-15 22:09:07 +0000 @@ -55,6 +55,14 @@ 'fd_open.*\sFD (\d+)', 'fd_close\s+FD (\d+)', ], + IpcStoreMapEntry => [ + 'StoreMap.* opened .*entry (\d+) for \S+ (\S+)', + 'StoreMap.* closed .*entry (\d+) for \S+ (\S+)', + ], + sh_page => [ + 'PageStack.* pop: (sh_page\S+) at', + 'PageStack.* push: (sh_page\S+) at', + ], ); if (!$Pairs{$Thing}) { @@ -70,29 +78,32 @@ my $reConstructor = $Pairs{$Thing}->[0]; my $reDestructor = $Pairs{$Thing}->[1]; -my %Alive = (); +my %AliveCount = (); +my %AliveImage = (); my $Count = 0; while () { - if (/$reConstructor/) { - #die($_) if $Alive{$1}; - $Alive{$1} = $_; - ++$Count; + if (my @conIds = (/$reConstructor/)) { + my $id = join(':', @conIds); + #die($_) if $Alive{$id}; + $AliveImage{$id} = $_; + ++$Count unless $AliveCount{$id}++; } - elsif (/$reDestructor/) { - #warn("unborn: $_") unless $Alive{$1}; - $Alive{$1} = undef(); + elsif (my @deIds = (/$reDestructor/)) { + my $id = join(':', @deIds); + #warn("unborn: $_") unless $AliveCount{$id}; + $AliveImage{$id} = undef() unless --$AliveCount{$id}; } } printf(STDERR "Found %d %s\n", $Count, $Thing); -my $AliveCount = 0; -foreach my $alive (sort grep { defined($_) } values %Alive) { +my $aliveCount = 0; +foreach my $alive (sort grep { defined($_) } values %AliveImage) { next unless defined $alive; printf("Alive: %s", $alive); - ++$AliveCount; + ++$aliveCount; } -printf(STDERR "found %d still-alive %s\n", $AliveCount, $Thing); +printf(STDERR "found %d still-alive %s\n", $aliveCount, $Thing); exit(0); === added file 'scripts/trace-entry.pl' --- scripts/trace-entry.pl 1970-01-01 00:00:00 +0000 +++ scripts/trace-entry.pl 2013-07-09 23:01:38 +0000 @@ -0,0 +1,273 @@ +#!/usr/bin/perl -w + +# Reads cache.log and displays lines that correspond to a given store entry. +# +# Store entry can be identified by its key or an anchor slot ID in a rock-style +# map. +# +# Currently, the script reads and remembers many irrelevant lines because it +# does not know which one should be tracked in advance. 
+# + +use strict; +use warnings; +use Carp; + +my @InterestingEntries = @ARGV; +#die("usage: $0 [entry number|key value|pointer address] ...\n"); + +my $LastEntryId = 0; +my %Entries = (); +my %EntriesByPartId = (); + +my %CurrentEntries = (); +my $Kid; +my %Entering = (); +my %Inside = (); + +my $DEB; + +while () { + my $line = $_; + #$DEB = 1 if /16:53:44.632/; + + ($Kid) = (/(kid\d+)[|]/); + $Kid = 'kid0' unless defined $Kid; + + &enterBlock($., $_) if + (/[|:] entering\b/ && !/Port::noteRead/) || + (/Port::noteRead/ && /handling/); + + next unless $Inside{$Kid}; + + while ($line =~ s@\b(entry) (\d+) .*?(\S*_map)@ @) { + &processEntryPartId("$3.$1", $2); + } + + while ($line =~ s@\b(slice|slot) (\d+)@ @) { + &processEntryPartId($1, $2); + } + + #while ($line =~ s@\b(page) (\w+)@ @) { + # &processEntryPartId($1, $2); + #} + + while ($line =~ s@\b(key) '?(\w+)@ @) { + &processEntryPartId($1, $2); + } + + while ($line =~ s@\b([A-Z0-9]{32})\b@ @) { + &processEntryPartId('key', $1); + } + + while ($line =~ s@\be:\S*?/(0x\w+)@ @ || $line =~ s@\bStoreEntry\s+(0x\w+)@ @) { + &processEntryPartId('pointer', $1); + } + + if ($line ne $_ || /[|:] leaving\b/) { + if (my $entry = $CurrentEntries{$Kid}) { + &updateEntry($entry, $Entering{$Kid}) if exists $Entering{$Kid}; + delete $Entering{$Kid}; + &updateEntry($entry, &historyLine($., $_)); + } + } + + &leaveBlock() if + (/[|:] leaving\b/ && !/Port::noteRead/) || + (/Port::noteRead/ && /handled/); +} + + +# merge same entries +my %cleanEntries = (); +foreach my $id (sort { $a <=> $b } keys %Entries) { + my $entry = $Entries{$id}; + + next unless &mergeAllLinkedEntries($entry); + + $entry->{id} = 1 + scalar keys %cleanEntries; + $cleanEntries{$entry->{id}} = $entry; +} +%Entries = %cleanEntries; + +printf("Saw %d entries\n", scalar keys %Entries); + +if (!@InterestingEntries) { # print all entries + foreach my $id (sort { $a <=> $b } keys %Entries) { + my $entry = $Entries{$id}; + reportEntry($entry, 1); + } +} else { + foreach my $description (@InterestingEntries) { + my ($part, $id) = ($description =~ /(\w+)\s+(\w+)/); + my $entry = &getExistingEntry($part, $id); + reportEntry($entry, 1); + } +} + +exit(0); + +sub enterBlock { + my ($lineNo, $lineText) = @_; + + $Entering{$Kid} = &historyLine($., $_); + die("double entrance, stopped") if $Inside{$Kid}; + $Inside{$Kid} = 1; +} + +sub leaveBlock { + $CurrentEntries{$Kid} = undef(); + delete $Entering{$Kid}; + $Inside{$Kid} = 0; +} + +sub processEntryPartId { + my ($part, $id) = @_; + + #warn("XXX1: $Kid| part.id: $part.$id\n") if $DEB; + + my $entry; + my $curEntry = $CurrentEntries{$Kid}; + my $oldEntry = &getExistingEntry($part, $id); + if ($curEntry && $oldEntry && $curEntry->{id} != $oldEntry->{id}) { + &linkEntries($curEntry, $oldEntry, "$part.$id"); + $entry = $curEntry; + } else { + $entry = $curEntry ? 
$curEntry : $oldEntry; + } + $entry = &getEntry($part, $id) unless defined $entry; + $CurrentEntries{$Kid} = $entry; + + $entry->{parts}->{$part} = {} unless exists $entry->{parts}->{$part}; + $entry->{parts}->{$part}->{$id} = $_ unless exists $entry->{parts}->{$part}->{$id}; +} + +sub historyLine { + my ($lineCount, $line) = @_; + return sprintf("#%06d %s", $lineCount, $line); +} + +sub updateEntry { + my ($entry, $historyLine) = @_; + + $entry->{history} .= $historyLine; +} + +sub linkEntries { + my ($e1, $e2, $ctx) = @_; + + $e1->{sameAs}->{$e2->{id}} = 1; + $e2->{sameAs}->{$e1->{id}} = 1; +} + +sub mergeAllLinkedEntries { + my ($entry) = @_; + + #warn(sprintf("merging %d <-- * %s\n", $entry->{id}, $entry->{merged} ? "skipped" : "")); + + return 0 if $entry->{merged}; + $entry->{merged} = 1; + + foreach my $otherId (keys %{$entry->{sameAs}}) { + my $otherE = $Entries{$otherId}; + die("missing internal entry$otherId, stopped") unless $otherE; + next if $otherE->{merged}; + &mergeAllLinkedEntries($otherE); + &mergeOneEntry($entry, $otherE); + $otherE->{merged} = 1; + } + + return 1; +} + +sub mergeOneEntry { + my ($entry, $otherE) = @_; + + #warn(sprintf("merging %d <-- %d\n", $entry->{id}, $otherE->{id})); + + foreach my $part (keys %{$otherE->{parts}}) { + foreach my $id (keys %{$otherE->{parts}->{$part}}) { + $entry->{parts}->{$part}->{$id} = $otherE->{parts}->{$part}->{$id}; + } + } + + $entry->{history} .= $otherE->{history}; +} + +sub getExistingEntry { + my ($part, $id) = @_; + + return $EntriesByPartId{$part}->{$id} if exists $EntriesByPartId{$part}; + return undef(); +} + +sub getEntry { + my ($part, $id) = @_; + + $EntriesByPartId{$part} = {} unless exists $EntriesByPartId{$part}; + my $entry = $EntriesByPartId{$part}->{$id}; + return $entry if $entry; + + $entry = { + id => ++$LastEntryId, + + parts => {}, + + history => '', + + reported => 0, + }; + + $entry->{parts}->{$part} = {}; + $EntriesByPartId{$part}->{$id} = $entry; + $Entries{$LastEntryId} = $entry; + return $entry; +} + + +sub reportEntry { + my ($entry, $recursive) = @_; + + return if $entry->{reported}; + $entry->{reported} = 1; + + printf("entry%d:\n", $entry->{id}); + + foreach my $part (keys %{$entry->{parts}}) { + printf("\t%s(s):", $part); + foreach my $id (keys %{$entry->{parts}->{$part}}) { + printf(" %s", $id); + } + print("\n"); + } + + &reportEntryHistory($entry); +} + +sub reportEntryParam { + my ($entry, $name, $value) = @_; + + $value = $entry->{$name} if @_ < 3; + $value = '?' unless defined $value; + $value = "\n$value" if $value =~ /\n/m; + printf("\t%s: %s\n", $name, $value); +} + +sub reportEntryHistory { + my ($entry) = @_; + + my $history = $entry->{history}; + my @lines = split(/\n/, $history); + &reportEntryParam($entry, 'history', (scalar @lines) . 
" lines"); + + my $lastKid = ''; + foreach my $line (sort @lines) { + my ($kid) = ($line =~ /(kid\d+)[|]/); + $kid = 'kid0' unless defined $kid; + + print "\n" if $lastKid ne $kid; + print "$line\n"; + $lastKid = $kid; + } + print "\n" if @lines; +} === added file 'src/CollapsedForwarding.cc' --- src/CollapsedForwarding.cc 1970-01-01 00:00:00 +0000 +++ src/CollapsedForwarding.cc 2013-12-31 18:49:41 +0000 @@ -0,0 +1,161 @@ +/* + * DEBUG: section 17 Request Forwarding + * + */ + +#include "squid.h" +#include "CollapsedForwarding.h" +#include "globals.h" +#include "ipc/mem/Segment.h" +#include "ipc/Messages.h" +#include "ipc/Port.h" +#include "ipc/TypedMsgHdr.h" +#include "MemObject.h" +#include "SquidConfig.h" +#include "Store.h" +#include "store_key_md5.h" +#include "tools.h" + +/// shared memory segment path to use for CollapsedForwarding queue +static const char *const ShmLabel = "cf"; +/// a single worker-to-worker queue capacity +// TODO: make configurable or compute from squid.conf settings if possible +static const int QueueCapacity = 1024; + +std::auto_ptr CollapsedForwarding::queue; + +/// IPC queue message +class CollapsedForwardingMsg +{ +public: + CollapsedForwardingMsg(): sender(-1), xitIndex(-1) {} + +public: + int sender; ///< kid ID of sending process + + /// transients index, so that workers can find [private] entries to sync + sfileno xitIndex; +}; + +// CollapsedForwarding + +void +CollapsedForwarding::Init() +{ + Must(!queue.get()); + if (UsingSmp() && IamWorkerProcess()) + queue.reset(new Queue(ShmLabel, KidIdentifier)); +} + +void +CollapsedForwarding::Broadcast(const StoreEntry &e) +{ + if (!queue.get()) + return; + + if (!e.mem_obj || e.mem_obj->xitTable.index < 0 || + !Store::Root().transientReaders(e)) { + debugs(17, 7, "nobody reads " << e); + return; + } + + CollapsedForwardingMsg msg; + msg.sender = KidIdentifier; + msg.xitIndex = e.mem_obj->xitTable.index; + + debugs(17, 5, e << " to " << Config.workers << "-1 workers"); + + // TODO: send only to workers who are waiting for data + for (int workerId = 1; workerId <= Config.workers; ++workerId) { + try { + if (workerId != KidIdentifier && queue->push(workerId, msg)) + Notify(workerId); + } catch (const Queue::Full &) { + debugs(17, DBG_IMPORTANT, "ERROR: Collapsed forwarding " << + "queue overflow for kid" << workerId << + " at " << queue->outSize(workerId) << " items"); + // TODO: grow queue size + } + } +} + +void +CollapsedForwarding::Notify(const int workerId) +{ + // TODO: Count and report the total number of notifications, pops, pushes. 
+ debugs(17, 7, "to kid" << workerId); + Ipc::TypedMsgHdr msg; + msg.setType(Ipc::mtCollapsedForwardingNotification); + msg.putInt(KidIdentifier); + const String addr = Ipc::Port::MakeAddr(Ipc::strandAddrPfx, workerId); + Ipc::SendMessage(addr, msg); +} + +void +CollapsedForwarding::HandleNewData(const char *const when) +{ + debugs(17, 4, "popping all " << when); + CollapsedForwardingMsg msg; + int workerId; + int poppedCount = 0; + while (queue->pop(workerId, msg)) { + debugs(17, 3, "message from kid" << workerId); + if (workerId != msg.sender) { + debugs(17, DBG_IMPORTANT, "mismatching kid IDs: " << workerId << + " != " << msg.sender); + } + + debugs(17, 7, "handling entry " << msg.xitIndex << " in transients_map"); + Store::Root().syncCollapsed(msg.xitIndex); + debugs(17, 7, "handled entry " << msg.xitIndex << " in transients_map"); + + // XXX: stop and schedule an async call to continue + assert(++poppedCount < SQUID_MAXFD); + } +} + +void +CollapsedForwarding::HandleNotification(const Ipc::TypedMsgHdr &msg) +{ + const int from = msg.getInt(); + debugs(17, 7, "from " << from); + assert(queue.get()); + queue->clearReaderSignal(from); + HandleNewData("after notification"); +} + +/// initializes shared queue used by CollapsedForwarding +class CollapsedForwardingRr: public Ipc::Mem::RegisteredRunner +{ +public: + /* RegisteredRunner API */ + CollapsedForwardingRr(): owner(NULL) {} + virtual ~CollapsedForwardingRr(); + +protected: + virtual void create(const RunnerRegistry &); + virtual void open(const RunnerRegistry &); + +private: + Ipc::MultiQueue::Owner *owner; +}; + +RunnerRegistrationEntry(rrAfterConfig, CollapsedForwardingRr); + +void CollapsedForwardingRr::create(const RunnerRegistry &) +{ + Must(!owner); + owner = Ipc::MultiQueue::Init(ShmLabel, Config.workers, 1, + sizeof(CollapsedForwardingMsg), + QueueCapacity); +} + +void CollapsedForwardingRr::open(const RunnerRegistry &) +{ + CollapsedForwarding::Init(); +} + +CollapsedForwardingRr::~CollapsedForwardingRr() +{ + delete owner; +} === added file 'src/CollapsedForwarding.h' --- src/CollapsedForwarding.h 1970-01-01 00:00:00 +0000 +++ src/CollapsedForwarding.h 2013-12-31 18:49:41 +0000 @@ -0,0 +1,41 @@ +/* + * DEBUG: section 17 Request Forwarding + * + */ + +#ifndef SQUID_COLLAPSED_FORWARDING_H +#define SQUID_COLLAPSED_FORWARDING_H + +#include "ipc/forward.h" +#include "ipc/Queue.h" +#include "typedefs.h" + +#include + +class StoreEntry; + +/// Sends and handles collapsed forwarding notifications. 
+class CollapsedForwarding +{ +public: + /// open shared memory segment + static void Init(); + + /// notify other workers about changes in entry state (e.g., new data) + static void Broadcast(const StoreEntry &e); + + /// kick worker with empty IPC queue + static void Notify(const int workerId); + + /// handle new data messages in IPC queue + static void HandleNewData(const char *const when); + + /// handle queue push notifications from worker or disker + static void HandleNotification(const Ipc::TypedMsgHdr &msg); + +private: + typedef Ipc::MultiQueue Queue; + static std::auto_ptr queue; ///< IPC queue +}; + +#endif /* SQUID_COLLAPSED_FORWARDING_H */ === modified file 'src/DiskIO/IpcIo/IpcIoFile.cc' --- src/DiskIO/IpcIo/IpcIoFile.cc 2013-10-25 00:13:46 +0000 +++ src/DiskIO/IpcIo/IpcIoFile.cc 2013-12-31 18:49:41 +0000 @@ -184,13 +184,13 @@ bool IpcIoFile::canRead() const { - return diskId >= 0 && canWait(); + return diskId >= 0 && !error_ && canWait(); } bool IpcIoFile::canWrite() const { - return diskId >= 0 && canWait(); + return diskId >= 0 && !error_ && canWait(); } bool @@ -270,13 +270,19 @@ { bool ioError = false; if (!response) { - debugs(79, 3, HERE << "error: timeout"); + debugs(79, 3, "disker " << diskId << " timeout"); ioError = true; // I/O timeout does not warrant setting error_? } else if (response->xerrno) { - debugs(79, DBG_IMPORTANT, HERE << "error: " << xstrerr(response->xerrno)); + debugs(79, DBG_IMPORTANT, "ERROR: disker " << diskId << + " error writing " << writeRequest->len << " bytes at " << + writeRequest->offset << ": " << xstrerr(response->xerrno) << + "; this worker will stop using " << dbName); ioError = error_ = true; } else if (response->len != writeRequest->len) { - debugs(79, DBG_IMPORTANT, HERE << "problem: " << response->len << " < " << writeRequest->len); + debugs(79, DBG_IMPORTANT, "ERROR: disker " << diskId << " wrote " << + response->len << " instead of " << writeRequest->len << + " bytes (offset " << writeRequest->offset << "); " << + "this worker will stop using " << dbName); error_ = true; } @@ -653,27 +659,68 @@ } } +/// Tries to write buffer to disk (a few times if needed); +/// sets ipcIo results, but does no cleanup. The caller must cleanup. +static void +diskerWriteAttempts(IpcIoMsg &ipcIo) +{ + const char *buf = Ipc::Mem::PagePointer(ipcIo.page); + size_t toWrite = min(ipcIo.len, Ipc::Mem::PageSize()); + size_t wroteSoFar = 0; + off_t offset = ipcIo.offset; + // Partial writes to disk do happen. It is unlikely that the caller can + // handle partial writes by doing something other than writing leftovers + // again, so we try to write them ourselves to minimize overheads. + const int attemptLimit = 10; + for (int attempts = 1; attempts <= attemptLimit; ++attempts) { + const ssize_t result = pwrite(TheFile, buf, toWrite, offset); + ++statCounter.syscalls.disk.writes; + fd_bytes(TheFile, result, FD_WRITE); + + if (result < 0) { + ipcIo.xerrno = errno; + assert(ipcIo.xerrno); + debugs(47, DBG_IMPORTANT, "disker" << KidIdentifier << + " error writing " << toWrite << '/' << ipcIo.len << + " at " << ipcIo.offset << '+' << wroteSoFar << + " on " << attempts << " try: " << xstrerr(ipcIo.xerrno)); + ipcIo.len = wroteSoFar; + return; // bail on error + } + + const size_t wroteNow = static_cast(result); // result >= 0 + ipcIo.xerrno = 0; + + debugs(47,3, "disker" << KidIdentifier << " wrote " << + (wroteNow >= toWrite ? 
"all " : "just ") << wroteNow << + " out of " << toWrite << '/' << ipcIo.len << " at " << + ipcIo.offset << '+' << wroteSoFar << " on " << attempts << + " try"); + + wroteSoFar += wroteNow; + + if (wroteNow >= toWrite) { + ipcIo.xerrno = 0; + ipcIo.len = wroteSoFar; + return; // wrote everything there was to write + } + + buf += wroteNow; + offset += wroteNow; + toWrite -= wroteNow; + } + + debugs(47, DBG_IMPORTANT, "disker" << KidIdentifier << + " exhausted all " << attemptLimit << " attempts while writing " << + toWrite << '/' << ipcIo.len << " at " << ipcIo.offset << '+' << + wroteSoFar); + return; // not a fatal I/O error, unless the caller treats it as such +} + static void diskerWrite(IpcIoMsg &ipcIo) { - const char *const buf = Ipc::Mem::PagePointer(ipcIo.page); - const ssize_t wrote = pwrite(TheFile, buf, min(ipcIo.len, Ipc::Mem::PageSize()), ipcIo.offset); - ++statCounter.syscalls.disk.writes; - fd_bytes(TheFile, wrote, FD_WRITE); - - if (wrote >= 0) { - ipcIo.xerrno = 0; - const size_t len = static_cast(wrote); // safe because wrote > 0 - debugs(47,8, HERE << "disker" << KidIdentifier << " wrote " << - (len == ipcIo.len ? "all " : "just ") << wrote); - ipcIo.len = len; - } else { - ipcIo.xerrno = errno; - ipcIo.len = 0; - debugs(47,5, HERE << "disker" << KidIdentifier << " write error: " << - ipcIo.xerrno); - } - + diskerWriteAttempts(ipcIo); // may fail Ipc::Mem::PutPage(ipcIo.page); } === modified file 'src/FwdState.cc' --- src/FwdState.cc 2013-12-06 14:59:47 +0000 +++ src/FwdState.cc 2013-12-06 23:52:26 +0000 @@ -129,7 +129,7 @@ pconnRace = raceImpossible; start_t = squid_curtime; serverDestinations.reserve(Config.forward_max_tries); - e->lock(); + e->lock("FwdState"); EBIT_SET(e->flags, ENTRY_FWD_HDR_WAIT); } @@ -262,7 +262,7 @@ entry->unregisterAbort(); - entry->unlock(); + entry->unlock("FwdState"); entry = NULL; @@ -1229,7 +1229,7 @@ /*assert(!EBIT_TEST(entry->flags, ENTRY_DISPATCHED)); */ assert(entry->ping_status != PING_WAITING); - assert(entry->lock_count); + assert(entry->locked()); EBIT_SET(entry->flags, ENTRY_DISPATCHED); === modified file 'src/Makefile.am' --- src/Makefile.am 2013-12-17 17:05:17 +0000 +++ src/Makefile.am 2014-01-01 19:20:49 +0000 @@ -313,6 +313,8 @@ ClientRequestContext.h \ clientStream.cc \ clientStream.h \ + CollapsedForwarding.cc \ + CollapsedForwarding.h \ CompletionDispatcher.cc \ CompletionDispatcher.h \ CommRead.h \ @@ -540,6 +542,8 @@ swap_log_op.h \ SwapDir.cc \ SwapDir.h \ + Transients.cc \ + Transients.h \ MemStore.cc \ MemStore.h \ time.cc \ @@ -1218,6 +1222,7 @@ tests_testACLMaxUserIP_SOURCES= \ cbdata.cc \ ClientInfo.h \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ DiskIO/ReadRequest.cc \ @@ -1280,6 +1285,7 @@ swap_log_op.h \ tests/stub_SwapDir.cc \ SwapDir.h \ + Transients.cc \ log/access_log.h \ tests/stub_access_log.cc \ cache_cf.h \ @@ -1307,6 +1313,7 @@ tests/stub_Port.cc \ repl_modules.h \ tests/stub_store.cc \ + tests/stub_store_client.cc \ store_rebuild.h \ tests/stub_store_rebuild.cc \ tests/stub_store_stats.cc \ @@ -1418,6 +1425,7 @@ client_side_request.cc \ ClientInfo.h \ clientStream.cc \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ CpuAffinityMap.cc \ @@ -1561,6 +1569,7 @@ StoreSwapLogData.cc \ tools.h \ tools.cc \ + Transients.cc \ tests/stub_tunnel.cc \ tests/stub_SwapDir.cc \ MemStore.cc \ @@ -1627,6 +1636,7 @@ cbdata.cc \ client_db.h \ ClientInfo.h \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ $(DELAY_POOL_SOURCE) \ @@ -1707,6 
+1717,7 @@ StrList.h \ StrList.cc \ tests/stub_SwapDir.cc \ + Transients.cc \ log/access_log.h \ tests/stub_access_log.cc \ tests/stub_acl.cc \ @@ -1738,6 +1749,7 @@ tests/stub_mime.cc \ tests/stub_pconn.cc \ tests/stub_Port.cc \ + tests/stub_stat.cc \ tests/stub_store_client.cc \ tests/stub_store_stats.cc \ store_rebuild.h \ @@ -1825,6 +1837,7 @@ client_side_request.cc \ ClientInfo.h \ clientStream.cc \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ CpuAffinityMap.cc \ @@ -1990,6 +2003,7 @@ time.cc \ tools.h \ tools.cc \ + Transients.cc \ tests/stub_tunnel.cc \ MemStore.cc \ $(UNLINKDSOURCE) \ @@ -2071,6 +2085,7 @@ client_side_request.cc \ ClientInfo.h \ clientStream.cc \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ CpuAffinityMap.cc \ @@ -2236,6 +2251,7 @@ time.cc \ tools.h \ tools.cc \ + Transients.cc \ tests/stub_tunnel.cc \ MemStore.cc \ $(UNLINKDSOURCE) \ @@ -2316,6 +2332,7 @@ client_side_request.cc \ ClientInfo.h \ clientStream.cc \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ CpuAffinityMap.cc \ @@ -2465,6 +2482,7 @@ StrList.h \ StrList.cc \ tests/stub_SwapDir.cc \ + Transients.cc \ tests/test_http_range.cc \ tests/stub_external_acl.cc \ tests/stub_ipc_Forwarder.cc \ @@ -2536,6 +2554,7 @@ HttpParser.h \ MemBuf.cc \ MemBuf.h \ + tests/stub_MemObject.cc \ Mem.h \ tests/stub_mem.cc \ String.cc \ @@ -2545,9 +2564,14 @@ tests/stub_SBufDetailedStats.cc \ tests/stub_cache_cf.cc \ tests/stub_cache_manager.cc \ + tests/stub_comm.cc \ + tests/stub_cbdata.cc \ tests/stub_debug.cc \ tests/stub_event.cc \ tests/stub_HelperChildConfig.cc \ + tests/stub_stmem.cc \ + tests/stub_store.cc \ + tests/stub_store_stats.cc \ tools.h \ tests/stub_tools.cc \ tests/testHttpParser.cc \ @@ -2619,6 +2643,7 @@ client_side_request.cc \ ClientInfo.h \ clientStream.cc \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ CpuAffinityMap.cc \ @@ -2757,6 +2782,7 @@ event.cc \ tools.h \ tools.cc \ + Transients.cc \ tests/stub_tunnel.cc \ tests/stub_SwapDir.cc \ MemStore.cc \ @@ -2819,6 +2845,7 @@ tests/stub_CacheDigest.cc \ cbdata.cc \ ClientInfo.h \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ $(DELAY_POOL_SOURCE) \ @@ -2922,12 +2949,14 @@ mime.h \ tests/stub_mime.cc \ tests/stub_Port.cc \ + tests/stub_stat.cc \ tests/stub_store_client.cc \ tests/stub_store_stats.cc \ store_rebuild.h \ tests/stub_store_rebuild.cc \ tests/stub_store_swapout.cc \ tools.h \ + Transients.cc \ tests/stub_tools.cc \ tests/stub_UdsOp.cc \ tests/testMain.cc \ @@ -3047,6 +3076,7 @@ tests/testUfs.h \ tests/stub_cache_manager.cc \ tests/stub_client_db.cc \ + tests/stub_CollapsedForwarding.cc \ tests/stub_HelperChildConfig.cc \ tests/stub_icp.cc \ tests/stub_ipc.cc \ @@ -3060,6 +3090,7 @@ internal.h \ tests/stub_internal.cc \ tests/stub_libformat.cc \ + tests/stub_stat.cc \ store_rebuild.h \ tests/stub_store_rebuild.cc \ tests/stub_store_stats.cc \ @@ -3083,6 +3114,7 @@ RequestFlags.cc \ SquidList.h \ SquidList.cc \ + Transients.cc \ MasterXaction.cc \ MasterXaction.h \ MemObject.cc \ @@ -3234,6 +3266,8 @@ tests_testRock_SOURCES = \ cbdata.cc \ CacheDigest.h \ + CollapsedForwarding.h \ + CollapsedForwarding.cc \ tests/stub_CacheDigest.cc \ ConfigOption.cc \ ConfigParser.cc \ @@ -3290,6 +3324,7 @@ tests/stub_StatHist.cc \ stmem.cc \ repl_modules.h \ + tests/stub_stat.cc \ store.cc \ StoreFileSystem.cc \ StoreIOState.cc \ @@ -3309,6 +3344,8 @@ StrList.h \ StrList.cc \ SwapDir.cc \ + Transients.h \ + Transients.cc 
\ tests/testRock.cc \ tests/testMain.cc \ tests/testRock.h \ @@ -3416,6 +3453,7 @@ client_side_request.cc \ ClientInfo.h \ clientStream.cc \ + tests/stub_CollapsedForwarding.cc \ ConfigOption.cc \ ConfigParser.cc \ CpuAffinityMap.cc \ @@ -3564,6 +3602,7 @@ String.cc \ StrList.h \ StrList.cc \ + Transients.cc \ tests/stub_SwapDir.cc \ MemStore.cc \ tests/stub_debug.cc \ @@ -3694,6 +3733,7 @@ tests/stub_HelperChildConfig.cc \ tests/stub_cache_cf.cc \ tests/stub_cache_manager.cc \ + tests/stub_store.cc \ tests/stub_store_stats.cc \ tests/stub_tools.cc \ SquidString.h \ === modified file 'src/MemObject.cc' --- src/MemObject.cc 2013-02-12 11:34:35 +0000 +++ src/MemObject.cc 2013-12-31 18:49:41 +0000 @@ -74,31 +74,53 @@ return Pool().inUseCount(); } +const char * +MemObject::storeId() const +{ + if (!storeId_.size()) { + debugs(20, DBG_IMPORTANT, "Bug: Missing MemObject::storeId value"); + dump(); + storeId_ = "[unknown_URI]"; + } + return storeId_.termedBuf(); +} + +const char * +MemObject::logUri() const +{ + return logUri_.size() ? logUri_.termedBuf() : storeId(); +} + +bool +MemObject::hasUris() const +{ + return storeId_.size(); +} + void -MemObject::resetUrls(char const *aUrl, char const *aLog_url) +MemObject::setUris(char const *aStoreId, char const *aLogUri, const HttpRequestMethod &aMethod) { - safe_free(url); - safe_free(log_url); /* XXX account log_url */ - log_url = xstrdup(aLog_url); - url = xstrdup(aUrl); + storeId_ = aStoreId; + + // fast pointer comparison for a common storeCreateEntry(url,url,...) case + if (!aLogUri || aLogUri == aStoreId) + logUri_.clean(); // use storeId_ by default to minimize copying + else + logUri_ = aLogUri; + + method = aMethod; + +#if URL_CHECKSUM_DEBUG + chksum = url_checksum(urlXXX()); +#endif } -MemObject::MemObject(char const *aUrl, char const *aLog_url) +MemObject::MemObject(): smpCollapsed(false) { debugs(20, 3, HERE << "new MemObject " << this); _reply = new HttpReply; HTTPMSGLOCK(_reply); - url = xstrdup(aUrl); - -#if URL_CHECKSUM_DEBUG - - chksum = url_checksum(url); - -#endif - - log_url = xstrdup(aLog_url); - object_sz = -1; /* XXX account log_url */ @@ -109,14 +131,17 @@ MemObject::~MemObject() { debugs(20, 3, HERE << "del MemObject " << this); - const Ctx ctx = ctx_enter(url); + const Ctx ctx = ctx_enter(hasUris() ? urlXXX() : "[unknown_ctx]"); + #if URL_CHECKSUM_DEBUG - - assert(chksum == url_checksum(url)); + checkUrlChecksum(); #endif - if (!shutting_down) + if (!shutting_down) { // Store::Root() is FATALly missing during shutdown + assert(xitTable.index < 0); + assert(memCache.index < 0); assert(swapout.sio == NULL); + } data_hdr.freeContent(); @@ -135,10 +160,6 @@ ctx_exit(ctx); /* must exit before we free mem->url */ - safe_free(url); - - safe_free(log_url); /* XXX account log_url */ - safe_free(vary_headers); } @@ -149,21 +170,17 @@ } void -MemObject::write ( StoreIOBuffer writeBuffer, STMCB *callback, void *callbackData) +MemObject::write(const StoreIOBuffer &writeBuffer) { PROF_start(MemObject_write); debugs(19, 6, "memWrite: offset " << writeBuffer.offset << " len " << writeBuffer.length); - /* the offset is into the content, not the headers */ - writeBuffer.offset += (_reply ? 
_reply->hdr_sz : 0); - /* We don't separate out mime headers yet, so ensure that the first * write is at offset 0 - where they start */ assert (data_hdr.endOffset() || writeBuffer.offset == 0); assert (data_hdr.write (writeBuffer)); - callback (callbackData, writeBuffer); PROF_stop(MemObject_write); } @@ -182,7 +199,8 @@ debugs(20, DBG_IMPORTANT, "MemObject->nclients: " << nclients); debugs(20, DBG_IMPORTANT, "MemObject->reply: " << _reply); debugs(20, DBG_IMPORTANT, "MemObject->request: " << request); - debugs(20, DBG_IMPORTANT, "MemObject->log_url: " << checkNullString(log_url)); + debugs(20, DBG_IMPORTANT, "MemObject->logUri: " << logUri_); + debugs(20, DBG_IMPORTANT, "MemObject->storeId: " << storeId_); } HttpReply const * @@ -226,7 +244,7 @@ MemObject::stat(MemBuf * mb) const { mb->Printf("\t%s %s\n", - RequestMethodStr(method), log_url); + RequestMethodStr(method), logUri()); if (vary_headers) mb->Printf("\tvary_headers: %s\n", vary_headers); mb->Printf("\tinmem_lo: %" PRId64 "\n", inmem_lo); @@ -238,6 +256,17 @@ mb->Printf("\tswapout: %" PRId64 " bytes written\n", (int64_t) swapout.sio->offset()); + if (xitTable.index >= 0) + mb->Printf("\ttransient index: %d state: %d\n", + xitTable.index, xitTable.io); + if (memCache.index >= 0) + mb->Printf("\tmem-cache index: %d state: %d offset: %" PRId64 "\n", + memCache.index, memCache.io, memCache.offset); + if (object_sz >= 0) + mb->Printf("\tobject_sz: %" PRId64 "\n", object_sz); + if (smpCollapsed) + mb->Printf("\tsmp-collapsed\n"); + StoreClientStats statsVisitor(mb); for_each(clients, statsVisitor); @@ -307,7 +336,15 @@ bool MemObject::readAheadPolicyCanRead() const { - return endOffset() - getReply()->hdr_sz < lowestMemReaderOffset() + Config.readAheadGap; + const bool canRead = endOffset() - getReply()->hdr_sz < + lowestMemReaderOffset() + Config.readAheadGap; + + if (!canRead) { + debugs(19, 9, "no: " << endOffset() << '-' << getReply()->hdr_sz << + " < " << lowestMemReaderOffset() << '+' << Config.readAheadGap); + } + + return canRead; } void @@ -321,7 +358,7 @@ void MemObject::checkUrlChecksum () const { - assert(chksum == url_checksum(url)); + assert(chksum == url_checksum(urlXXX())); } #endif @@ -400,11 +437,11 @@ void MemObject::trimUnSwappable() { - int64_t new_mem_lo = policyLowestOffsetToKeep(0); - assert (new_mem_lo > 0); - - data_hdr.freeDataUpto(new_mem_lo); - inmem_lo = new_mem_lo; + if (const int64_t new_mem_lo = policyLowestOffsetToKeep(false)) { + assert (new_mem_lo > 0); + data_hdr.freeDataUpto(new_mem_lo); + inmem_lo = new_mem_lo; + } // else we should not trim anything at this time } bool === modified file 'src/MemObject.h' --- src/MemObject.h 2012-09-22 20:07:31 +0000 +++ src/MemObject.h 2013-12-31 18:49:41 +0000 @@ -57,13 +57,16 @@ MEMPROXY_CLASS(MemObject); void dump() const; - MemObject(char const *, char const *); + MemObject(); ~MemObject(); - /// replaces construction-time URLs with correct ones; see hidden_mem_obj - void resetUrls(char const *aUrl, char const *aLog_url); - - void write(StoreIOBuffer, STMCB *, void *); + /// sets store ID, log URI, and request method; TODO: find a better name + void setUris(char const *aStoreId, char const *aLogUri, const HttpRequestMethod &aMethod); + + /// whether setUris() has been called + bool hasUris() const; + + void write(const StoreIOBuffer &buf); void unlinkRequest(); HttpReply const *getReply() const; void replaceHttpReply(HttpReply *newrep); @@ -98,8 +101,19 @@ void checkUrlChecksum() const; #endif + /// Before StoreID, code assumed that MemObject stores Request 
URI. + /// After StoreID, some old code still incorrectly assumes that. + /// Use this method to mark that incorrect assumption. + const char *urlXXX() const { return storeId(); } + + /// Entry StoreID (usually just Request URI); if a buggy code requests this + /// before the information is available, returns an "[unknown_URI]" string. + const char *storeId() const; + + /// client request URI used for logging; storeId() by default + const char *logUri() const; + HttpRequestMethod method; - char *url; mem_hdr data_hdr; int64_t inmem_lo; dlink_list clients; @@ -119,12 +133,41 @@ StoreIOState::Pointer sio; /// Decision states for StoreEntry::swapoutPossible() and related code. - typedef enum { swNeedsCheck = 0, swImpossible = -1, swPossible = +1 } Decision; + typedef enum { swNeedsCheck = 0, swImpossible = -1, swPossible = +1, swStarted } Decision; Decision decision; ///< current decision state }; SwapOut swapout; + /// cache "I/O" direction and status + typedef enum { ioUndecided, ioWriting, ioReading, ioDone } Io; + + /// State of an entry with regards to the [shared] in-transit table. + class XitTable + { + public: + XitTable(): index(-1), io(ioUndecided) {} + + int32_t index; ///< entry position inside the in-transit table + Io io; ///< current I/O state + }; + XitTable xitTable; ///< current [shared] memory caching state for the entry + + /// State of an entry with regards to the [shared] memory caching. + class MemCache + { + public: + MemCache(): index(-1), offset(0), io(ioUndecided) {} + + int32_t index; ///< entry position inside the memory cache + int64_t offset; ///< bytes written/read to/from the memory cache so far + + Io io; ///< current I/O state + }; + MemCache memCache; ///< current [shared] memory caching state for the entry + + bool smpCollapsed; ///< whether this entry gets data from another worker + /* Read only - this reply must be preserved by store clients */ /* The original reply. possibly with updated metadata. */ HttpRequest *request; @@ -137,7 +180,6 @@ STABH *callback; void *data; } abort; - char *log_url; RemovalPolicyNode repl; int id; int64_t object_sz; @@ -155,6 +197,9 @@ private: HttpReply *_reply; + mutable String storeId_; ///< StoreId for our entry (usually request URI) + mutable String logUri_; ///< URI used for logging (usually request URI) + DeferredReadManager deferredReads; }; === modified file 'src/MemStore.cc' --- src/MemStore.cc 2013-12-11 22:44:59 +0000 +++ src/MemStore.cc 2014-01-01 19:20:49 +0000 @@ -5,6 +5,7 @@ #include "squid.h" #include "base/RunnersRegistry.h" +#include "CollapsedForwarding.h" #include "HttpReply.h" #include "ipc/mem/Page.h" #include "ipc/mem/Pages.h" @@ -12,15 +13,22 @@ #include "MemStore.h" #include "mime_header.h" #include "SquidConfig.h" +#include "SquidMath.h" #include "StoreStats.h" #include "tools.h" /// shared memory segment path to use for MemStore maps -static const char *ShmLabel = "cache_mem"; - -// XXX: support storage using more than one page per entry - -MemStore::MemStore(): map(NULL), theCurrentSize(0) +static const char *MapLabel = "cache_mem_map"; +/// shared memory segment path to use for the free slices index +static const char *SpaceLabel = "cache_mem_space"; +// TODO: sync with Rock::SwapDir::*Path() + +// We store free slot IDs (i.e., "space") as Page objects so that we can use +// Ipc::Mem::PageStack. Pages require pool IDs. The value here is not really +// used except for a positivity test. A unique value is handy for debugging. 
+static const uint32_t SpacePoolId = 510716; + +MemStore::MemStore(): map(NULL), lastWritingSlice(-1) { } @@ -36,20 +44,26 @@ if (entryLimit <= 0) return; // no memory cache configured or a misconfiguration - const int64_t diskMaxSize = Store::Root().maxObjectSize(); - const int64_t memMaxSize = maxObjectSize(); - if (diskMaxSize == -1) { - debugs(20, DBG_IMPORTANT, "WARNING: disk-cache maximum object size " - "is unlimited but mem-cache maximum object size is " << - memMaxSize / 1024.0 << " KB"); - } else if (diskMaxSize > memMaxSize) { - debugs(20, DBG_IMPORTANT, "WARNING: disk-cache maximum object size " - "is too large for mem-cache: " << - diskMaxSize / 1024.0 << " KB > " << - memMaxSize / 1024.0 << " KB"); + // check compatibility with the disk cache, if any + if (Config.cacheSwap.n_configured > 0) { + const int64_t diskMaxSize = Store::Root().maxObjectSize(); + const int64_t memMaxSize = maxObjectSize(); + if (diskMaxSize == -1) { + debugs(20, DBG_IMPORTANT, "WARNING: disk-cache maximum object size " + "is unlimited but mem-cache maximum object size is " << + memMaxSize / 1024.0 << " KB"); + } else if (diskMaxSize > memMaxSize) { + debugs(20, DBG_IMPORTANT, "WARNING: disk-cache maximum object size " + "is too large for mem-cache: " << + diskMaxSize / 1024.0 << " KB > " << + memMaxSize / 1024.0 << " KB"); + } } - map = new MemStoreMap(ShmLabel); + freeSlots = shm_old(Ipc::Mem::PageStack)(SpaceLabel); + + Must(!map); + map = new MemStoreMap(MapLabel); map->cleaner = this; } @@ -71,7 +85,10 @@ { storeAppendPrintf(&e, "\n\nShared Memory Cache\n"); - storeAppendPrintf(&e, "Maximum Size: %.0f KB\n", Config.memMaxSize/1024.0); + storeAppendPrintf(&e, "Maximum Size: %.0f KB\n", maxSize()/1024.0); + storeAppendPrintf(&e, "Current Size: %.2f KB %.2f%%\n", + currentSize() / 1024.0, + Math::doublePercent(currentSize(), maxSize())); if (map) { const int limit = map->entryLimit(); @@ -80,6 +97,14 @@ storeAppendPrintf(&e, "Current entries: %" PRId64 " %.2f%%\n", currentCount(), (100.0 * currentCount() / limit)); + const unsigned int slotsFree = + Ipc::Mem::PagesAvailable(Ipc::Mem::PageId::cachePage); + if (slotsFree <= static_cast(limit)) { + const int usedSlots = limit - static_cast(slotsFree); + storeAppendPrintf(&e, "Used slots: %9d %.2f%%\n", + usedSlots, (100.0 * usedSlots / limit)); + } + if (limit < 100) { // XXX: otherwise too expensive to count Ipc::ReadWriteLockStats stats; map->updateStats(stats); @@ -103,13 +128,14 @@ uint64_t MemStore::maxSize() const { - return 0; // XXX: make configurable + return Config.memMaxSize; } uint64_t MemStore::currentSize() const { - return theCurrentSize; + return Ipc::Mem::PageLevel(Ipc::Mem::PageId::cachePage) * + Ipc::Mem::PageSize(); } uint64_t @@ -121,7 +147,7 @@ int64_t MemStore::maxObjectSize() const { - return Ipc::Mem::PageSize(); + return min(Config.Store.maxInMemObjSize, Config.memMaxSize); } void @@ -155,41 +181,22 @@ if (!map) return NULL; - // XXX: replace sfileno with a bigger word (sfileno is only for cache_dirs) sfileno index; - const Ipc::StoreMapSlot *const slot = map->openForReading(key, index); + const Ipc::StoreMapAnchor *const slot = map->openForReading(key, index); if (!slot) return NULL; - const Ipc::StoreMapSlot::Basics &basics = slot->basics; - const MemStoreMap::Extras &extras = map->extras(index); - // create a brand new store entry and initialize it with stored info StoreEntry *e = new StoreEntry(); - e->lock_count = 0; - - e->swap_file_sz = basics.swap_file_sz; - e->lastref = basics.lastref; - e->timestamp = 
basics.timestamp; - e->expires = basics.expires; - e->lastmod = basics.lastmod; - e->refcount = basics.refcount; - e->flags = basics.flags; - - e->store_status = STORE_OK; - e->mem_status = IN_MEMORY; // setMemStatus(IN_MEMORY) requires mem_obj - //e->swap_status = set in StoreEntry constructor to SWAPOUT_NONE; - e->ping_status = PING_NONE; - - EBIT_SET(e->flags, ENTRY_CACHABLE); - EBIT_CLR(e->flags, RELEASE_REQUEST); - EBIT_CLR(e->flags, KEY_PRIVATE); - EBIT_SET(e->flags, ENTRY_VALIDATED); - - const bool copied = copyFromShm(*e, extras); - - // we copied everything we could to local memory; no more need to lock - map->closeForReading(index); + + // XXX: We do not know the URLs yet, only the key, but we need to parse and + // store the response for the Root().get() callers to be happy because they + // expect IN_MEMORY entries to already have the response headers and body. + e->makeMemObject(); + + anchorEntry(*e, index, *slot); + + const bool copied = copyFromShm(*e, index, *slot); if (copied) { e->hashInsert(key); @@ -197,7 +204,7 @@ } debugs(20, 3, HERE << "mem-loading failed; freeing " << index); - map->free(index); // do not let others into the same trap + map->freeEntry(index); // do not let others into the same trap return NULL; } @@ -209,188 +216,522 @@ } bool -MemStore::copyFromShm(StoreEntry &e, const MemStoreMap::Extras &extras) -{ - const Ipc::Mem::PageId &page = extras.page; - - StoreIOBuffer sourceBuf(extras.storedSize, 0, - static_cast(PagePointer(page))); - - // XXX: We do not know the URLs yet, only the key, but we need to parse and - // store the response for the Root().get() callers to be happy because they - // expect IN_MEMORY entries to already have the response headers and body. - // At least one caller calls createMemObject() if there is not one, so - // we hide the true object until that happens (to avoid leaking TBD URLs). 
- e.createMemObject("TBD", "TBD"); +MemStore::anchorCollapsed(StoreEntry &collapsed, bool &inSync) +{ + if (!map) + return false; + + sfileno index; + const Ipc::StoreMapAnchor *const slot = map->openForReading( + reinterpret_cast(collapsed.key), index); + if (!slot) + return false; + + anchorEntry(collapsed, index, *slot); + inSync = updateCollapsedWith(collapsed, index, *slot); + return true; // even if inSync is false +} + +bool +MemStore::updateCollapsed(StoreEntry &collapsed) +{ + assert(collapsed.mem_obj); + + const sfileno index = collapsed.mem_obj->memCache.index; + + // already disconnected from the cache, no need to update + if (index < 0) + return true; + + if (!map) + return false; + + const Ipc::StoreMapAnchor &anchor = map->readableEntry(index); + return updateCollapsedWith(collapsed, index, anchor); +} + +/// updates collapsed entry after its anchor has been located +bool +MemStore::updateCollapsedWith(StoreEntry &collapsed, const sfileno index, const Ipc::StoreMapAnchor &anchor) +{ + collapsed.swap_file_sz = anchor.basics.swap_file_sz; + const bool copied = copyFromShm(collapsed, index, anchor); + return copied; +} + +/// anchors StoreEntry to an already locked map entry +void +MemStore::anchorEntry(StoreEntry &e, const sfileno index, const Ipc::StoreMapAnchor &anchor) +{ + const Ipc::StoreMapAnchor::Basics &basics = anchor.basics; + + e.swap_file_sz = basics.swap_file_sz; + e.lastref = basics.lastref; + e.timestamp = basics.timestamp; + e.expires = basics.expires; + e.lastmod = basics.lastmod; + e.refcount = basics.refcount; + e.flags = basics.flags; + + assert(e.mem_obj); + if (anchor.complete()) { + e.store_status = STORE_OK; + e.mem_obj->object_sz = e.swap_file_sz; + e.setMemStatus(IN_MEMORY); + } else { + e.store_status = STORE_PENDING; + assert(e.mem_obj->object_sz < 0); + e.setMemStatus(NOT_IN_MEMORY); + } + assert(e.swap_status == SWAPOUT_NONE); // set in StoreEntry constructor + e.ping_status = PING_NONE; + + EBIT_CLR(e.flags, RELEASE_REQUEST); + EBIT_CLR(e.flags, KEY_PRIVATE); + EBIT_SET(e.flags, ENTRY_VALIDATED); + + MemObject::MemCache &mc = e.mem_obj->memCache; + mc.index = index; + mc.io = MemObject::ioReading; +} + +/// copies the entire entry from shared to local memory +bool +MemStore::copyFromShm(StoreEntry &e, const sfileno index, const Ipc::StoreMapAnchor &anchor) +{ + debugs(20, 7, "mem-loading entry " << index << " from " << anchor.start); + assert(e.mem_obj); // emulate the usual Store code but w/o inapplicable checks and callbacks: - // from store_client::readBody(): + Ipc::StoreMapSliceId sid = anchor.start; // optimize: remember the last sid + bool wasEof = anchor.complete() && sid < 0; + int64_t sliceOffset = 0; + while (sid >= 0) { + const Ipc::StoreMapSlice &slice = map->readableSlice(index, sid); + // slice state may change during copying; take snapshots now + wasEof = anchor.complete() && slice.next < 0; + const Ipc::StoreMapSlice::Size wasSize = slice.size; + + debugs(20, 9, "entry " << index << " slice " << sid << " eof " << + wasEof << " wasSize " << wasSize << " <= " << + anchor.basics.swap_file_sz << " sliceOffset " << sliceOffset << + " mem.endOffset " << e.mem_obj->endOffset()); + + if (e.mem_obj->endOffset() < sliceOffset + wasSize) { + // size of the slice data that we already copied + const size_t prefixSize = e.mem_obj->endOffset() - sliceOffset; + assert(prefixSize <= wasSize); + + const MemStoreMap::Extras &extras = map->extras(sid); + char *page = static_cast(PagePointer(extras.page)); + const StoreIOBuffer sliceBuf(wasSize - 
prefixSize, + e.mem_obj->endOffset(), + page + prefixSize); + if (!copyFromShmSlice(e, sliceBuf, wasEof)) + return false; + debugs(20, 9, "entry " << index << " copied slice " << sid << + " from " << extras.page << " +" << prefixSize); + } + // else skip a [possibly incomplete] slice that we copied earlier + + // careful: the slice may have grown _and_ gotten the next slice ID! + if (slice.next >= 0) { + assert(!wasEof); + // here we know that slice.size may not change any more + if (wasSize >= slice.size) { // did not grow since we started copying + sliceOffset += wasSize; + sid = slice.next; + } + } else if (wasSize >= slice.size) { // did not grow + break; + } + } + + if (!wasEof) { + debugs(20, 7, "mem-loaded " << e.mem_obj->endOffset() << '/' << + anchor.basics.swap_file_sz << " bytes of " << e); + return true; + } + + debugs(20, 7, "mem-loaded all " << e.mem_obj->object_sz << '/' << + anchor.basics.swap_file_sz << " bytes of " << e); + + // from StoreEntry::complete() + e.mem_obj->object_sz = e.mem_obj->endOffset(); + e.store_status = STORE_OK; + e.setMemStatus(IN_MEMORY); + + assert(e.mem_obj->object_sz >= 0); + assert(static_cast(e.mem_obj->object_sz) == anchor.basics.swap_file_sz); + // would be nice to call validLength() here, but it needs e.key + + // we read the entire response into the local memory; no more need to lock + disconnect(e); + return true; +} + +/// imports one shared memory slice into local memory +bool +MemStore::copyFromShmSlice(StoreEntry &e, const StoreIOBuffer &buf, bool eof) +{ + debugs(20, 7, "buf: " << buf.offset << " + " << buf.length); + + // from store_client::readBody() + // parse headers if needed; they might span multiple slices! HttpReply *rep = (HttpReply *)e.getReply(); - const ssize_t end = headersEnd(sourceBuf.data, sourceBuf.length); - if (!rep->parseCharBuf(sourceBuf.data, end)) { - debugs(20, DBG_IMPORTANT, "Could not parse mem-cached headers: " << e); - return false; + if (rep->pstate < psParsed) { + // XXX: have to copy because httpMsgParseStep() requires 0-termination + MemBuf mb; + mb.init(buf.length+1, buf.length+1); + mb.append(buf.data, buf.length); + mb.terminate(); + const int result = rep->httpMsgParseStep(mb.buf, buf.length, eof); + if (result > 0) { + assert(rep->pstate == psParsed); + EBIT_CLR(e.flags, ENTRY_FWD_HDR_WAIT); + } else if (result < 0) { + debugs(20, DBG_IMPORTANT, "Corrupted mem-cached headers: " << e); + return false; + } else { // more slices are needed + assert(!eof); + } } - // local memory stores both headers and body - e.mem_obj->object_sz = sourceBuf.length; // from StoreEntry::complete() - - storeGetMemSpace(sourceBuf.length); // from StoreEntry::write() - - assert(e.mem_obj->data_hdr.write(sourceBuf)); // from MemObject::write() - const int64_t written = e.mem_obj->endOffset(); - // we should write all because StoreEntry::write() never fails - assert(written >= 0 && - static_cast(written) == sourceBuf.length); - // would be nice to call validLength() here, but it needs e.key - - debugs(20, 7, HERE << "mem-loaded all " << written << " bytes of " << e << - " from " << page); - - e.hideMemObject(); - + debugs(20, 7, "rep pstate: " << rep->pstate); + + // local memory stores both headers and body so copy regardless of pstate + const int64_t offBefore = e.mem_obj->endOffset(); + assert(e.mem_obj->data_hdr.write(buf)); // from MemObject::write() + const int64_t offAfter = e.mem_obj->endOffset(); + // expect to write the entire buf because StoreEntry::write() never fails + assert(offAfter >= 0 && offBefore <= 
offAfter && + static_cast(offAfter - offBefore) == buf.length); return true; } +/// whether we should cache the entry bool -MemStore::keepInLocalMemory(const StoreEntry &e) const +MemStore::shouldCache(const StoreEntry &e) const { + if (e.mem_status == IN_MEMORY) { + debugs(20, 5, "already loaded from mem-cache: " << e); + return false; + } + + if (e.mem_obj && e.mem_obj->memCache.offset > 0) { + debugs(20, 5, "already written to mem-cache: " << e); + return false; + } + if (!e.memoryCachable()) { debugs(20, 7, HERE << "Not memory cachable: " << e); return false; // will not cache due to entry state or properties } assert(e.mem_obj); - const int64_t loadedSize = e.mem_obj->endOffset(); + + if (e.mem_obj->vary_headers) { + // XXX: We must store/load SerialisedMetaData to cache Vary in RAM + debugs(20, 5, "Vary not yet supported: " << e.mem_obj->vary_headers); + return false; + } + const int64_t expectedSize = e.mem_obj->expectedReplySize(); // may be < 0 + + // objects of unknown size are not allowed into memory cache, for now + if (expectedSize < 0) { + debugs(20, 5, "Unknown expected size: " << e); + return false; + } + + const int64_t loadedSize = e.mem_obj->endOffset(); const int64_t ramSize = max(loadedSize, expectedSize); - if (ramSize > static_cast(Config.Store.maxInMemObjSize)) { + if (ramSize > maxObjectSize()) { debugs(20, 5, HERE << "Too big max(" << loadedSize << ", " << expectedSize << "): " << e); return false; // will not cache due to cachable entry size limits } - if (!willFit(ramSize)) { - debugs(20, 5, HERE << "Wont fit max(" << - loadedSize << ", " << expectedSize << "): " << e); - return false; // will not cache due to memory cache slot limit - } - - return true; -} - -void -MemStore::considerKeeping(StoreEntry &e) -{ - if (!keepInLocalMemory(e)) - return; - - // since we copy everything at once, we can only keep complete entries - if (e.store_status != STORE_OK) { - debugs(20, 7, HERE << "Incomplete: " << e); - return; - } - - assert(e.mem_obj); - - const int64_t loadedSize = e.mem_obj->endOffset(); - const int64_t expectedSize = e.mem_obj->expectedReplySize(); - - // objects of unknown size are not allowed into memory cache, for now - if (expectedSize < 0) { - debugs(20, 5, HERE << "Unknown expected size: " << e); - return; - } - - // since we copy everything at once, we can only keep fully loaded entries - if (loadedSize != expectedSize) { - debugs(20, 7, HERE << "partially loaded: " << loadedSize << " != " << - expectedSize); - return; - } - - if (e.mem_obj->vary_headers) { - // XXX: We must store/load SerialisedMetaData to cache Vary in RAM - debugs(20, 5, "Vary not yet supported: " << e.mem_obj->vary_headers); - return; - } - - keep(e); // may still fail -} - -bool -MemStore::willFit(int64_t need) const -{ - return need <= static_cast(Ipc::Mem::PageSize()); -} - -/// allocates map slot and calls copyToShm to store the entry in shared memory -void -MemStore::keep(StoreEntry &e) -{ if (!map) { debugs(20, 5, HERE << "No map to mem-cache " << e); - return; - } - + return false; + } + + if (EBIT_TEST(e.flags, ENTRY_SPECIAL)) { + debugs(20, 5, "Not mem-caching ENTRY_SPECIAL " << e); + return false; + } + + return true; +} + +/// locks map anchor and preps to store the entry in shared memory +bool +MemStore::startCaching(StoreEntry &e) +{ sfileno index = 0; - Ipc::StoreMapSlot *slot = map->openForWriting(reinterpret_cast(e.key), index); + Ipc::StoreMapAnchor *slot = map->openForWriting(reinterpret_cast(e.key), index); if (!slot) { debugs(20, 5, HERE << "No room in 
mem-cache map to index " << e); + return false; + } + + assert(e.mem_obj); + e.mem_obj->memCache.index = index; + e.mem_obj->memCache.io = MemObject::ioWriting; + slot->set(e); + map->startAppending(index); + return true; +} + +/// copies all local data to shared memory +void +MemStore::copyToShm(StoreEntry &e) +{ + // prevents remote readers from getting ENTRY_FWD_HDR_WAIT entries and + // not knowing when the wait is over + if (EBIT_TEST(e.flags, ENTRY_FWD_HDR_WAIT)) { + debugs(20, 5, "postponing copying " << e << " for ENTRY_FWD_HDR_WAIT"); return; } - MemStoreMap::Extras &extras = map->extras(index); - if (copyToShm(e, extras)) { - slot->set(e); - map->closeForWriting(index, false); - } else { - map->abortIo(index); - } + assert(map); + assert(e.mem_obj); + + const int32_t index = e.mem_obj->memCache.index; + assert(index >= 0); + Ipc::StoreMapAnchor &anchor = map->writeableEntry(index); + + const int64_t eSize = e.mem_obj->endOffset(); + if (e.mem_obj->memCache.offset >= eSize) { + debugs(20, 5, "postponing copying " << e << " for lack of news: " << + e.mem_obj->memCache.offset << " >= " << eSize); + return; // nothing to do (yet) + } + + if (anchor.start < 0) { // must allocate the very first slot for e + Ipc::Mem::PageId page; + anchor.start = reserveSapForWriting(page); // throws + map->extras(anchor.start).page = page; + } + + lastWritingSlice = anchor.start; + const size_t sliceCapacity = Ipc::Mem::PageSize(); + + // fill, skip slices that are already full + // Optimize: remember lastWritingSlice in e.mem_obj + while (e.mem_obj->memCache.offset < eSize) { + Ipc::StoreMap::Slice &slice = + map->writeableSlice(e.mem_obj->memCache.index, lastWritingSlice); + + if (slice.size >= sliceCapacity) { + if (slice.next >= 0) { + lastWritingSlice = slice.next; + continue; + } + + Ipc::Mem::PageId page; + slice.next = lastWritingSlice = reserveSapForWriting(page); + map->extras(lastWritingSlice).page = page; + debugs(20, 7, "entry " << index << " new slice: " << lastWritingSlice); + } + + copyToShmSlice(e, anchor); + } + + debugs(20, 7, "mem-cached available " << eSize << " bytes of " << e); } -/// uses mem_hdr::copy() to copy local data to shared memory -bool -MemStore::copyToShm(StoreEntry &e, MemStoreMap::Extras &extras) +/// copies at most one slice worth of local memory to shared memory +void +MemStore::copyToShmSlice(StoreEntry &e, Ipc::StoreMapAnchor &anchor) { - Ipc::Mem::PageId page; - if (!Ipc::Mem::GetPage(Ipc::Mem::PageId::cachePage, page)) { - debugs(20, 5, HERE << "No mem-cache page for " << e); - return false; // GetPage is responsible for any cleanup on failures - } + Ipc::StoreMap::Slice &slice = + map->writeableSlice(e.mem_obj->memCache.index, lastWritingSlice); + + Ipc::Mem::PageId page = map->extras(lastWritingSlice).page; + assert(lastWritingSlice >= 0 && page); + debugs(20, 7, "entry " << e << " slice " << lastWritingSlice << " has " << + page); const int64_t bufSize = Ipc::Mem::PageSize(); - const int64_t eSize = e.mem_obj->endOffset(); - - StoreIOBuffer sharedSpace(bufSize, 0, - static_cast(PagePointer(page))); + const int64_t sliceOffset = e.mem_obj->memCache.offset % bufSize; + StoreIOBuffer sharedSpace(bufSize - sliceOffset, e.mem_obj->memCache.offset, + static_cast(PagePointer(page)) + sliceOffset); // check that we kept everything or purge incomplete/sparse cached entry const ssize_t copied = e.mem_obj->data_hdr.copy(sharedSpace); - if (eSize != copied) { - debugs(20, 2, HERE << "Failed to mem-cache " << e << ": " << - eSize << "!=" << copied); - // cleanup - 
PutPage(page); - return false; - } - - debugs(20, 7, HERE << "mem-cached all " << eSize << " bytes of " << e << - " in " << page); - - theCurrentSize += Ipc::Mem::PageSize(); - // remember storage location and size - extras.page = page; - extras.storedSize = copied; - return true; -} - -void -MemStore::cleanReadable(const sfileno fileno) -{ - Ipc::Mem::PutPage(map->extras(fileno).page); - theCurrentSize -= Ipc::Mem::PageSize(); + if (copied <= 0) { + debugs(20, 2, "Failed to mem-cache " << (bufSize - sliceOffset) << + " bytes of " << e << " from " << e.mem_obj->memCache.offset << + " in " << page); + throw TexcHere("data_hdr.copy failure"); + } + + debugs(20, 7, "mem-cached " << copied << " bytes of " << e << + " from " << e.mem_obj->memCache.offset << " in " << page); + + slice.size += copied; + e.mem_obj->memCache.offset += copied; + anchor.basics.swap_file_sz = e.mem_obj->memCache.offset; +} + +/// finds a slot and a free page to fill or throws +sfileno +MemStore::reserveSapForWriting(Ipc::Mem::PageId &page) +{ + Ipc::Mem::PageId slot; + if (freeSlots->pop(slot)) { + debugs(20, 5, "got a previously free slot: " << slot); + + if (Ipc::Mem::GetPage(Ipc::Mem::PageId::cachePage, page)) { + debugs(20, 5, "and got a previously free page: " << page); + return slot.number - 1; + } else { + debugs(20, 3, "but there is no free page, returning " << slot); + freeSlots->push(slot); + } + } + + // catch free slots delivered to noteFreeMapSlice() + assert(!waitingFor); + waitingFor.slot = &slot; + waitingFor.page = &page; + if (map->purgeOne()) { + assert(!waitingFor); // noteFreeMapSlice() should have cleared it + assert(slot.set()); + assert(page.set()); + debugs(20, 5, "got previously busy " << slot << " and " << page); + return slot.number - 1; + } + assert(waitingFor.slot == &slot && waitingFor.page == &page); + waitingFor.slot = NULL; + waitingFor.page = NULL; + + debugs(47, 3, "cannot get a slice; entries: " << map->entryCount()); + throw TexcHere("ran out of mem-cache slots"); +} + +void +MemStore::noteFreeMapSlice(const sfileno sliceId) +{ + Ipc::Mem::PageId &pageId = map->extras(sliceId).page; + debugs(20, 9, "slice " << sliceId << " freed " << pageId); + assert(pageId); + Ipc::Mem::PageId slotId; + slotId.pool = SpacePoolId; + slotId.number = sliceId + 1; + if (!waitingFor) { + // must zero pageId before we give slice (and pageId extras!) to others + Ipc::Mem::PutPage(pageId); + freeSlots->push(slotId); + } else { + *waitingFor.slot = slotId; + *waitingFor.page = pageId; + waitingFor.slot = NULL; + waitingFor.page = NULL; + pageId = Ipc::Mem::PageId(); + } +} + +void +MemStore::write(StoreEntry &e) +{ + assert(e.mem_obj); + + debugs(20, 7, "entry " << e); + + switch (e.mem_obj->memCache.io) { + case MemObject::ioUndecided: + if (!shouldCache(e) || !startCaching(e)) { + e.mem_obj->memCache.io = MemObject::ioDone; + Store::Root().transientsAbandon(e); + return; + } + break; + + case MemObject::ioDone: + case MemObject::ioReading: + return; // we should not write in all of the above cases + + case MemObject::ioWriting: + break; // already decided to write and still writing + } + + try { + copyToShm(e); + if (e.store_status == STORE_OK) // done receiving new content + completeWriting(e); + else + CollapsedForwarding::Broadcast(e); + return; + } catch (const std::exception &x) { // TODO: should we catch ... as well? 
+ debugs(20, 2, "mem-caching error writing entry " << e << ": " << x.what()); + // fall through to the error handling code + } + + disconnect(e); +} + +void +MemStore::completeWriting(StoreEntry &e) +{ + assert(e.mem_obj); + const int32_t index = e.mem_obj->memCache.index; + assert(index >= 0); + assert(map); + + debugs(20, 5, "mem-cached all " << e.mem_obj->memCache.offset << " bytes of " << e); + + e.mem_obj->memCache.index = -1; + e.mem_obj->memCache.io = MemObject::ioDone; + map->closeForWriting(index, false); + + CollapsedForwarding::Broadcast(e); // before we close our transient entry! + Store::Root().transientsCompleteWriting(e); +} + +void +MemStore::markForUnlink(StoreEntry &e) +{ + assert(e.mem_obj); + if (e.mem_obj->memCache.index >= 0) + map->freeEntry(e.mem_obj->memCache.index); +} + +void +MemStore::unlink(StoreEntry &e) +{ + if (e.mem_obj && e.mem_obj->memCache.index >= 0) { + map->freeEntry(e.mem_obj->memCache.index); + disconnect(e); + } else { + // the entry may have been loaded and then disconnected from the cache + map->freeEntryByKey(reinterpret_cast(e.key)); + } + + e.destroyMemObject(); // XXX: but it may contain useful info such as a client list. The old code used to do that though, right? +} + +void +MemStore::disconnect(StoreEntry &e) +{ + assert(e.mem_obj); + MemObject &mem_obj = *e.mem_obj; + if (mem_obj.memCache.index >= 0) { + if (mem_obj.memCache.io == MemObject::ioWriting) { + map->abortWriting(mem_obj.memCache.index); + mem_obj.memCache.index = -1; + mem_obj.memCache.io = MemObject::ioDone; + Store::Root().transientsAbandon(e); // broadcasts after the change + } else { + assert(mem_obj.memCache.io == MemObject::ioReading); + map->closeForReading(mem_obj.memCache.index); + mem_obj.memCache.index = -1; + mem_obj.memCache.io = MemObject::ioDone; + } + } } /// calculates maximum number of entries we need to store and map @@ -400,8 +741,8 @@ if (!Config.memShared || !Config.memMaxSize) return 0; // no memory cache configured - const int64_t entrySize = Ipc::Mem::PageSize(); // for now - const int64_t entryLimit = Config.memMaxSize / entrySize; + const int64_t minEntrySize = Ipc::Mem::PageSize(); + const int64_t entryLimit = Config.memMaxSize / minEntrySize; return entryLimit; } @@ -454,7 +795,7 @@ { public: /* RegisteredRunner API */ - MemStoreRr(): owner(NULL) {} + MemStoreRr(): spaceOwner(NULL), mapOwner(NULL) {} virtual void run(const RunnerRegistry &); virtual ~MemStoreRr(); @@ -462,7 +803,8 @@ virtual void create(const RunnerRegistry &); private: - MemStoreMap::Owner *owner; + Ipc::Mem::Owner *spaceOwner; ///< free slices Owner + MemStoreMap::Owner *mapOwner; ///< primary map Owner }; RunnerRegistrationEntry(rrAfterConfig, MemStoreRr); @@ -478,7 +820,6 @@ if (!Config.memShared) return; - Must(!owner); const int64_t entryLimit = MemStore::EntryLimit(); if (entryLimit <= 0) { if (Config.memMaxSize > 0) { @@ -488,10 +829,17 @@ } return; // no memory cache configured or a misconfiguration } - owner = MemStoreMap::Init(ShmLabel, entryLimit); + + Must(!spaceOwner); + spaceOwner = shm_new(Ipc::Mem::PageStack)(SpaceLabel, SpacePoolId, + entryLimit, + sizeof(Ipc::Mem::PageId)); + Must(!mapOwner); + mapOwner = MemStoreMap::Init(MapLabel, entryLimit); } MemStoreRr::~MemStoreRr() { - delete owner; + delete mapOwner; + delete spaceOwner; } === modified file 'src/MemStore.h' --- src/MemStore.h 2012-10-16 00:18:09 +0000 +++ src/MemStore.h 2013-12-31 18:49:41 +0000 @@ -2,13 +2,13 @@ #define SQUID_MEMSTORE_H #include "ipc/mem/Page.h" +#include "ipc/mem/PageStack.h" #include 
"ipc/StoreMap.h" #include "Store.h" // StoreEntry restoration info not already stored by Ipc::StoreMap struct MemStoreMapExtras { - Ipc::Mem::PageId page; ///< shared memory page with the entry content - int64_t storedSize; ///< total size of the stored entry content + Ipc::Mem::PageId page; ///< shared memory page with entry slice content }; typedef Ipc::StoreMapWithExtras MemStoreMap; @@ -20,12 +20,21 @@ MemStore(); virtual ~MemStore(); - /// cache the entry or forget about it until the next considerKeeping call - void considerKeeping(StoreEntry &e); - /// whether e should be kept in local RAM for possible future caching bool keepInLocalMemory(const StoreEntry &e) const; + /// copy non-shared entry data of the being-cached entry to our cache + void write(StoreEntry &e); + + /// all data has been received; there will be no more write() calls + void completeWriting(StoreEntry &e); + + /// remove from the cache + void unlink(StoreEntry &e); + + /// called when the entry is about to forget its association with mem cache + void disconnect(StoreEntry &e); + /* Store API */ virtual int callback(); virtual StoreEntry * get(const cache_key *); @@ -39,25 +48,50 @@ virtual void getStats(StoreInfoStats &stats) const; virtual void stat(StoreEntry &) const; virtual StoreSearch *search(String const url, HttpRequest *); + virtual void markForUnlink(StoreEntry &e); virtual void reference(StoreEntry &); virtual bool dereference(StoreEntry &, bool); virtual void maintain(); + virtual bool anchorCollapsed(StoreEntry &collapsed, bool &inSync); + virtual bool updateCollapsed(StoreEntry &collapsed); static int64_t EntryLimit(); protected: - bool willFit(int64_t needed) const; - void keep(StoreEntry &e); - - bool copyToShm(StoreEntry &e, MemStoreMap::Extras &extras); - bool copyFromShm(StoreEntry &e, const MemStoreMap::Extras &extras); + bool shouldCache(const StoreEntry &e) const; + bool startCaching(StoreEntry &e); + + void copyToShm(StoreEntry &e); + void copyToShmSlice(StoreEntry &e, Ipc::StoreMapAnchor &anchor); + bool copyFromShm(StoreEntry &e, const sfileno index, const Ipc::StoreMapAnchor &anchor); + bool copyFromShmSlice(StoreEntry &e, const StoreIOBuffer &buf, bool eof); + + void anchorEntry(StoreEntry &e, const sfileno index, const Ipc::StoreMapAnchor &anchor); + bool updateCollapsedWith(StoreEntry &collapsed, const sfileno index, const Ipc::StoreMapAnchor &anchor); + + sfileno reserveSapForWriting(Ipc::Mem::PageId &page); // Ipc::StoreMapCleaner API - virtual void cleanReadable(const sfileno fileno); + virtual void noteFreeMapSlice(const sfileno sliceId); private: + // TODO: move freeSlots into map + Ipc::Mem::Pointer freeSlots; ///< unused map slot IDs MemStoreMap *map; ///< index of mem-cached entries - uint64_t theCurrentSize; ///< currently used space in the storage area + + /// the last allocate slice for writing a store entry (during copyToShm) + sfileno lastWritingSlice; + + /// temporary storage for slot and page ID pointers; for the waiting cache + class SlotAndPage + { + public: + SlotAndPage(): slot(NULL), page(NULL) {} + bool operator !() const { return !slot && !page; } + Ipc::Mem::PageId *slot; ///< local slot variable, waiting to be filled + Ipc::Mem::PageId *page; ///< local page variable, waiting to be filled + }; + SlotAndPage waitingFor; ///< a cache for a single "hot" free slot and page }; // Why use Store as a base? MemStore and SwapDir are both "caches". 
=== modified file 'src/Server.cc' --- src/Server.cc 2013-12-19 00:55:46 +0000 +++ src/Server.cc 2014-01-01 19:20:49 +0000 @@ -75,7 +75,7 @@ fwd = theFwdState; entry = fwd->entry; - entry->lock(); + entry->lock("ServerStateData"); request = fwd->request; HTTPMSGLOCK(request); @@ -90,7 +90,7 @@ assert(!adaptedBodySource); #endif - entry->unlock(); + entry->unlock("ServerStateData"); HTTPMSGUNLOCK(request); HTTPMSGUNLOCK(theVirginReply); @@ -175,7 +175,7 @@ // give entry the reply because haveParsedReplyHeaders() expects it there entry->replaceHttpReply(theFinalReply, false); // but do not write yet haveParsedReplyHeaders(); // update the entry/reply (e.g., set timestamps) - if (EBIT_TEST(entry->flags, ENTRY_CACHABLE) && blockCaching()) + if (!EBIT_TEST(entry->flags, RELEASE_REQUEST) && blockCaching()) entry->release(); entry->startWriting(); // write the updated entry to store === modified file 'src/SquidConfig.h' --- src/SquidConfig.h 2013-12-18 17:19:00 +0000 +++ src/SquidConfig.h 2014-01-01 19:20:49 +0000 @@ -330,6 +330,7 @@ int emailErrData; int httpd_suppress_version_string; int global_internal_static; + int collapsed_forwarding; #if FOLLOW_X_FORWARDED_FOR int acl_uses_indirect_client; === modified file 'src/Store.h' --- src/Store.h 2013-07-15 07:49:43 +0000 +++ src/Store.h 2013-12-19 04:53:35 +0000 @@ -80,7 +80,6 @@ virtual const char *getMD5Text() const; StoreEntry(); - StoreEntry(const char *url, const char *log_url); virtual ~StoreEntry(); virtual HttpReply const *getReply() const; @@ -119,8 +118,13 @@ int locked() const; int validToSend() const; bool memoryCachable() const; ///< may be cached in memory - void createMemObject(const char *, const char *); - void hideMemObject(); ///< no mem_obj for callers until createMemObject + + /// if needed, initialize mem_obj member w/o URI-related information + MemObject *makeMemObject(); + + /// initialize mem_obj member (if needed) and supply URI-related info + void createMemObject(const char *storeId, const char *logUri, const HttpRequestMethod &aMethod); + void dump(int debug_lvl) const; void hashDelete(); void hashInsert(const cache_key *); @@ -147,7 +151,6 @@ virtual RefCount store() const; MemObject *mem_obj; - MemObject *hidden_mem_obj; ///< mem_obj created before URLs were known RemovalPolicyNode repl; /* START OF ON-DISK STORE_META_STD TLV field */ time_t timestamp; @@ -164,8 +167,6 @@ sdirno swap_dirn:7; - unsigned short lock_count; /* Assume < 65536! 
*/ - mem_status_t mem_status:3; ping_status_t ping_status:3; @@ -197,13 +198,23 @@ virtual void buffer(); /** flush any buffered content */ virtual void flush(); - /** reduce the memory lock count on the entry */ - virtual int unlock(); - /** increate the memory lock count on the entry */ virtual int64_t objectLen() const; virtual int64_t contentLen() const; - virtual void lock(); + /// claim shared ownership of this entry (for use in a given context) + /// matching lock() and unlock() contexts eases leak triage but is optional + void lock(const char *context); + + /// disclaim shared ownership; may remove entry from store and delete it + /// returns remaning lock level (zero for unlocked and possibly gone entry) + int unlock(const char *context); + + /// returns a local concurrent use counter, for debugging + int locks() const { return static_cast(lock_count); } + + /// update last reference timestamp and related Store metadata + void touch(); + virtual void release(); #if USE_ADAPTATION @@ -216,6 +227,8 @@ private: static MemAllocator *pool; + unsigned short lock_count; /* Assume < 65536! */ + #if USE_ADAPTATION /// producer callback registered with deferProducer AsyncCall::Pointer deferredProducer; @@ -348,6 +361,11 @@ virtual void maintain() = 0; /* perform regular maintenance should be private and self registered ... */ // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// informs stores that this entry will be eventually unlinked + virtual void markForUnlink(StoreEntry &e) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here // because test cases use non-StoreController derivatives as Root /// called when the entry is no longer needed by any transaction virtual void handleIdleEntry(StoreEntry &e) {} @@ -355,7 +373,54 @@ // XXX: This method belongs to Store::Root/StoreController, but it is here // because test cases use non-StoreController derivatives as Root /// called to get rid of no longer needed entry data in RAM, if any - virtual void maybeTrimMemory(StoreEntry &e, const bool preserveSwappable) {} + virtual void memoryOut(StoreEntry &e, const bool preserveSwappable) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// makes the entry available for collapsing future requests + virtual void allowCollapsing(StoreEntry *e, const RequestFlags &reqFlags, const HttpRequestMethod &reqMethod) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// marks the entry completed for collapsed requests + virtual void transientsCompleteWriting(StoreEntry &e) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// Update local intransit entry after changes made by appending worker. + virtual void syncCollapsed(const sfileno xitIndex) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. 
+ /// calls Root().transients->abandon() if transients are tracked + virtual void transientsAbandon(StoreEntry &e) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// number of the transient entry readers some time ago + virtual int transientReaders(const StoreEntry &e) const { return 0; } + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// disassociates the entry from the intransit table + virtual void transientsDisconnect(MemObject &mem_obj) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// removes the entry from the memory cache + virtual void memoryUnlink(StoreEntry &e) {} + + // XXX: This method belongs to Store::Root/StoreController, but it is here + // to avoid casting Root() to StoreController until Root() API is fixed. + /// disassociates the entry from the memory cache, preserving cached data + virtual void memoryDisconnect(StoreEntry &e) {} + + /// If the entry is not found, return false. Otherwise, return true after + /// tying the entry to this cache and setting inSync to updateCollapsed(). + virtual bool anchorCollapsed(StoreEntry &collapsed, bool &inSync) { return false; } + + /// update a local collapsed entry with fresh info from this cache (if any) + virtual bool updateCollapsed(StoreEntry &collapsed) { return false; } private: static RefCount CurrentRoot; @@ -383,9 +448,14 @@ StoreEntry *storeGetPublicByRequestMethod(HttpRequest * request, const HttpRequestMethod& method); /// \ingroup StoreAPI +/// Like storeCreatePureEntry(), but also locks the entry and sets entry key. StoreEntry *storeCreateEntry(const char *, const char *, const RequestFlags &, const HttpRequestMethod&); /// \ingroup StoreAPI +/// Creates a new StoreEntry with mem_obj and sets initial flags/states. 
+StoreEntry *storeCreatePureEntry(const char *storeId, const char *logUrl, const RequestFlags &, const HttpRequestMethod&); + +/// \ingroup StoreAPI void storeInit(void); /// \ingroup StoreAPI === modified file 'src/StoreClient.h' --- src/StoreClient.h 2013-06-27 15:58:46 +0000 +++ src/StoreClient.h 2013-08-15 22:09:07 +0000 @@ -96,6 +96,8 @@ StoreIOBuffer copyInto; private: + bool moreToSend() const; + void fileRead(); void scheduleDiskRead(); void scheduleMemRead(); === modified file 'src/StoreEntryStream.h' --- src/StoreEntryStream.h 2012-09-01 14:38:36 +0000 +++ src/StoreEntryStream.h 2013-06-27 21:26:57 +0000 @@ -48,13 +48,12 @@ public: StoreEntryStreamBuf(StoreEntry *anEntry) : theEntry(anEntry) { - - theEntry->lock(); + theEntry->lock("StoreEntryStreamBuf"); theEntry->buffer(); } ~StoreEntryStreamBuf() { - theEntry->unlock(); + theEntry->unlock("StoreEntryStreamBuf"); } protected: === modified file 'src/StoreIOState.h' --- src/StoreIOState.h 2013-01-20 21:25:52 +0000 +++ src/StoreIOState.h 2013-03-21 21:06:48 +0000 @@ -81,7 +81,11 @@ off_t offset() const; virtual void read_(char *buf, size_t size, off_t offset, STRCB * callback, void *callback_data) = 0; - virtual void write(char const *buf, size_t size, off_t offset, FREE * free_func) = 0; + /** write the given buffer and free it when it is no longer needed + * \param offset zero for the very first write and -1 for all other writes + * \retval false if write failed (callback has been or will be called) + */ + virtual bool write(char const *buf, size_t size, off_t offset, FREE * free_func) = 0; typedef enum { wroteAll, ///< success: caller supplied all data it wanted to swap out === modified file 'src/StoreMetaURL.cc' --- src/StoreMetaURL.cc 2013-10-25 00:13:46 +0000 +++ src/StoreMetaURL.cc 2013-12-06 23:52:26 +0000 @@ -41,12 +41,12 @@ { assert (getType() == STORE_META_URL); - if (!e->mem_obj->url) + if (!e->mem_obj->hasUris()) return true; - if (strcasecmp(e->mem_obj->url, (char *)value)) { + if (strcasecmp(e->mem_obj->urlXXX(), (char *)value)) { debugs(20, DBG_IMPORTANT, "storeClientReadHeader: URL mismatch"); - debugs(20, DBG_IMPORTANT, "\t{" << (char *) value << "} != {" << e->mem_obj->url << "}"); + debugs(20, DBG_IMPORTANT, "\t{" << (char *) value << "} != {" << e->mem_obj->urlXXX() << "}"); return false; } === modified file 'src/StoreSwapLogData.h' --- src/StoreSwapLogData.h 2013-09-30 12:30:50 +0000 +++ src/StoreSwapLogData.h 2013-12-06 23:52:26 +0000 @@ -152,8 +152,6 @@ /** * The last time that a client requested this object. - * Strictly speaking, this time is set whenever the StoreEntry - * is locked (via storeLockObject()). 
*/ SwappedTime lastref; === modified file 'src/SwapDir.h' --- src/SwapDir.h 2013-02-09 05:16:04 +0000 +++ src/SwapDir.h 2013-07-02 19:23:49 +0000 @@ -37,6 +37,9 @@ /* forward decls */ class RemovalPolicy; class MemStore; +class Transients; +class RequestFlags; +class HttpRequestMethod; /* Store dir configuration routines */ /* SwapDir *sd, char *path ( + char *opt later when the strtok mess is gone) */ @@ -58,8 +61,17 @@ virtual void get(String const, STOREGETCLIENT, void * cbdata); /* Store parent API */ + virtual void markForUnlink(StoreEntry &e); virtual void handleIdleEntry(StoreEntry &e); - virtual void maybeTrimMemory(StoreEntry &e, const bool preserveSwappable); + virtual void transientsCompleteWriting(StoreEntry &e); + virtual void transientsAbandon(StoreEntry &e); + virtual int transientReaders(const StoreEntry &e) const; + virtual void transientsDisconnect(MemObject &mem_obj); + virtual void memoryOut(StoreEntry &e, const bool preserveSwappable); + virtual void memoryUnlink(StoreEntry &e); + virtual void memoryDisconnect(StoreEntry &e); + virtual void allowCollapsing(StoreEntry *e, const RequestFlags &reqFlags, const HttpRequestMethod &reqMethod); + virtual void syncCollapsed(const sfileno xitIndex); virtual void init(); @@ -91,10 +103,18 @@ private: void createOneStore(Store &aStore); + StoreEntry *find(const cache_key *key); bool keepForLocalMemoryCache(const StoreEntry &e) const; + bool anchorCollapsed(StoreEntry &collapsed, bool &inSync); + bool anchorCollapsedOnDisk(StoreEntry &collapsed, bool &inSync); StorePointer swapDir; ///< summary view of all disk caches MemStore *memStore; ///< memory cache + + /// A shared table of public store entries that do not know whether they + /// will belong to a memory cache, a disk cache, or will be uncachable + /// when the response header comes. Used for SMP collapsed forwarding. 
+ Transients *transients; }; /* migrating from the Config based list of swapdirs */ === added file 'src/Transients.cc' --- src/Transients.cc 1970-01-01 00:00:00 +0000 +++ src/Transients.cc 2013-12-31 18:49:41 +0000 @@ -0,0 +1,425 @@ +/* + * DEBUG: section 20 Storage Manager + * + */ + +#include "squid.h" +#include "base/RunnersRegistry.h" +#include "CollapsedForwarding.h" +#include "HttpReply.h" +#include "ipc/mem/Page.h" +#include "ipc/mem/Pages.h" +#include "MemObject.h" +#include "mime_header.h" +#include "SquidConfig.h" +#include "SquidMath.h" +#include "StoreStats.h" +#include "tools.h" +#include "Transients.h" + +#if HAVE_LIMITS_H +#include +#endif + +/// shared memory segment path to use for Transients maps +static const char *MapLabel = "transients_map"; + +Transients::Transients(): map(NULL), locals(NULL) +{ +} + +Transients::~Transients() +{ + delete map; + delete locals; +} + +void +Transients::init() +{ + const int64_t entryLimit = EntryLimit(); + if (entryLimit <= 0) + return; // no SMP support or a misconfiguration + + Must(!map); + map = new TransientsMap(MapLabel); + map->cleaner = this; + + locals = new Locals(entryLimit, 0); +} + +void +Transients::getStats(StoreInfoStats &stats) const +{ +#if TRANSIENT_STATS_SUPPORTED + const size_t pageSize = Ipc::Mem::PageSize(); + + stats.mem.shared = true; + stats.mem.capacity = + Ipc::Mem::PageLimit(Ipc::Mem::PageId::cachePage) * pageSize; + stats.mem.size = + Ipc::Mem::PageLevel(Ipc::Mem::PageId::cachePage) * pageSize; + stats.mem.count = currentCount(); +#endif +} + +void +Transients::stat(StoreEntry &e) const +{ + storeAppendPrintf(&e, "\n\nTransient Objects\n"); + + storeAppendPrintf(&e, "Maximum Size: %.0f KB\n", maxSize()/1024.0); + storeAppendPrintf(&e, "Current Size: %.2f KB %.2f%%\n", + currentSize() / 1024.0, + Math::doublePercent(currentSize(), maxSize())); + + if (map) { + const int limit = map->entryLimit(); + storeAppendPrintf(&e, "Maximum entries: %9d\n", limit); + if (limit > 0) { + storeAppendPrintf(&e, "Current entries: %" PRId64 " %.2f%%\n", + currentCount(), (100.0 * currentCount() / limit)); + } + } +} + +void +Transients::maintain() +{ + // no lazy garbage collection needed +} + +uint64_t +Transients::minSize() const +{ + return 0; // XXX: irrelevant, but Store parent forces us to implement this +} + +uint64_t +Transients::maxSize() const +{ + // Squid currently does not limit the total size of all transient objects + return std::numeric_limits::max(); +} + +uint64_t +Transients::currentSize() const +{ + // TODO: we do not get enough information to calculate this + // StoreEntry should update associated stores when its size changes + return 0; +} + +uint64_t +Transients::currentCount() const +{ + return map ? 
map->entryCount() : 0; +} + +int64_t +Transients::maxObjectSize() const +{ + // Squid currently does not limit the size of a transient object + return std::numeric_limits::max(); +} + +void +Transients::reference(StoreEntry &) +{ + // no replacement policy (but the cache(s) storing the entry may have one) +} + +bool +Transients::dereference(StoreEntry &, bool) +{ + // no need to keep e in the global store_table for us; we have our own map + return false; +} + +int +Transients::callback() +{ + return 0; +} + +StoreSearch * +Transients::search(String const, HttpRequest *) +{ + fatal("not implemented"); + return NULL; +} + +StoreEntry * +Transients::get(const cache_key *key) +{ + if (!map) + return NULL; + + sfileno index; + const Ipc::StoreMapAnchor *anchor = map->openForReading(key, index); + if (!anchor) + return NULL; + + // If we already have a local entry, the store_table should have found it. + // Since it did not, the local entry key must have changed from public to + // private. We still need to keep the private entry around for syncing as + // its clients depend on it, but we should not allow new clients to join. + if (StoreEntry *oldE = locals->at(index)) { + debugs(20, 3, "not joining private " << *oldE); + assert(EBIT_TEST(oldE->flags, KEY_PRIVATE)); + } else if (StoreEntry *newE = copyFromShm(index)) { + return newE; // keep read lock to receive updates from others + } + + // private entry or loading failure + map->closeForReading(index); + return NULL; +} + +StoreEntry * +Transients::copyFromShm(const sfileno index) +{ + const TransientsMap::Extras &extras = map->extras(index); + + // create a brand new store entry and initialize it with stored info + StoreEntry *e = storeCreatePureEntry(extras.url, extras.url, + extras.reqFlags, extras.reqMethod); + + assert(e->mem_obj); + e->mem_obj->method = extras.reqMethod; + e->mem_obj->xitTable.io = MemObject::ioReading; + e->mem_obj->xitTable.index = index; + + e->setPublicKey(); + assert(e->key); + + // How do we know its SMP- and not just locally-collapsed? A worker gets + // locally-collapsed entries from the local store_table, not Transients. + // TODO: Can we remove smpCollapsed by not syncing non-transient entries? + e->mem_obj->smpCollapsed = true; + + assert(!locals->at(index)); + // We do not lock e because we do not want to prevent its destruction; + // e is tied to us via mem_obj so we will know when it is destructed. 
+ locals->at(index) = e; + return e; +} + +void +Transients::get(String const key, STOREGETCLIENT aCallback, void *aCallbackData) +{ + // XXX: not needed but Store parent forces us to implement this + fatal("Transients::get(key,callback,data) should not be called"); +} + +StoreEntry * +Transients::findCollapsed(const sfileno index) +{ + if (!map) + return NULL; + + if (StoreEntry *oldE = locals->at(index)) { + debugs(20, 5, "found " << *oldE << " at " << index << " in " << MapLabel); + assert(oldE->mem_obj && oldE->mem_obj->xitTable.index == index); + return oldE; + } + + debugs(20, 3, "no entry at " << index << " in " << MapLabel); + return NULL; +} + +void +Transients::startWriting(StoreEntry *e, const RequestFlags &reqFlags, + const HttpRequestMethod &reqMethod) +{ + assert(e); + assert(e->mem_obj); + assert(e->mem_obj->xitTable.index < 0); + + if (!map) { + debugs(20, 5, "No map to add " << *e); + return; + } + + sfileno index = 0; + Ipc::StoreMapAnchor *slot = map->openForWriting(reinterpret_cast(e->key), index); + if (!slot) { + debugs(20, 5, "collision registering " << *e); + return; + } + + try { + if (copyToShm(*e, index, reqFlags, reqMethod)) { + slot->set(*e); + e->mem_obj->xitTable.io = MemObject::ioWriting; + e->mem_obj->xitTable.index = index; + map->startAppending(index); + // keep write lock -- we will be supplying others with updates + return; + } + // fall through to the error handling code + } catch (const std::exception &x) { // TODO: should we catch ... as well? + debugs(20, 2, "error keeping entry " << index << + ' ' << *e << ": " << x.what()); + // fall through to the error handling code + } + + map->abortWriting(index); +} + +/// copies all relevant local data to shared memory +bool +Transients::copyToShm(const StoreEntry &e, const sfileno index, + const RequestFlags &reqFlags, + const HttpRequestMethod &reqMethod) +{ + TransientsMap::Extras &extras = map->extras(index); + + const char *url = e.url(); + const size_t urlLen = strlen(url); + Must(urlLen < sizeof(extras.url)); // we have space to store it all, plus 0 + strncpy(extras.url, url, sizeof(extras.url)); + extras.url[urlLen] = '\0'; + + extras.reqFlags = reqFlags; + + Must(reqMethod != Http::METHOD_OTHER); + extras.reqMethod = reqMethod.id(); + + return true; +} + +void +Transients::noteFreeMapSlice(const sfileno sliceId) +{ + // TODO: we should probably find the entry being deleted and abort it +} + +void +Transients::abandon(const StoreEntry &e) +{ + assert(e.mem_obj && map); + map->freeEntry(e.mem_obj->xitTable.index); // just marks the locked entry + CollapsedForwarding::Broadcast(e); + // We do not unlock the entry now because the problem is most likely with + // the server resource rather than a specific cache writer, so we want to + // prevent other readers from collapsing requests for that resource. 
+} + +bool +Transients::abandoned(const StoreEntry &e) const +{ + assert(e.mem_obj); + return abandonedAt(e.mem_obj->xitTable.index); +} + +/// whether an in-transit entry at the index is now abandoned by its writer +bool +Transients::abandonedAt(const sfileno index) const +{ + assert(map); + return map->readableEntry(index).waitingToBeFreed; +} + +void +Transients::completeWriting(const StoreEntry &e) +{ + if (e.mem_obj && e.mem_obj->xitTable.index >= 0) { + assert(e.mem_obj->xitTable.io == MemObject::ioWriting); + // there will be no more updates from us after this, so we must prevent + // future readers from joining + map->freeEntry(e.mem_obj->xitTable.index); // just marks the locked entry + map->closeForWriting(e.mem_obj->xitTable.index); + e.mem_obj->xitTable.index = -1; + e.mem_obj->xitTable.io = MemObject::ioDone; + } +} + +int +Transients::readers(const StoreEntry &e) const +{ + if (e.mem_obj && e.mem_obj->xitTable.index >= 0) { + assert(map); + return map->peekAtEntry(e.mem_obj->xitTable.index).lock.readers; + } + return 0; +} + +void +Transients::markForUnlink(StoreEntry &e) +{ + if (e.mem_obj && e.mem_obj->xitTable.io == MemObject::ioWriting) + abandon(e); +} + +void +Transients::disconnect(MemObject &mem_obj) +{ + if (mem_obj.xitTable.index >= 0) { + assert(map); + if (mem_obj.xitTable.io == MemObject::ioWriting) { + map->abortWriting(mem_obj.xitTable.index); + } else { + assert(mem_obj.xitTable.io == MemObject::ioReading); + map->closeForReading(mem_obj.xitTable.index); + } + locals->at(mem_obj.xitTable.index) = NULL; + mem_obj.xitTable.index = -1; + mem_obj.xitTable.io = MemObject::ioDone; + } +} + +/// calculates maximum number of entries we need to store and map +int64_t +Transients::EntryLimit() +{ + // TODO: we should also check whether any SMP-aware caching is configured + if (!UsingSmp() || !Config.onoff.collapsed_forwarding) + return 0; // no SMP collapsed forwarding possible or needed + + return 16*1024; // TODO: make configurable? 
+} + +/// initializes shared memory segment used by Transients +class TransientsRr: public Ipc::Mem::RegisteredRunner +{ +public: + /* RegisteredRunner API */ + TransientsRr(): mapOwner(NULL) {} + virtual void run(const RunnerRegistry &); + virtual ~TransientsRr(); + +protected: + virtual void create(const RunnerRegistry &); + +private: + TransientsMap::Owner *mapOwner; +}; + +RunnerRegistrationEntry(rrAfterConfig, TransientsRr); + +void +TransientsRr::run(const RunnerRegistry &r) +{ + assert(Config.memShared.configured()); + Ipc::Mem::RegisteredRunner::run(r); +} + +void +TransientsRr::create(const RunnerRegistry &) +{ + if (!Config.onoff.collapsed_forwarding) + return; + + const int64_t entryLimit = Transients::EntryLimit(); + if (entryLimit <= 0) + return; // no SMP configured or a misconfiguration + + Must(!mapOwner); + mapOwner = TransientsMap::Init(MapLabel, entryLimit); +} + +TransientsRr::~TransientsRr() +{ + delete mapOwner; +} === added file 'src/Transients.h' --- src/Transients.h 1970-01-01 00:00:00 +0000 +++ src/Transients.h 2013-12-31 18:09:24 +0000 @@ -0,0 +1,89 @@ +#ifndef SQUID_TRANSIENTS_H +#define SQUID_TRANSIENTS_H + +#include "http/MethodType.h" +#include "ipc/mem/Page.h" +#include "ipc/mem/PageStack.h" +#include "ipc/StoreMap.h" +#include "Store.h" +#include + +// StoreEntry restoration info not already stored by Ipc::StoreMap +struct TransientsMapExtras { + char url[MAX_URL+1]; ///< Request-URI; TODO: decrease MAX_URL by one + RequestFlags reqFlags; ///< request flags + Http::MethodType reqMethod; ///< request method; extensions are not supported +}; +typedef Ipc::StoreMapWithExtras TransientsMap; + +/// Keeps track of store entries being delivered to clients that arrived before +/// those entries were [fully] cached. This shared table is necessary to sync +/// the entry-writing worker with entry-reading worker(s). 
+class Transients: public Store, public Ipc::StoreMapCleaner +{ +public: + Transients(); + virtual ~Transients(); + + /// return a local, previously collapsed entry + StoreEntry *findCollapsed(const sfileno xitIndex); + + /// add an in-transit entry suitable for collapsing future requests + void startWriting(StoreEntry *e, const RequestFlags &reqFlags, const HttpRequestMethod &reqMethod); + + /// called when the in-transit entry has been successfully cached + void completeWriting(const StoreEntry &e); + + /// the calling entry writer no longer expects to cache this entry + void abandon(const StoreEntry &e); + + /// whether an in-transit entry is now abandoned by its writer + bool abandoned(const StoreEntry &e) const; + + /// number of entry readers some time ago + int readers(const StoreEntry &e) const; + + /// the caller is done writing or reading this entry + void disconnect(MemObject &mem_obj); + + /* Store API */ + virtual int callback(); + virtual StoreEntry * get(const cache_key *); + virtual void get(String const key , STOREGETCLIENT callback, void *cbdata); + virtual void init(); + virtual uint64_t maxSize() const; + virtual uint64_t minSize() const; + virtual uint64_t currentSize() const; + virtual uint64_t currentCount() const; + virtual int64_t maxObjectSize() const; + virtual void getStats(StoreInfoStats &stats) const; + virtual void stat(StoreEntry &) const; + virtual StoreSearch *search(String const url, HttpRequest *); + virtual void reference(StoreEntry &); + virtual bool dereference(StoreEntry &, bool); + virtual void markForUnlink(StoreEntry &e); + virtual void maintain(); + + static int64_t EntryLimit(); + +protected: + StoreEntry *copyFromShm(const sfileno index); + bool copyToShm(const StoreEntry &e, const sfileno index, const RequestFlags &reqFlags, const HttpRequestMethod &reqMethod); + + bool abandonedAt(const sfileno index) const; + + // Ipc::StoreMapCleaner API + virtual void noteFreeMapSlice(const sfileno sliceId); + +private: + /// shared packed info indexed by Store keys, for creating new StoreEntries + TransientsMap *map; + + typedef std::vector Locals; + /// local collapsed entries indexed by transient ID, for syncing old StoreEntries + Locals *locals; +}; + +// TODO: Why use Store as a base? We are not really a cache. + +#endif /* SQUID_TRANSIENTS_H */ === modified file 'src/acl/Asn.cc' --- src/acl/Asn.cc 2013-10-25 19:07:30 +0000 +++ src/acl/Asn.cc 2013-12-06 23:52:26 +0000 @@ -125,7 +125,7 @@ { debugs(53, 3, entry->url()); storeUnregister(sc, entry, this); - entry->unlock(); + entry->unlock("~ASState"); } /** entry into the radix tree */ @@ -273,7 +273,7 @@ asState->sc = storeClientListAdd(e, asState); FwdState::fwdStart(Comm::ConnectionPointer(), e, asState->request.getRaw()); } else { - e->lock(); + e->lock("Asn"); asState->sc = storeClientListAdd(e, asState); } === modified file 'src/cf.data.pre' --- src/cf.data.pre 2013-12-18 17:19:00 +0000 +++ src/cf.data.pre 2014-01-01 19:20:49 +0000 @@ -130,12 +130,6 @@ This option is not yet supported by Squid-3. DOC_END -NAME: collapsed_forwarding -TYPE: obsolete -DOC_START - This option is not yet supported by Squid-3. see http://bugs.squid-cache.org/show_bug.cgi?id=3495 -DOC_END - NAME: error_map TYPE: obsolete DOC_START @@ -3438,13 +3432,11 @@ ==== The rock store type ==== Usage: - cache_dir rock Directory-Name Mbytes [options] + cache_dir rock Directory-Name Mbytes [options] The Rock Store type is a database-style storage. 
All cached - entries are stored in a "database" file, using fixed-size slots, - one entry per slot. The database size is specified in MB. The - slot size is specified in bytes using the max-size option. See - below for more info on the max-size option. + entries are stored in a "database" file, using fixed-size slots. + A single entry occupies one or more slots. If possible, Squid using Rock Store creates a dedicated kid process called "disker" to avoid blocking Squid worker(s) on disk @@ -3475,6 +3467,16 @@ and when set to zero, disables the disk I/O rate limit enforcement. Currently supported by IpcIo module only. + slot-size=bytes: The size of a database "record" used for + storing cached responses. A cached response occupies at least + one slot and all database I/O is done using individual slots so + increasing this parameter leads to more disk space waste while + decreasing it leads to more disk I/O overheads. Should be a + multiple of your operating system I/O page size. Defaults to + 16KBytes. A housekeeping header is stored with each slot and + smaller slot-sizes will be rejected. The header is smaller than + 100 bytes. + ==== COMMON OPTIONS ==== @@ -5702,6 +5704,25 @@ or response to be rejected. DOC_END +NAME: collapsed_forwarding +COMMENT: (on|off) +TYPE: onoff +LOC: Config.onoff.collapsed_forwarding +DEFAULT: off +DOC_START + This option controls whether Squid is allowed to merge multiple + potentially cachable requests for the same URI before Squid knows + whether the response is going to be cachable. + + This feature is disabled by default: Enabling collapsed forwarding + needlessly delays forwarding requests that look cachable (when they are + collapsed) but then need to be forwarded individually anyway because + they end up being for uncachable content. However, in some cases, such + as acceleration of highly cachable content with periodic or grouped + expiration times, the gains from collapsing [large volumes of + simultaneous refresh requests] outweigh losses from such delays. +DOC_END + COMMENT_START TIMEOUTS ----------------------------------------------------------------------------- === modified file 'src/client_side.cc' --- src/client_side.cc 2013-12-06 14:59:47 +0000 +++ src/client_side.cc 2013-12-06 23:52:26 +0000 @@ -4262,7 +4262,7 @@ if (vary) { /* Oops... something odd is going on here.. */ debugs(33, DBG_IMPORTANT, "varyEvaluateMatch: Oops. Not a Vary object on second attempt, '" << - entry->mem_obj->url << "' '" << vary << "'"); + entry->mem_obj->urlXXX() << "' '" << vary << "'"); safe_free(request->vary_headers); return VARY_CANCEL; } @@ -4304,7 +4304,7 @@ * found the requested variant. Bail out */ debugs(33, DBG_IMPORTANT, "varyEvaluateMatch: Oops.
Not a Vary match on second attempt, '" << - entry->mem_obj->url << "' '" << vary << "'"); + entry->mem_obj->urlXXX() << "' '" << vary << "'"); return VARY_CANCEL; } } === modified file 'src/client_side_reply.cc' --- src/client_side_reply.cc 2013-12-19 13:47:27 +0000 +++ src/client_side_reply.cc 2014-01-01 19:20:49 +0000 @@ -140,7 +140,7 @@ void clientReplyContext::setReplyToStoreEntry(StoreEntry *entry) { - entry->lock(); // removeClientStoreReference() unlocks + entry->lock("clientReplyContext::setReplyToStoreEntry"); // removeClientStoreReference() unlocks sc = storeClientListAdd(entry, this); #if USE_DELAY_POOLS sc->setDelayId(DelayId::DelayClient(http)); @@ -162,7 +162,7 @@ *ep = NULL; storeUnregister(sc_tmp, e, this); *scp = NULL; - e->unlock(); + e->unlock("clientReplyContext::removeStoreReference"); } } @@ -487,8 +487,8 @@ */ assert(http->logType == LOG_TCP_HIT); - if (strcmp(e->mem_obj->url, http->request->storeId()) != 0) { - debugs(33, DBG_IMPORTANT, "clientProcessHit: URL mismatch, '" << e->mem_obj->url << "' != '" << http->request->storeId() << "'"); + if (strcmp(e->mem_obj->storeId(), http->request->storeId()) != 0) { + debugs(33, DBG_IMPORTANT, "clientProcessHit: URL mismatch, '" << e->mem_obj->storeId() << "' != '" << http->request->storeId() << "'"); http->logType = LOG_TCP_MISS; // we lack a more precise LOG_*_MISS code processMiss(); return; @@ -823,7 +823,7 @@ for (HttpRequestMethod m(Http::METHOD_NONE); m != Http::METHOD_ENUM_END; ++m) { if (m.respMaybeCacheable()) { if (StoreEntry *entry = storeGetPublic(url, m)) { - debugs(88, 5, "purging " << RequestMethodStr(m) << ' ' << url); + debugs(88, 5, "purging " << *entry << ' ' << RequestMethodStr(m) << ' ' << url); #if USE_HTCP neighborsHtcpClear(entry, url, req, m, HTCP_CLR_INVALIDATION); if (m == Http::METHOD_GET || m == Http::METHOD_HEAD) { @@ -893,17 +893,16 @@ ErrorState *err = clientBuildError(ERR_ACCESS_DENIED, Http::scForbidden, NULL, http->getConn()->clientConnection->remote, http->request); startError(err); - return; + return; // XXX: leaking unused entry if some store does not keep it } StoreIOBuffer localTempBuffer; /* Swap in the metadata */ http->storeEntry(entry); - http->storeEntry()->lock(); - http->storeEntry()->createMemObject(storeId(), http->log_uri); - - http->storeEntry()->mem_obj->method = http->request->method; + http->storeEntry()->lock("clientReplyContext::purgeFoundObject"); + http->storeEntry()->createMemObject(storeId(), http->log_uri, + http->request->method); sc = storeClientListAdd(http->storeEntry(), this); @@ -1204,7 +1203,7 @@ } if ((done = checkTransferDone()) != 0 || flags.complete) { - debugs(88, 5, "clientReplyStatus: transfer is DONE"); + debugs(88, 5, "clientReplyStatus: transfer is DONE: " << done << flags.complete); /* Ok we're finished, but how? */ const int64_t expectedBodySize = @@ -1224,13 +1223,9 @@ return STREAM_UNPLANNED_COMPLETE; } - if (http->request->flags.proxyKeepalive) { - debugs(88, 5, "clientReplyStatus: stream complete and can keepalive"); - return STREAM_COMPLETE; - } - - debugs(88, 5, "clientReplyStatus: stream was not expected to complete!"); - return STREAM_UNPLANNED_COMPLETE; + debugs(88, 5, "clientReplyStatus: stream complete; keepalive=" << + http->request->flags.proxyKeepalive); + return STREAM_COMPLETE; } // XXX: Should this be checked earlier? We could return above w/o checking. @@ -1566,6 +1561,23 @@ buildReplyHeader(); } +/// Safely disposes of an entry pointing to a cache hit that we do not want. 
+/// We cannot just ignore the entry because it may be locking or otherwise +/// holding an associated cache resource of some sort. +void +clientReplyContext::forgetHit() +{ + StoreEntry *e = http->storeEntry(); + assert(e); // or we are not dealing with a hit + // We probably have not locked the entry earlier, unfortunately. We lock it + // now so that we can unlock two lines later (and trigger cleanup). + // Ideally, ClientHttpRequest::storeEntry() should lock/unlock, but it is + // used so inconsistently that simply adding locking there leads to bugs. + e->lock("clientReplyContext::forgetHit"); + http->storeEntry(NULL); + e->unlock("clientReplyContext::forgetHit"); // may delete e +} + void clientReplyContext::identifyStoreObject() { @@ -1611,7 +1623,7 @@ if (NULL == http->storeEntry()) { /** \li If no StoreEntry object is current assume this object isn't in the cache set MISS*/ - debugs(85, 3, "clientProcessRequest2: StoreEntry is NULL - MISS"); + debugs(85, 3, "StoreEntry is NULL - MISS"); http->logType = LOG_TCP_MISS; doGetMoreData(); return; @@ -1619,7 +1631,7 @@ if (Config.onoff.offline) { /** \li If we are running in offline mode set to HIT */ - debugs(85, 3, "clientProcessRequest2: offline HIT"); + debugs(85, 3, "offline HIT " << *e); http->logType = LOG_TCP_HIT; doGetMoreData(); return; @@ -1627,16 +1639,16 @@ if (http->redirect.status) { /** \li If redirection status is True force this to be a MISS */ - debugs(85, 3, HERE << "REDIRECT status forced StoreEntry to NULL (no body on 3XX responses)"); - http->storeEntry(NULL); + debugs(85, 3, "REDIRECT status forced StoreEntry to NULL (no body on 3XX responses) " << *e); + forgetHit(); http->logType = LOG_TCP_REDIRECT; doGetMoreData(); return; } if (!e->validToSend()) { - debugs(85, 3, "clientProcessRequest2: !storeEntryValidToSend MISS" ); - http->storeEntry(NULL); + debugs(85, 3, "!storeEntryValidToSend MISS " << *e); + forgetHit(); http->logType = LOG_TCP_MISS; doGetMoreData(); return; @@ -1644,21 +1656,21 @@ if (EBIT_TEST(e->flags, ENTRY_SPECIAL)) { /* \li Special entries are always hits, no matter what the client says */ - debugs(85, 3, "clientProcessRequest2: ENTRY_SPECIAL HIT"); + debugs(85, 3, "ENTRY_SPECIAL HIT " << *e); http->logType = LOG_TCP_HIT; doGetMoreData(); return; } if (r->flags.noCache) { - debugs(85, 3, "clientProcessRequest2: no-cache REFRESH MISS"); - http->storeEntry(NULL); + debugs(85, 3, "no-cache REFRESH MISS " << *e); + forgetHit(); http->logType = LOG_TCP_CLIENT_REFRESH_MISS; doGetMoreData(); return; } - debugs(85, 3, "clientProcessRequest2: default HIT"); + debugs(85, 3, "default HIT " << *e); http->logType = LOG_TCP_HIT; doGetMoreData(); } @@ -1730,9 +1742,10 @@ /* someone found the object in the cache for us */ StoreIOBuffer localTempBuffer; - http->storeEntry()->lock(); + http->storeEntry()->lock("clientReplyContext::doGetMoreData"); - if (http->storeEntry()->mem_obj == NULL) { + MemObject *mem_obj = http->storeEntry()->makeMemObject(); + if (!mem_obj->hasUris()) { /* * This if-block exists because we don't want to clobber * a preexiting mem_obj->method value if the mem_obj @@ -1740,13 +1753,12 @@ * is a cache hit for a GET response, we want to keep * the method as GET. */ - http->storeEntry()->createMemObject(storeId(), http->log_uri); - http->storeEntry()->mem_obj->method = http->request->method; + mem_obj->setUris(storeId(), http->log_uri, http->request->method); /** * Here we can see if the object was * created using URL or alternative StoreID from helper. 
*/ - debugs(88, 3, "mem_obj->url: " << http->storeEntry()->mem_obj->url); + debugs(88, 3, "storeId: " << http->storeEntry()->mem_obj->storeId()); } sc = storeClientListAdd(http->storeEntry(), this); @@ -2182,6 +2194,16 @@ StoreEntry *e = storeCreateEntry(storeId(), http->log_uri, reqFlags, m); + // Make entry collapsable ASAP, to increase collapsing chances for others, + // TODO: every must-revalidate and similar request MUST reach the origin, + // but do we have to prohibit others from collapsing on that request? + if (Config.onoff.collapsed_forwarding && reqFlags.cachable && + !reqFlags.needValidation && + (m == Http::METHOD_GET || m == Http::METHOD_HEAD)) { + // make the entry available for future requests now + Store::Root().allowCollapsing(e, reqFlags, m); + } + sc = storeClientListAdd(e, this); #if USE_DELAY_POOLS === modified file 'src/client_side_reply.h' --- src/client_side_reply.h 2013-12-18 17:19:00 +0000 +++ src/client_side_reply.h 2014-01-01 19:20:49 +0000 @@ -140,6 +140,7 @@ void triggerInitialStoreRead(); void sendClientOldEntry(); void purgeAllCached(); + void forgetHit(); bool blockedHit() const; void sendBodyTooLargeError(); === modified file 'src/client_side_request.cc' --- src/client_side_request.cc 2013-12-06 14:59:47 +0000 +++ src/client_side_request.cc 2013-12-06 23:52:26 +0000 @@ -1636,12 +1636,12 @@ ClientHttpRequest::loggingEntry(StoreEntry *newEntry) { if (loggingEntry_) - loggingEntry_->unlock(); + loggingEntry_->unlock("ClientHttpRequest::loggingEntry"); loggingEntry_ = newEntry; if (loggingEntry_) - loggingEntry_->lock(); + loggingEntry_->lock("ClientHttpRequest::loggingEntry"); } /* @@ -1809,7 +1809,7 @@ errorAppendEntry(e, calloutContext->error); calloutContext->error = NULL; getConn()->setServerBump(srvBump); - e->unlock(); + e->unlock("ClientHttpRequest::doCallouts+sslBumpNeeded"); } else #endif { @@ -1824,7 +1824,7 @@ getConn()->flags.readMore = true; // resume any pipeline reads. node = (clientStreamNode *)client_stream.tail->data; clientStreamRead(node, this, node->readBuffer); - e->unlock(); + e->unlock("ClientHttpRequest::doCallouts-sslBumpNeeded"); return; } } === modified file 'src/enums.h' --- src/enums.h 2013-10-01 23:21:17 +0000 +++ src/enums.h 2013-12-06 23:52:26 +0000 @@ -122,7 +122,7 @@ DELAY_SENDING, RELEASE_REQUEST, REFRESH_REQUEST, - ENTRY_CACHABLE, + ENTRY_CACHABLE_RESERVED_FOR_FUTURE_USE, ENTRY_DISPATCHED, KEY_PRIVATE, ENTRY_FWD_HDR_WAIT, === modified file 'src/errorpage.cc' --- src/errorpage.cc 2013-11-23 00:58:42 +0000 +++ src/errorpage.cc 2013-12-06 23:52:26 +0000 @@ -634,15 +634,14 @@ } } - entry->lock(); + entry->lock("errorAppendEntry"); entry->buffer(); entry->replaceHttpReply( err->BuildHttpReply() ); - EBIT_CLR(entry->flags, ENTRY_FWD_HDR_WAIT); entry->flush(); entry->complete(); entry->negativeCache(); entry->releaseRequest(); - entry->unlock(); + entry->unlock("errorAppendEntry"); delete err; } === modified file 'src/format/Format.cc' --- src/format/Format.cc 2013-12-09 16:54:56 +0000 +++ src/format/Format.cc 2014-01-01 19:20:49 +0000 @@ -500,7 +500,7 @@ snprintf(tmp, sizeof(tmp), "%0*" PRId64 ".%0*d", fmt->zero && (fmt->widthMin - precision - 1 >= 0) ? 
fmt->widthMin - precision - 1 : 0, static_cast(al->cache.start_time.tv_sec), precision, (int)(al->cache.start_time.tv_usec / fmt->divisor)); out = tmp; } - break; + break; case LFT_TIME_TO_HANDLE_REQUEST: outint = al->cache.msec; === modified file 'src/fs/Makefile.am' --- src/fs/Makefile.am 2013-09-30 12:30:50 +0000 +++ src/fs/Makefile.am 2013-12-06 23:52:26 +0000 @@ -28,7 +28,9 @@ ufs/RebuildState.cc librock_la_SOURCES = \ + rock/RockDbCell.cc \ rock/RockDbCell.h \ + rock/RockForward.h \ rock/RockIoState.cc \ rock/RockIoState.h \ rock/RockIoRequests.cc \ === added file 'src/fs/rock/RockDbCell.cc' --- src/fs/rock/RockDbCell.cc 1970-01-01 00:00:00 +0000 +++ src/fs/rock/RockDbCell.cc 2013-01-07 19:41:41 +0000 @@ -0,0 +1,11 @@ +/* + * DEBUG: section 79 Disk IO Routines + */ + +#include "squid.h" +#include "fs/rock/RockDbCell.h" + +Rock::DbCellHeader::DbCellHeader() +{ + memset(this, 0, sizeof(*this)); +} === modified file 'src/fs/rock/RockDbCell.h' --- src/fs/rock/RockDbCell.h 2011-09-10 16:38:36 +0000 +++ src/fs/rock/RockDbCell.h 2013-12-31 18:49:41 +0000 @@ -1,23 +1,41 @@ #ifndef SQUID_FS_ROCK_DB_CELL_H #define SQUID_FS_ROCK_DB_CELL_H +#include "typedefs.h" + namespace Rock { /** \ingroup Rock * Meta-information at the beginning of every db cell. + * Links multiple map slots belonging to the same entry into an entry chain. * Stored on disk and used as sizeof() argument so it must remain POD. */ class DbCellHeader { public: - DbCellHeader(): payloadSize(0), reserved(0) {} - - /// whether the freshly loaded header fields make sense - bool sane() const { return payloadSize >= 0 && reserved == 0; } - - int64_t payloadSize; ///< cell contents size excluding this header - int64_t reserved; ///< reserved for future use (next cell pointer?) + DbCellHeader(); + + /// true iff no entry occupies this slot + bool empty() const { return !firstSlot && !nextSlot && !payloadSize; } + + /* members below are not meaningful if empty() */ + + /// whether this slot is not corrupted + bool sane(const size_t slotSize, int slotLimit) const { + return + 0 <= firstSlot && firstSlot < slotLimit && + -1 <= nextSlot && nextSlot < slotLimit && + version > 0 && + 0 < payloadSize && payloadSize <= slotSize - sizeof(DbCellHeader); + } + + uint64_t key[2]; ///< StoreEntry key + uint64_t entrySize; ///< total entry content size or zero if still unknown + uint32_t payloadSize; ///< slot contents size, always positive + uint32_t version; ///< detects conflicts among same-key entries + sfileno firstSlot; ///< slot ID of the first slot occupied by the entry + sfileno nextSlot; ///< slot ID of the next slot occupied by the entry }; } // namespace Rock === added file 'src/fs/rock/RockForward.h' --- src/fs/rock/RockForward.h 1970-01-01 00:00:00 +0000 +++ src/fs/rock/RockForward.h 2013-12-31 18:09:24 +0000 @@ -0,0 +1,33 @@ +#ifndef SQUID_FS_ROCK_FORWARD_H +#define SQUID_FS_ROCK_FORWARD_H + +namespace Ipc +{ + +class StoreMapAnchor; +class StoreMapSlice; + +namespace Mem +{ +class PageId; +} + +} + +namespace Rock +{ + +class SwapDir; + +/// db cell number, starting with cell 0 (always occupied by the db header) +typedef sfileno SlotId; + +class Rebuild; + +class IoState; + +class DbCellHeader; + +} + +#endif /* SQUID_FS_ROCK_FORWARD_H */ === modified file 'src/fs/rock/RockIoRequests.cc' --- src/fs/rock/RockIoRequests.cc 2012-09-01 14:38:36 +0000 +++ src/fs/rock/RockIoRequests.cc 2013-07-29 00:43:55 +0000 @@ -18,6 +18,9 @@ Rock::WriteRequest::WriteRequest(const ::WriteRequest &base, const IoState::Pointer &anSio): 
::WriteRequest(base), - sio(anSio) + sio(anSio), + sidCurrent(-1), + sidNext(-1), + eof(false) { } === modified file 'src/fs/rock/RockIoRequests.h' --- src/fs/rock/RockIoRequests.h 2012-08-28 13:00:30 +0000 +++ src/fs/rock/RockIoRequests.h 2013-07-29 00:43:55 +0000 @@ -28,6 +28,15 @@ WriteRequest(const ::WriteRequest &base, const IoState::Pointer &anSio); IoState::Pointer sio; + /// slot being written using this write request + SlotId sidCurrent; + + /// allocated next slot (negative if we are writing the last slot) + SlotId sidNext; + + /// whether this is the last request for the entry + bool eof; + private: CBDATA_CLASS2(WriteRequest); }; === modified file 'src/fs/rock/RockIoState.cc' --- src/fs/rock/RockIoState.cc 2013-10-25 00:13:46 +0000 +++ src/fs/rock/RockIoState.cc 2013-12-31 18:49:41 +0000 @@ -3,6 +3,8 @@ */ #include "squid.h" +#include "base/TextException.h" +#include "CollapsedForwarding.h" #include "DiskIO/DiskIOModule.h" #include "DiskIO/DiskIOStrategy.h" #include "DiskIO/WriteRequest.h" @@ -10,21 +12,27 @@ #include "fs/rock/RockIoState.h" #include "fs/rock/RockSwapDir.h" #include "globals.h" +#include "Mem.h" #include "MemObject.h" #include "Parsing.h" +#include "Transients.h" -Rock::IoState::IoState(SwapDir *dir, +Rock::IoState::IoState(Rock::SwapDir::Pointer &aDir, StoreEntry *anEntry, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data): - slotSize(0), - diskOffset(-1), - payloadEnd(-1) + readableAnchor_(NULL), + writeableAnchor_(NULL), + sidCurrent(-1), + dir(aDir), + slotSize(dir->slotSize), + objOffset(0), + theBuf(dir->slotSize) { e = anEntry; - // swap_filen, swap_dirn, diskOffset, and payloadEnd are set by the caller - slotSize = dir->maxObjectSize(); + e->lock("rock I/O"); + // anchor, swap_filen, and swap_dirn are set by the caller file_callback = cbFile; callback = cbIo; callback_data = cbdataReference(data); @@ -35,9 +43,17 @@ Rock::IoState::~IoState() { --store_open_disk_fd; + + // The dir map entry may still be open for reading at the point because + // the map entry lock is associated with StoreEntry, not IoState. 
+ // assert(!readableAnchor_); + assert(shutting_down || !writeableAnchor_); + if (callback_data) cbdataReferenceDone(callback_data); theFile = NULL; + + e->unlock("rock I/O"); } void @@ -48,96 +64,280 @@ theFile = aFile; } +const Ipc::StoreMapAnchor & +Rock::IoState::readAnchor() const +{ + assert(readableAnchor_); + return *readableAnchor_; +} + +Ipc::StoreMapAnchor & +Rock::IoState::writeAnchor() +{ + assert(writeableAnchor_); + return *writeableAnchor_; +} + +/// convenience wrapper returning the map slot we are reading now +const Ipc::StoreMapSlice & +Rock::IoState::currentReadableSlice() const +{ + return dir->map->readableSlice(swap_filen, sidCurrent); +} + void Rock::IoState::read_(char *buf, size_t len, off_t coreOff, STRCB *cb, void *data) { + debugs(79, 7, swap_filen << " reads from " << coreOff); + assert(theFile != NULL); assert(coreOff >= 0); - offset_ = coreOff; - - // we skip our cell header; it is only read when building the map - const int64_t cellOffset = sizeof(DbCellHeader) + - static_cast(coreOff); - assert(cellOffset <= payloadEnd); - - // Core specifies buffer length, but we must not exceed stored entry size - if (cellOffset + (int64_t)len > payloadEnd) - len = payloadEnd - cellOffset; + + // if we are dealing with the first read or + // if the offset went backwords, start searching from the beginning + if (sidCurrent < 0 || coreOff < objOffset) { + sidCurrent = readAnchor().start; + objOffset = 0; + } + + while (sidCurrent >= 0 && coreOff >= objOffset + currentReadableSlice().size) { + objOffset += currentReadableSlice().size; + sidCurrent = currentReadableSlice().next; + } assert(read.callback == NULL); assert(read.callback_data == NULL); read.callback = cb; read.callback_data = cbdataReference(data); - theFile->read(new ReadRequest( - ::ReadRequest(buf, diskOffset + cellOffset, len), this)); + // punt if read offset is too big (because of client bugs or collapsing) + if (sidCurrent < 0) { + debugs(79, 5, "no " << coreOff << " in " << *e); + callReaderBack(buf, 0); + return; + } + + offset_ = coreOff; + len = min(len, + static_cast(objOffset + currentReadableSlice().size - coreOff)); + const uint64_t diskOffset = dir->diskOffset(sidCurrent); + theFile->read(new ReadRequest(::ReadRequest(buf, + diskOffset + sizeof(DbCellHeader) + coreOff - objOffset, len), this)); } -// We only buffer data here; we actually write when close() is called. -// We buffer, in part, to avoid forcing OS to _read_ old unwritten portions -// of the slot when the write does not end at the page or sector boundary. void +Rock::IoState::callReaderBack(const char *buf, int rlen) +{ + debugs(79, 5, rlen << " bytes for " << *e); + StoreIOState::STRCB *callb = read.callback; + assert(callb); + read.callback = NULL; + void *cbdata; + if (cbdataReferenceValidDone(read.callback_data, &cbdata)) + callb(cbdata, buf, rlen, this); +} + +/// wraps tryWrite() to handle deep write failures centrally and safely +bool Rock::IoState::write(char const *buf, size_t size, off_t coreOff, FREE *dtor) { - // TODO: move to create? - if (!coreOff) { - assert(theBuf.isNull()); - assert(payloadEnd <= slotSize); - theBuf.init(min(payloadEnd, slotSize), slotSize); - // start with our header; TODO: consider making it a trailer - DbCellHeader header; - assert(static_cast(sizeof(header)) <= payloadEnd); - header.payloadSize = payloadEnd - sizeof(header); - theBuf.append(reinterpret_cast(&header), sizeof(header)); - } else { - // Core uses -1 offset as "append". Sigh. 
- assert(coreOff == -1); - assert(!theBuf.isNull()); + bool success = false; + try { + tryWrite(buf, size, coreOff); + success = true; + } catch (const std::exception &ex) { // TODO: should we catch ... as well? + debugs(79, 2, "db write error: " << ex.what()); + dir->writeError(*e); + finishedWriting(DISK_ERROR); + // 'this' might be gone beyond this point; fall through to free buf } - theBuf.append(buf, size); - offset_ += size; // so that Core thinks we wrote it + // careful: 'this' might be gone here if (dtor) (dtor)(const_cast(buf)); // cast due to a broken API? -} - -// write what was buffered during write() calls -void -Rock::IoState::startWriting() + + return success; +} + +/** + * Possibly send data to be written to disk: + * We only write data when full slot is accumulated or when close() is called. + * We buffer, in part, to avoid forcing OS to _read_ old unwritten portions of + * the slot when the write does not end at the page or sector boundary. + */ +void +Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff) +{ + debugs(79, 7, swap_filen << " writes " << size << " more"); + + // either this is the first write or append; we do not support write gaps + assert(!coreOff || coreOff == -1); + + // allocate the first slice during the first write + if (!coreOff) { + assert(sidCurrent < 0); + sidCurrent = reserveSlotForWriting(); // throws on failures + assert(sidCurrent >= 0); + writeAnchor().start = sidCurrent; + } + + // buffer incoming data in slot buffer and write overflowing or final slots + // quit when no data left or we stopped writing on reentrant error + while (size > 0 && theFile != NULL) { + assert(sidCurrent >= 0); + const size_t processed = writeToBuffer(buf, size); + buf += processed; + size -= processed; + const bool overflow = size > 0; + + // We do not write a full buffer without overflow because + // we would not yet know what to set the nextSlot to. + if (overflow) { + const SlotId sidNext = reserveSlotForWriting(); // throws + assert(sidNext >= 0); + writeToDisk(sidNext); + } else if (Store::Root().transientReaders(*e)) { + // write partial buffer for all remote hit readers to see + writeBufToDisk(-1, false); + } + } + +} + +/// Buffers incoming data for the current slot. 
+/// \return the number of bytes buffered +size_t +Rock::IoState::writeToBuffer(char const *buf, size_t size) +{ + // do not buffer a cell header for nothing + if (!size) + return 0; + + if (!theBuf.size) { + // will fill the header in writeToDisk when the next slot is known + theBuf.appended(sizeof(DbCellHeader)); + } + + size_t forCurrentSlot = min(size, static_cast(theBuf.spaceSize())); + theBuf.append(buf, forCurrentSlot); + offset_ += forCurrentSlot; // so that Core thinks we wrote it + return forCurrentSlot; +} + +/// write what was buffered during write() calls +/// negative sidNext means this is the last write request for this entry +void +Rock::IoState::writeToDisk(const SlotId sidNext) { assert(theFile != NULL); - assert(!theBuf.isNull()); + assert(theBuf.size >= sizeof(DbCellHeader)); // TODO: if DiskIO module is mmap-based, we should be writing whole pages // to avoid triggering read-page;new_head+old_tail;write-page overheads + writeBufToDisk(sidNext, sidNext < 0); + theBuf.clear(); + + sidCurrent = sidNext; +} + +/// creates and submits a request to write current slot buffer to disk +/// eof is true if and only if this is the last slot +void +Rock::IoState::writeBufToDisk(const SlotId sidNext, bool eof) +{ + // no slots after the last/eof slot (but partial slots may have a nil next) + assert(!eof || sidNext < 0); + + // finalize db cell header + DbCellHeader header; + memcpy(header.key, e->key, sizeof(header.key)); + header.firstSlot = writeAnchor().start; + header.nextSlot = sidNext; + header.payloadSize = theBuf.size - sizeof(DbCellHeader); + header.entrySize = eof ? offset_ : 0; // storeSwapOutFileClosed sets swap_file_sz after write + header.version = writeAnchor().basics.timestamp; + + // copy finalized db cell header into buffer + memcpy(theBuf.mem, &header, sizeof(DbCellHeader)); + + // and now allocate another buffer for the WriteRequest so that + // we can support concurrent WriteRequests (and to ease cleaning) + // TODO: should we limit the number of outstanding requests? + size_t wBufCap = 0; + void *wBuf = memAllocBuf(theBuf.size, &wBufCap); + memcpy(wBuf, theBuf.mem, theBuf.size); + + const uint64_t diskOffset = dir->diskOffset(sidCurrent); debugs(79, 5, HERE << swap_filen << " at " << diskOffset << '+' << - theBuf.contentSize()); - - assert(theBuf.contentSize() <= slotSize); + theBuf.size); + + WriteRequest *const r = new WriteRequest( + ::WriteRequest(static_cast(wBuf), diskOffset, theBuf.size, + memFreeBufFunc(wBufCap)), this); + r->sidCurrent = sidCurrent; + r->sidNext = sidNext; + r->eof = eof; + // theFile->write may call writeCompleted immediately - theFile->write(new WriteRequest(::WriteRequest(theBuf.content(), - diskOffset, theBuf.contentSize(), theBuf.freeFunc()), this)); -} - -// + theFile->write(r); +} + +/// finds and returns a free db slot to fill or throws +Rock::SlotId +Rock::IoState::reserveSlotForWriting() +{ + Ipc::Mem::PageId pageId; + if (dir->useFreeSlot(pageId)) + return pageId.number-1; + + // This may happen when the number of available db slots is close to the + // number of concurrent requests reading or writing those slots, which may + // happen when the db is "small" compared to the request traffic OR when we + // are rebuilding and have not loaded "many" entries or empty slots yet.
+ throw TexcHere("ran out of free db slots"); +} + void Rock::IoState::finishedWriting(const int errFlag) { // we incremented offset_ while accumulating data in write() + // we do not reset writeableAnchor_ here because we still keep the lock + CollapsedForwarding::Broadcast(*e); callBack(errFlag); } void Rock::IoState::close(int how) { - debugs(79, 3, HERE << swap_filen << " accumulated: " << offset_ << - " how=" << how); - if (how == wroteAll && !theBuf.isNull()) - startWriting(); - else - callBack(how == writerGone ? DISK_ERROR : 0); // TODO: add DISK_CALLER_GONE + debugs(79, 3, swap_filen << " offset: " << offset_ << " how: " << how << + " buf: " << theBuf.size << " callback: " << callback); + + if (!theFile) { + debugs(79, 3, "I/O already canceled"); + assert(!callback); + // We keep writeableAnchor_ after callBack() on I/O errors. + assert(!readableAnchor_); + return; + } + + switch (how) { + case wroteAll: + assert(theBuf.size > 0); // we never flush last bytes on our own + writeToDisk(-1); // flush last, yet unwritten slot to disk + return; // writeCompleted() will callBack() + + case writerGone: + assert(writeableAnchor_); + dir->writeError(*e); // abort a partially stored entry + finishedWriting(DISK_ERROR); + return; + + case readerDone: + callBack(0); + return; + } } /// close callback (STIOCB) dialer: breaks dependencies and === modified file 'src/fs/rock/RockIoState.h' --- src/fs/rock/RockIoState.h 2011-09-06 22:32:30 +0000 +++ src/fs/rock/RockIoState.h 2013-12-31 18:49:41 +0000 @@ -1,14 +1,15 @@ #ifndef SQUID_FS_ROCK_IO_STATE_H #define SQUID_FS_ROCK_IO_STATE_H -#include "MemBuf.h" -#include "SwapDir.h" +#include "fs/rock/RockSwapDir.h" +#include "MemBlob.h" class DiskFile; namespace Rock { +class DbCellHeader; class SwapDir; /// \ingroup Rock @@ -17,34 +18,52 @@ public: typedef RefCount Pointer; - IoState(SwapDir *dir, StoreEntry *e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data); + IoState(Rock::SwapDir::Pointer &aDir, StoreEntry *e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data); virtual ~IoState(); void file(const RefCount &aFile); // ::StoreIOState API virtual void read_(char *buf, size_t size, off_t offset, STRCB * callback, void *callback_data); - virtual void write(char const *buf, size_t size, off_t offset, FREE * free_func); + virtual bool write(char const *buf, size_t size, off_t offset, FREE * free_func); virtual void close(int how); - /// called by SwapDir when writing is done - void finishedWriting(int errFlag); - - int64_t slotSize; ///< db cell size - int64_t diskOffset; ///< the start of this cell inside the db file - - /// when reading: number of bytes previously written to the db cell; - /// when writing: maximum payload offset in a db cell - int64_t payloadEnd; + /// whether we are still waiting for the I/O results (i.e., not closed) + bool stillWaiting() const { return theFile != NULL; } + + /// forwards read data to the reader that initiated this I/O + void callReaderBack(const char *buf, int rlen); + + /// called by SwapDir::writeCompleted() after the last write and on error + void finishedWriting(const int errFlag); MEMPROXY_CLASS(IoState); + /* one and only one of these will be set and locked; access via *Anchor() */ + const Ipc::StoreMapAnchor *readableAnchor_; ///< starting point for reading + Ipc::StoreMapAnchor *writeableAnchor_; ///< starting point for writing + + SlotId sidCurrent; ///< ID of the db slot currently being read or written + private: - void startWriting(); + const Ipc::StoreMapAnchor 
&readAnchor() const; + Ipc::StoreMapAnchor &writeAnchor(); + const Ipc::StoreMapSlice &currentReadableSlice() const; + + void tryWrite(char const *buf, size_t size, off_t offset); + size_t writeToBuffer(char const *buf, size_t size); + void writeToDisk(const SlotId nextSlot); + void writeBufToDisk(const SlotId nextSlot, const bool eof); + SlotId reserveSlotForWriting(); + void callBack(int errflag); + Rock::SwapDir::Pointer dir; ///< swap dir that initiated I/O + const size_t slotSize; ///< db cell size + int64_t objOffset; ///< object offset for current db slot + RefCount theFile; // "file" responsible for this I/O - MemBuf theBuf; // use for write content accumulation only + MemBlob theBuf; // use for write content accumulation only }; MEMPROXY_CLASS_INLINE(IoState); === modified file 'src/fs/rock/RockRebuild.cc' --- src/fs/rock/RockRebuild.cc 2013-10-25 00:13:46 +0000 +++ src/fs/rock/RockRebuild.cc 2013-12-31 18:49:41 +0000 @@ -8,6 +8,7 @@ #include "fs/rock/RockRebuild.h" #include "fs/rock/RockSwapDir.h" #include "globals.h" +#include "ipc/StoreMap.h" #include "md5.h" #include "SquidTime.h" #include "store_rebuild.h" @@ -20,19 +21,97 @@ CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild); +/** + \defgroup RockFsRebuild Rock Store Rebuild + \ingroup Filesystems + * + \section Overview Overview + * Several layers of information are manipulated during the rebuild: + \par + * Store Entry: Response message plus all the metainformation associated with + * it. Identified by store key. At any given time, from Squid's point + * of view, there is only one entry with a given key, but several + * different entries with the same key can be observed in any historical + * archive (such as an access log or a store database). + \par + * Slot chain: A sequence of db slots representing a Store Entry state at + * some point in time. Identified by key+version combination. Due to + * transaction aborts, crashes, and idle periods, some chains may contain + * incomplete or stale information. We assume that no two different chains + * have the same key and version. If that assumption fails, we may serve a + * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed. + \par + * Db slot: A db record containing a piece of a single store entry and linked + * to other slots with the same key and version fields, forming a chain. + * Slots are identified by their absolute position in the database file, + * which is naturally unique. + \par + * Except for the "mapped", "freed", and "more" fields, LoadingEntry info is + * entry-level and is stored at fileno position. In other words, the array of + * LoadingEntries should be interpreted as two arrays, one that maps slot ID + * to the LoadingEntry::mapped/free/more members, and the second one that maps + * fileno to all other LoadingEntry members. StoreMap maps slot key to fileno. + \par + * When information from the newly loaded db slot contradicts the entry-level + * information collected so far (e.g., the versions do not match or the total + * chain size after the slot contribution exceeds the expected number), the + * whole entry (and not just the chain or the slot!) is declared corrupted. + \par + * Why invalidate the whole entry? Rock Store is written for high-load + * environments with large caches, where there are usually very few idle slots + * in the database. A space occupied by a purged entry is usually immediately + * reclaimed. A Squid crash or a transaction abort is rather unlikely to + * leave a relatively large number of stale slots in the database.
Thus, the + * number of potentially corrupted entries is relatively small. On the other + * hand, the damage from serving a single hodgepodge entry may be significant + * to the user. In such an environment, invalidating the whole entry has + * negligible performance impact but saves us from high-damage bugs. + */ + +namespace Rock +{ + +/// maintains information about the store entry being loaded from disk +/// used for identifying partially stored/loaded entries +class LoadingEntry +{ +public: + LoadingEntry(): size(0), version(0), state(leEmpty), anchored(0), + mapped(0), freed(0), more(-1) {} + + /* store entry-level information indexed by sfileno */ + uint64_t size; ///< payload seen so far + uint32_t version; ///< DbCellHeader::version to distinguish same-URL chains + uint32_t state:3; ///< current entry state (one of the State values) + uint32_t anchored:1; ///< whether we loaded the inode slot for this entry + + /* db slot-level information indexed by slotId, starting with firstSlot */ + uint32_t mapped:1; ///< whether this slot was added to a mapped entry + uint32_t freed:1; ///< whether this slot was marked as free + sfileno more:25; ///< another slot in some entry chain (unordered) + bool used() const { return freed || mapped || more != -1; } + + /// possible entry states + typedef enum { leEmpty = 0, leLoading, leLoaded, leCorrupted, leIgnored } State; +}; + +} /* namespace Rock */ + Rock::Rebuild::Rebuild(SwapDir *dir): AsyncJob("Rock::Rebuild"), sd(dir), + entries(NULL), dbSize(0), dbEntrySize(0), dbEntryLimit(0), fd(-1), dbOffset(0), - filen(0) + loadingPos(0), + validationPos(0) { assert(sd); memset(&counts, 0, sizeof(counts)); dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste - dbEntrySize = sd->max_objsize; + dbEntrySize = sd->slotSize; dbEntryLimit = sd->entryLimit(); } @@ -40,6 +119,7 @@ { if (fd >= 0) file_close(fd); + delete[] entries; } /// prepares and initiates entry loading sequence @@ -61,12 +141,18 @@ if (fd < 0) failure("cannot open db", errno); - char buf[SwapDir::HeaderSize]; - if (read(fd, buf, sizeof(buf)) != SwapDir::HeaderSize) + char hdrBuf[SwapDir::HeaderSize]; + if (read(fd, hdrBuf, sizeof(hdrBuf)) != SwapDir::HeaderSize) failure("cannot read db header", errno); + // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours + assert(sizeof(DbCellHeader) < SM_PAGE_SIZE); + buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE); + dbOffset = SwapDir::HeaderSize; - filen = 0; + loadingPos = 0; + + entries = new LoadingEntry[dbEntryLimit]; checkpoint(); } @@ -82,7 +168,8 @@ bool Rock::Rebuild::doneAll() const { - return dbOffset >= dbSize && AsyncJob::doneAll(); + return dbOffset >= dbSize && validationPos >= dbEntryLimit && + AsyncJob::doneAll(); } void @@ -95,7 +182,18 @@ void Rock::Rebuild::steps() { - debugs(47,5, HERE << sd->index << " filen " << filen << " at " << + if (dbOffset < dbSize) + loadingSteps(); + else + validationSteps(); + + checkpoint(); +} + +void +Rock::Rebuild::loadingSteps() +{ + debugs(47,5, sd->index << " slot " << loadingPos << " at " << dbOffset << " <= " << dbSize); // Balance our desire to maximize the number of entries processed at once @@ -106,9 +204,9 @@ int loaded = 0; while (loaded < dbEntryLimit && dbOffset < dbSize) { - doOneEntry(); + loadOneSlot(); dbOffset += dbEntrySize; - ++filen; + ++loadingPos; ++loaded; if (counts.scancount % 1000 == 0)
filen " << filen << " at " << + debugs(47,5, sd->index << " slot " << loadingPos << " at " << dbOffset << " <= " << dbSize); ++counts.scancount; @@ -140,49 +236,143 @@ if (lseek(fd, dbOffset, SEEK_SET) < 0) failure("cannot seek to db entry", errno); - MemBuf buf; - buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE); + buf.reset(); if (!storeRebuildLoadEntry(fd, sd->index, buf, counts)) return; + const SlotId slotId = loadingPos; + // get our header DbCellHeader header; if (buf.contentSize() < static_cast(sizeof(header))) { debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " << - "Ignoring truncated cache entry meta data at " << dbOffset); - ++counts.invalid; + "Ignoring truncated " << buf.contentSize() << "-byte " << + "cache entry meta data at " << dbOffset); + freeSlotIfIdle(slotId, true); return; } memcpy(&header, buf.content(), sizeof(header)); - - if (!header.sane()) { + if (header.empty()) { + freeSlotIfIdle(slotId, false); + return; + } + if (!header.sane(dbEntrySize, dbEntryLimit)) { debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " << "Ignoring malformed cache entry meta data at " << dbOffset); - ++counts.invalid; + freeSlotIfIdle(slotId, true); return; } buf.consume(sizeof(header)); // optimize to avoid memmove() + useNewSlot(slotId, header); +} + +/// parse StoreEntry basics and add them to the map, returning true on success +bool +Rock::Rebuild::importEntry(Ipc::StoreMapAnchor &anchor, const sfileno fileno, const DbCellHeader &header) +{ cache_key key[SQUID_MD5_DIGEST_LENGTH]; StoreEntry loadedE; - if (!storeRebuildParseEntry(buf, loadedE, key, counts, header.payloadSize)) { - // skip empty slots - if (loadedE.swap_filen > 0 || loadedE.swap_file_sz > 0) { - ++counts.invalid; - //sd->unlink(filen); leave garbage on disk, it should not hurt - } - return; - } - - assert(loadedE.swap_filen < dbEntryLimit); - if (!storeRebuildKeepEntry(loadedE, key, counts)) - return; - - ++counts.objcount; + const uint64_t knownSize = header.entrySize > 0 ? 
+ header.entrySize : anchor.basics.swap_file_sz.get(); + if (!storeRebuildParseEntry(buf, loadedE, key, counts, knownSize)) + return false; + + // the entry size may still be unknown at this time + + debugs(47, 8, "importing basics for entry " << fileno << + " swap_file_sz: " << loadedE.swap_file_sz); + anchor.set(loadedE); + + // we have not validated whether all db cells for this entry were loaded + EBIT_CLR(anchor.basics.flags, ENTRY_VALIDATED); + // loadedE->dump(5); - sd->addEntry(filen, header, loadedE); + return true; +} + +void +Rock::Rebuild::validationSteps() +{ + debugs(47, 5, sd->index << " validating from " << validationPos); + + // see loadingSteps() for the rationale; TODO: avoid duplication + const int maxSpentMsec = 50; // keep small: validation does not do I/O + const timeval loopStart = current_time; + + int validated = 0; + while (validationPos < dbEntryLimit) { + validateOneEntry(); + ++validationPos; + ++validated; + + if (validationPos % 1000 == 0) + debugs(20, 2, "validated: " << validationPos); + + if (opt_foreground_rebuild) + continue; // skip "few entries at a time" check below + + getCurrentTime(); + const double elapsedMsec = tvSubMsec(loopStart, current_time); + if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) { + debugs(47, 5, "pausing after " << validated << " entries in " << + elapsedMsec << "ms; " << (elapsedMsec/validated) << "ms per entry"); + break; + } + } +} + +void +Rock::Rebuild::validateOneEntry() +{ + LoadingEntry &e = entries[validationPos]; + switch (e.state) { + + case LoadingEntry::leEmpty: + break; // no entry hashed to this position + + case LoadingEntry::leLoading: + freeBadEntry(validationPos, "partially stored"); + break; + + case LoadingEntry::leLoaded: + break; // we have already unlocked this entry + + case LoadingEntry::leCorrupted: + break; // we have already removed this entry + } +} + +/// Marks remaining bad entry slots as free and unlocks the entry. The map +/// cannot do this because Loading entries may have holes in the slots chain. 
+void +Rock::Rebuild::freeBadEntry(const sfileno fileno, const char *eDescription) +{ + debugs(47, 2, "cache_dir #" << sd->index << ' ' << eDescription << + " entry " << fileno << " is ignored during rebuild"); + + Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno); + + bool freedSome = false; + // free all loaded non-anchor slots + SlotId slotId = entries[anchor.start].more; + while (slotId >= 0) { + const SlotId next = entries[slotId].more; + freeSlot(slotId, false); + slotId = next; + freedSome = true; + } + // free anchor slot if it was loaded + if (entries[fileno].anchored) { + freeSlot(anchor.start, false); + freedSome = true; + } + assert(freedSome); + + sd->map->forgetWritingEntry(fileno); + ++counts.invalid; } void @@ -197,7 +387,7 @@ void Rock::Rebuild::failure(const char *msg, int errNo) { - debugs(47,5, HERE << sd->index << " filen " << filen << " at " << + debugs(47,5, sd->index << " slot " << loadingPos << " at " << dbOffset << " <= " << dbSize); if (errNo) @@ -208,3 +398,286 @@ fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.", sd->index, sd->filePath, msg); } + +/// adds slot to the free slot index +void +Rock::Rebuild::freeSlot(const SlotId slotId, const bool invalid) +{ + debugs(47,5, sd->index << " frees slot " << slotId); + LoadingEntry &le = entries[slotId]; + assert(!le.freed); + le.freed = 1; + + if (invalid) { + ++counts.invalid; + //sd->unlink(fileno); leave garbage on disk, it should not hurt + } + + Ipc::Mem::PageId pageId; + pageId.pool = sd->index+1; + pageId.number = slotId+1; + sd->freeSlots->push(pageId); +} + +/// adds slot to the free slot index but only if the slot is unused +void +Rock::Rebuild::freeSlotIfIdle(const SlotId slotId, const bool invalid) +{ + const LoadingEntry &le = entries[slotId]; + + // mapped slots must be freed via freeBadEntry() to keep the map in sync + assert(!le.mapped); + + if (!le.used()) + freeSlot(slotId, invalid); +} + +/// adds slot to the entry chain in the map +void +Rock::Rebuild::mapSlot(const SlotId slotId, const DbCellHeader &header) +{ + LoadingEntry &le = entries[slotId]; + assert(!le.mapped); + assert(!le.freed); + le.mapped = 1; + + Ipc::StoreMapSlice slice; + slice.next = header.nextSlot; + slice.size = header.payloadSize; + sd->map->importSlice(slotId, slice); +} + +/// adds slot to an existing entry chain; caller must check that the slot +/// belongs to the chain it is being added to +void +Rock::Rebuild::addSlotToEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header) +{ + LoadingEntry &le = entries[fileno]; + Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno); + + assert(le.version == header.version); + + // mark anchor as loaded or add the secondary slot to the chain + LoadingEntry &inode = entries[header.firstSlot]; + if (header.firstSlot == slotId) { + debugs(47,5, "adding inode"); + assert(!inode.freed); + le.anchored = 1; + } else { + debugs(47,9, "linking " << slotId << " to " << inode.more); + // we do not need to preserve the order + LoadingEntry &slice = entries[slotId]; + assert(!slice.freed); + assert(slice.more < 0); + slice.more = inode.more; + inode.more = slotId; + } + + if (header.firstSlot == slotId && !importEntry(anchor, fileno, header)) { + le.state = LoadingEntry::leCorrupted; + freeBadEntry(fileno, "corrupted metainfo"); + return; + } + + // set total entry size and/or check it for consistency + debugs(47, 8, "header.entrySize: " << header.entrySize << " swap_file_sz: " << anchor.basics.swap_file_sz); + uint64_t totalSize = header.entrySize; + 
assert(totalSize != static_cast(-1)); + if (!totalSize && anchor.basics.swap_file_sz) { + assert(anchor.basics.swap_file_sz != static_cast(-1)); + // perhaps we loaded a later slot (with entrySize) earlier + totalSize = anchor.basics.swap_file_sz; + } else if (totalSize && !anchor.basics.swap_file_sz) { + anchor.basics.swap_file_sz = totalSize; + assert(anchor.basics.swap_file_sz != static_cast(-1)); + } else if (totalSize != anchor.basics.swap_file_sz) { + le.state = LoadingEntry::leCorrupted; + freeBadEntry(fileno, "size mismatch"); + return; + } + + le.size += header.payloadSize; + + if (totalSize > 0 && le.size > totalSize) { // overflow + debugs(47, 8, "overflow: " << le.size << " > " << totalSize); + le.state = LoadingEntry::leCorrupted; + freeBadEntry(fileno, "overflowing"); + return; + } + + mapSlot(slotId, header); + if (totalSize > 0 && le.size == totalSize) { + // entry fully loaded, unlock it + // we have validated that all db cells for this entry were loaded + EBIT_SET(anchor.basics.flags, ENTRY_VALIDATED); + le.state = LoadingEntry::leLoaded; + sd->map->closeForWriting(fileno, false); + ++counts.objcount; + } +} + +/// initialize housekeeping information for a newly accepted entry +void +Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor &anchor, const sfileno fileno, const DbCellHeader &header) +{ + anchor.setKey(reinterpret_cast(header.key)); + assert(header.firstSlot >= 0); + anchor.start = header.firstSlot; + + assert(anchor.basics.swap_file_sz != static_cast(-1)); + + LoadingEntry &le = entries[fileno]; + le.state = LoadingEntry::leLoading; + le.version = header.version; + le.size = 0; +} + +/// handle a slot from an entry that we have not seen before +void +Rock::Rebuild::startNewEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header) +{ + // If some other from-disk entry is/was using this slot as its inode OR + // if some other from-disk entry is/was using our inode slot, then the + // entries are conflicting. We cannot identify other entries, so we just + // remove ours and hope that the others were/will be handled correctly. + const LoadingEntry &slice = entries[slotId]; + const LoadingEntry &inode = entries[header.firstSlot]; + if (slice.used() || inode.used()) { + debugs(47,8, "slice/inode used: " << slice.used() << inode.used()); + LoadingEntry &le = entries[fileno]; + le.state = LoadingEntry::leCorrupted; + freeSlotIfIdle(slotId, slotId == header.firstSlot); + // if not idle, the other entry will handle its slice + ++counts.clashcount; + return; + } + + // A miss may have been stored at our fileno while we were loading other + // slots from disk. We ought to preserve that entry because it is fresher. + const bool overwriteExisting = false; + if (Ipc::StoreMap::Anchor *anchor = sd->map->openForWritingAt(fileno, overwriteExisting)) { + primeNewEntry(*anchor, fileno, header); + addSlotToEntry(fileno, slotId, header); // may fail + assert(anchor->basics.swap_file_sz != static_cast(-1)); + } else { + // A new from-network entry is occupying our map slot; let it be, but + // save us from the trouble of going through the above motions again. + LoadingEntry &le = entries[fileno]; + le.state = LoadingEntry::leIgnored; + freeSlotIfIdle(slotId, false); + } +} + +/// does the header belong to the fileno entry being loaded? 
+bool +Rock::Rebuild::sameEntry(const sfileno fileno, const DbCellHeader &header) const +{ + const Ipc::StoreMap::Anchor &anchor = sd->map->writeableEntry(fileno); + const LoadingEntry &le = entries[fileno]; + // any order will work, but do fast comparisons first: + return le.version == header.version && + anchor.start == static_cast(header.firstSlot) && + anchor.sameKey(reinterpret_cast(header.key)); +} + +/// is the new header consistent with information already loaded? +bool +Rock::Rebuild::canAdd(const sfileno fileno, const SlotId slotId, const DbCellHeader &header) const +{ + if (!sameEntry(fileno, header)) { + debugs(79, 7, "cannot add; wrong entry"); + return false; + } + + const LoadingEntry &le = entries[slotId]; + // We cannot add a slot that was already declared free or mapped. + if (le.freed || le.mapped) { + debugs(79, 7, "cannot add; freed/mapped: " << le.freed << le.mapped); + return false; + } + + if (slotId == header.firstSlot) { + // If we are the inode, the anchored flag cannot be set yet. + if (entries[fileno].anchored) { + debugs(79, 7, "cannot add; extra anchor"); + return false; + } + + // And there should have been some other slot for this entry to exist. + if (le.more < 0) { + debugs(79, 7, "cannot add; missing slots"); + return false; + } + + return true; + } + + // We are the continuation slice so the more field is reserved for us. + if (le.more >= 0) { + debugs(79, 7, "cannot add; foreign slot"); + return false; + } + + return true; +} + +/// handle freshly loaded (and validated) db slot header +void +Rock::Rebuild::useNewSlot(const SlotId slotId, const DbCellHeader &header) +{ + LoadingEntry &slice = entries[slotId]; + assert(!slice.freed); // we cannot free what was not loaded + + const cache_key *const key = + reinterpret_cast(header.key); + const sfileno fileno = sd->map->anchorIndexByKey(key); + assert(0 <= fileno && fileno < dbEntryLimit); + + LoadingEntry &le = entries[fileno]; + debugs(47,9, "entry " << fileno << " state: " << le.state << ", inode: " << + header.firstSlot << ", size: " << header.payloadSize); + + switch (le.state) { + + case LoadingEntry::leEmpty: { + startNewEntry(fileno, slotId, header); + break; + } + + case LoadingEntry::leLoading: { + if (canAdd(fileno, slotId, header)) { + addSlotToEntry(fileno, slotId, header); + } else { + // either the loading chain or this slot is stale; + // be conservative and ignore both (and any future ones) + le.state = LoadingEntry::leCorrupted; + freeBadEntry(fileno, "duplicated"); + freeSlotIfIdle(slotId, slotId == header.firstSlot); + ++counts.dupcount; + } + break; + } + + case LoadingEntry::leLoaded: { + // either the previously loaded chain or this slot is stale; + // be conservative and ignore both (and any future ones) + le.state = LoadingEntry::leCorrupted; + sd->map->freeEntry(fileno); // may not be immediately successful + freeSlotIfIdle(slotId, slotId == header.firstSlot); + ++counts.dupcount; + break; + } + + case LoadingEntry::leCorrupted: { + // previously seen slots messed things up so we must ignore this one + freeSlotIfIdle(slotId, false); + break; + } + + case LoadingEntry::leIgnored: { + // already replaced by a fresher or colliding from-network entry + freeSlotIfIdle(slotId, false); + break; + } + } +} === modified file 'src/fs/rock/RockRebuild.h' --- src/fs/rock/RockRebuild.h 2012-09-06 14:22:03 +0000 +++ src/fs/rock/RockRebuild.h 2013-12-31 18:09:24 +0000 @@ -3,12 +3,14 @@ #include "base/AsyncJob.h" #include "cbdata.h" +#include "fs/rock/RockForward.h" +#include "MemBuf.h" #include 
"store_rebuild.h" namespace Rock { -class SwapDir; +class LoadingEntry; /// \ingroup Rock /// manages store rebuild process: loading meta information from db on disk @@ -27,10 +29,30 @@ private: void checkpoint(); void steps(); - void doOneEntry(); + void loadingSteps(); + void validationSteps(); + void loadOneSlot(); + void validateOneEntry(); + bool importEntry(Ipc::StoreMapAnchor &anchor, const sfileno slotId, const DbCellHeader &header); + void freeBadEntry(const sfileno fileno, const char *eDescription); + void failure(const char *msg, int errNo = 0); + void startNewEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header); + void primeNewEntry(Ipc::StoreMapAnchor &anchor, const sfileno fileno, const DbCellHeader &header); + void addSlotToEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header); + void useNewSlot(const SlotId slotId, const DbCellHeader &header); + + void mapSlot(const SlotId slotId, const DbCellHeader &header); + void freeSlotIfIdle(const SlotId slotId, const bool invalid); + void freeBusySlot(const SlotId slotId, const bool invalid); + void freeSlot(const SlotId slotId, const bool invalid); + + bool canAdd(const sfileno fileno, const SlotId slotId, const DbCellHeader &header) const; + bool sameEntry(const sfileno fileno, const DbCellHeader &header) const; + SwapDir *sd; + LoadingEntry *entries; ///< store entries being loaded from disk int64_t dbSize; int dbEntrySize; @@ -38,7 +60,9 @@ int fd; // store db file descriptor int64_t dbOffset; - int filen; + sfileno loadingPos; ///< index of the db slot being loaded from disk now + sfileno validationPos; ///< index of the loaded db slot being validated now + MemBuf buf; ///< space to load current db slot (and entry metadata) into StoreRebuildData counts; === modified file 'src/fs/rock/RockSwapDir.cc' --- src/fs/rock/RockSwapDir.cc 2013-10-25 00:13:46 +0000 +++ src/fs/rock/RockSwapDir.cc 2013-12-31 18:49:41 +0000 @@ -4,6 +4,7 @@ #include "squid.h" #include "cache_cf.h" +#include "CollapsedForwarding.h" #include "ConfigOption.h" #include "DiskIO/DiskIOModule.h" #include "DiskIO/DiskIOStrategy.h" @@ -30,7 +31,9 @@ const int64_t Rock::SwapDir::HeaderSize = 16*1024; -Rock::SwapDir::SwapDir(): ::SwapDir("rock"), filePath(NULL), io(NULL), map(NULL) +Rock::SwapDir::SwapDir(): ::SwapDir("rock"), + slotSize(HeaderSize), filePath(NULL), map(NULL), io(NULL), + waitingForPage(NULL) { } @@ -62,32 +65,14 @@ return NULL; sfileno filen; - const Ipc::StoreMapSlot *const slot = map->openForReading(key, filen); + const Ipc::StoreMapAnchor *const slot = map->openForReading(key, filen); if (!slot) return NULL; - const Ipc::StoreMapSlot::Basics &basics = slot->basics; - // create a brand new store entry and initialize it with stored basics StoreEntry *e = new StoreEntry(); - e->lock_count = 0; - e->swap_dirn = index; - e->swap_filen = filen; - e->swap_file_sz = basics.swap_file_sz; - e->lastref = basics.lastref; - e->timestamp = basics.timestamp; - e->expires = basics.expires; - e->lastmod = basics.lastmod; - e->refcount = basics.refcount; - e->flags = basics.flags; - e->store_status = STORE_OK; - e->setMemStatus(NOT_IN_MEMORY); - e->swap_status = SWAPOUT_DONE; - e->ping_status = PING_NONE; - EBIT_SET(e->flags, ENTRY_CACHABLE); - EBIT_CLR(e->flags, RELEASE_REQUEST); - EBIT_CLR(e->flags, KEY_PRIVATE); - EBIT_SET(e->flags, ENTRY_VALIDATED); + anchorEntry(*e, filen, *slot); + e->hashInsert(key); trackReferences(*e); @@ -95,6 +80,75 @@ // the disk entry remains open for reading, protected from modifications } 
+bool +Rock::SwapDir::anchorCollapsed(StoreEntry &collapsed, bool &inSync) +{ + if (!map || !theFile || !theFile->canRead()) + return false; + + sfileno filen; + const Ipc::StoreMapAnchor *const slot = map->openForReading( + reinterpret_cast(collapsed.key), filen); + if (!slot) + return false; + + anchorEntry(collapsed, filen, *slot); + inSync = updateCollapsedWith(collapsed, *slot); + return true; // even if inSync is false +} + +bool +Rock::SwapDir::updateCollapsed(StoreEntry &collapsed) +{ + if (!map || !theFile || !theFile->canRead()) + return false; + + if (collapsed.swap_filen < 0) // no longer using a disk cache + return true; + assert(collapsed.swap_dirn == index); + + const Ipc::StoreMapAnchor &s = map->readableEntry(collapsed.swap_filen); + return updateCollapsedWith(collapsed, s); +} + +bool +Rock::SwapDir::updateCollapsedWith(StoreEntry &collapsed, const Ipc::StoreMapAnchor &anchor) +{ + collapsed.swap_file_sz = anchor.basics.swap_file_sz; + return true; +} + +void +Rock::SwapDir::anchorEntry(StoreEntry &e, const sfileno filen, const Ipc::StoreMapAnchor &anchor) +{ + const Ipc::StoreMapAnchor::Basics &basics = anchor.basics; + + e.swap_file_sz = basics.swap_file_sz; + e.lastref = basics.lastref; + e.timestamp = basics.timestamp; + e.expires = basics.expires; + e.lastmod = basics.lastmod; + e.refcount = basics.refcount; + e.flags = basics.flags; + + if (anchor.complete()) { + e.store_status = STORE_OK; + e.swap_status = SWAPOUT_DONE; + } else { + e.store_status = STORE_PENDING; + e.swap_status = SWAPOUT_WRITING; // even though another worker writes? + } + + e.ping_status = PING_NONE; + + EBIT_CLR(e.flags, RELEASE_REQUEST); + EBIT_CLR(e.flags, KEY_PRIVATE); + EBIT_SET(e.flags, ENTRY_VALIDATED); + + e.swap_dirn = index; + e.swap_filen = filen; +} + void Rock::SwapDir::disconnect(StoreEntry &e) { assert(e.swap_dirn == index); @@ -105,17 +159,33 @@ // do not rely on e.swap_status here because there is an async delay // before it switches from SWAPOUT_WRITING to SWAPOUT_DONE. - // since e has swap_filen, its slot is locked for either reading or writing - map->abortIo(e.swap_filen); - e.swap_dirn = -1; - e.swap_filen = -1; - e.swap_status = SWAPOUT_NONE; + // since e has swap_filen, its slot is locked for reading and/or writing + // but it is difficult to know whether THIS worker is reading or writing e, + // especially since we may switch from writing to reading. This code relies + // on Rock::IoState::writeableAnchor_ being set when we locked for writing. + if (e.mem_obj && e.mem_obj->swapout.sio != NULL && + dynamic_cast(*e.mem_obj->swapout.sio).writeableAnchor_) { + map->abortWriting(e.swap_filen); + e.swap_dirn = -1; + e.swap_filen = -1; + e.swap_status = SWAPOUT_NONE; + dynamic_cast(*e.mem_obj->swapout.sio).writeableAnchor_ = NULL; + Store::Root().transientsAbandon(e); // broadcasts after the change + } else { + map->closeForReading(e.swap_filen); + e.swap_dirn = -1; + e.swap_filen = -1; + e.swap_status = SWAPOUT_NONE; + } } uint64_t Rock::SwapDir::currentSize() const { - return HeaderSize + max_objsize * currentCount(); + const uint64_t spaceSize = !freeSlots ? + maxSize() : (slotSize * freeSlots->size()); + // everything that is not free is in use + return maxSize() - spaceSize; } uint64_t @@ -142,7 +212,7 @@ Rock::SwapDir::entryLimitAllowed() const { const int64_t eLimitLo = map ? 
map->entryLimit() : 0; // dynamic shrinking unsupported - const int64_t eWanted = (maxSize() - HeaderSize)/maxObjectSize(); + const int64_t eWanted = (maxSize() - HeaderSize)/slotSize; return min(max(eLimitLo, eWanted), entryLimitHigh()); } @@ -175,51 +245,44 @@ // no need to distinguish ENOENT from other possible stat() errors. debugs (47, DBG_IMPORTANT, "Creating Rock db directory: " << path); const int res = mkdir(path, 0700); - if (res != 0) { - debugs(47, DBG_CRITICAL, "Failed to create Rock db dir " << path << - ": " << xstrerror()); - fatal("Rock Store db creation error"); - } + if (res != 0) + createError("mkdir"); } debugs (47, DBG_IMPORTANT, "Creating Rock db: " << filePath); + const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600); + if (swap < 0) + createError("create"); + #if SLOWLY_FILL_WITH_ZEROS char block[1024]; Must(maxSize() % sizeof(block) == 0); memset(block, '\0', sizeof(block)); - const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600); for (off_t offset = 0; offset < maxSize(); offset += sizeof(block)) { - if (write(swap, block, sizeof(block)) != sizeof(block)) { - debugs(47, DBG_CRITICAL, "ERROR: Failed to create Rock Store db in " << filePath << - ": " << xstrerror()); - fatal("Rock Store db creation error"); - } + if (write(swap, block, sizeof(block)) != sizeof(block)) + createError("write"); } - close(swap); #else - const int swap = open(filePath, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0600); - if (swap < 0) { - debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " << filePath << - "; create error: " << xstrerror()); - fatal("Rock Store db creation error"); - } - - if (ftruncate(swap, maxSize()) != 0) { - debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " << filePath << - "; truncate error: " << xstrerror()); - fatal("Rock Store db creation error"); - } + if (ftruncate(swap, maxSize()) != 0) + createError("truncate"); char header[HeaderSize]; memset(header, '\0', sizeof(header)); - if (write(swap, header, sizeof(header)) != sizeof(header)) { - debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " << filePath << - "; write error: " << xstrerror()); - fatal("Rock Store db initialization error"); - } + if (write(swap, header, sizeof(header)) != sizeof(header)) + createError("write"); +#endif + close(swap); -#endif +} + +// report Rock DB creation error and exit +void +Rock::SwapDir::createError(const char *const msg) +{ + debugs(47, DBG_CRITICAL, "ERROR: Failed to initialize Rock Store db in " << + filePath << "; " << msg << " error: " << xstrerror()); + fatal("Rock Store db creation error"); } void @@ -231,8 +294,11 @@ // are refcounted. We up our count once to avoid implicit delete's. lock(); + freeSlots = shm_old(Ipc::Mem::PageStack)(freeSlotsPath()); + Must(!map); - map = new DirMap(path); + map = new DirMap(inodeMapPath()); + map->cleaner = this; const char *ioModule = needsDiskStrand() ? 
"IpcIo" : "Blocking"; if (DiskIOModule *m = DiskIOModule::Find(ioModule)) { @@ -318,6 +384,7 @@ { ConfigOptionVector *vector = dynamic_cast(::SwapDir::getOptionTree()); assert(vector); + vector->options.push_back(new ConfigOptionAdapter(*const_cast(this), &SwapDir::parseSizeOption, &SwapDir::dumpSizeOption)); vector->options.push_back(new ConfigOptionAdapter(*const_cast(this), &SwapDir::parseTimeOption, &SwapDir::dumpTimeOption)); vector->options.push_back(new ConfigOptionAdapter(*const_cast(this), &SwapDir::parseRateOption, &SwapDir::dumpRateOption)); return vector; @@ -326,7 +393,7 @@ bool Rock::SwapDir::allowOptionReconfigure(const char *const option) const { - return strcmp(option, "max-size") != 0 && + return strcmp(option, "slot-size") != 0 && ::SwapDir::allowOptionReconfigure(option); } @@ -335,7 +402,7 @@ Rock::SwapDir::parseTimeOption(char const *option, const char *value, int reconfig) { // TODO: ::SwapDir or, better, Config should provide time-parsing routines, - // including time unit handling. Same for size. + // including time unit handling. Same for size and rate. time_msec_t *storedTime; if (strcmp(option, "swap-timeout") == 0) @@ -421,17 +488,60 @@ storeAppendPrintf(e, " max-swap-rate=%d", fileConfig.ioRate); } +/// parses size-specific options; mimics ::SwapDir::optionObjectSizeParse() +bool +Rock::SwapDir::parseSizeOption(char const *option, const char *value, int reconfig) +{ + uint64_t *storedSize; + if (strcmp(option, "slot-size") == 0) + storedSize = &slotSize; + else + return false; + + if (!value) + self_destruct(); + + // TODO: handle size units and detect parsing errors better + const uint64_t newSize = strtoll(value, NULL, 10); + if (newSize <= 0) { + debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must be positive; got: " << newSize); + self_destruct(); + } + + if (newSize <= sizeof(DbCellHeader)) { + debugs(3, DBG_CRITICAL, "FATAL: cache_dir " << path << ' ' << option << " must exceed " << sizeof(DbCellHeader) << "; got: " << newSize); + self_destruct(); + } + + if (!reconfig) + *storedSize = newSize; + else if (*storedSize != newSize) { + debugs(3, DBG_IMPORTANT, "WARNING: cache_dir " << path << ' ' << option + << " cannot be changed dynamically, value left unchanged: " << + *storedSize); + } + + return true; +} + +/// reports size-specific options; mimics ::SwapDir::optionObjectSizeDump() +void +Rock::SwapDir::dumpSizeOption(StoreEntry * e) const +{ + storeAppendPrintf(e, " slot-size=%" PRId64, slotSize); +} + /// check the results of the configuration; only level-0 debugging works here void Rock::SwapDir::validateOptions() { - if (max_objsize <= 0) - fatal("Rock store requires a positive max-size"); + if (slotSize <= 0) + fatal("Rock store requires a positive slot-size"); const int64_t maxSizeRoundingWaste = 1024 * 1024; // size is configured in MB - const int64_t maxObjectSizeRoundingWaste = maxObjectSize(); + const int64_t slotSizeRoundingWaste = slotSize; const int64_t maxRoundingWaste = - max(maxSizeRoundingWaste, maxObjectSizeRoundingWaste); + max(maxSizeRoundingWaste, slotSizeRoundingWaste); const int64_t usableDiskSize = diskOffset(entryLimitAllowed()); const int64_t diskWasteSize = maxSize() - usableDiskSize; Must(diskWasteSize >= 0); @@ -441,7 +551,7 @@ diskWasteSize >= maxRoundingWaste) { debugs(47, DBG_CRITICAL, "Rock store cache_dir[" << index << "] '" << path << "':"); debugs(47, DBG_CRITICAL, "\tmaximum number of entries: " << entryLimitAllowed()); - debugs(47, DBG_CRITICAL, "\tmaximum object size: " << 
maxObjectSize() << " Bytes"); + debugs(47, DBG_CRITICAL, "\tdb slot size: " << slotSize << " Bytes"); debugs(47, DBG_CRITICAL, "\tmaximum db size: " << maxSize() << " Bytes"); debugs(47, DBG_CRITICAL, "\tusable db size: " << usableDiskSize << " Bytes"); debugs(47, DBG_CRITICAL, "\tdisk space waste: " << diskWasteSize << " Bytes"); @@ -456,28 +566,6 @@ AsyncJob::Start(new Rebuild(this)); } -/* Add a new object to the cache with empty memory copy and pointer to disk - * use to rebuild store from disk. Based on UFSSwapDir::addDiskRestore */ -bool -Rock::SwapDir::addEntry(const int filen, const DbCellHeader &header, const StoreEntry &from) -{ - debugs(47, 8, HERE << &from << ' ' << from.getMD5Text() << - ", filen="<< std::setfill('0') << std::hex << std::uppercase << - std::setw(8) << filen); - - sfileno newLocation = 0; - if (Ipc::StoreMapSlot *slot = map->openForWriting(reinterpret_cast(from.key), newLocation)) { - if (filen == newLocation) { - slot->set(from); - map->extras(filen) = header; - } // else some other, newer entry got into our cell - map->closeForWriting(newLocation, false); - return filen == newLocation; - } - - return false; -} - bool Rock::SwapDir::canStore(const StoreEntry &e, int64_t diskSpaceNeeded, int &load) const { @@ -513,42 +601,31 @@ return NULL; } - // compute payload size for our cell header, using StoreEntry info - // careful: e.objectLen() may still be negative here - const int64_t expectedReplySize = e.mem_obj->expectedReplySize(); - assert(expectedReplySize >= 0); // must know to prevent cell overflows - assert(e.mem_obj->swap_hdr_sz > 0); - DbCellHeader header; - header.payloadSize = e.mem_obj->swap_hdr_sz + expectedReplySize; - const int64_t payloadEnd = sizeof(DbCellHeader) + header.payloadSize; - assert(payloadEnd <= max_objsize); - sfileno filen; - Ipc::StoreMapSlot *const slot = + Ipc::StoreMapAnchor *const slot = map->openForWriting(reinterpret_cast(e.key), filen); if (!slot) { debugs(47, 5, HERE << "map->add failed"); return NULL; } - e.swap_file_sz = header.payloadSize; // and will be copied to the map + + assert(filen >= 0); slot->set(e); - map->extras(filen) = header; // XXX: We rely on our caller, storeSwapOutStart(), to set e.fileno. // If that does not happen, the entry will not decrement the read level! 
- IoState *sio = new IoState(this, &e, cbFile, cbIo, data); + Rock::SwapDir::Pointer self(this); + IoState *sio = new IoState(self, &e, cbFile, cbIo, data); sio->swap_dirn = index; sio->swap_filen = filen; - sio->payloadEnd = payloadEnd; - sio->diskOffset = diskOffset(sio->swap_filen); + sio->writeableAnchor_ = slot; debugs(47,5, HERE << "dir " << index << " created new filen " << std::setfill('0') << std::hex << std::uppercase << std::setw(8) << - sio->swap_filen << std::dec << " at " << sio->diskOffset); - - assert(sio->diskOffset + payloadEnd <= diskOffsetLimit()); + sio->swap_filen << std::dec << " starting at " << + diskOffset(sio->swap_filen)); sio->file(theFile); @@ -560,7 +637,14 @@ Rock::SwapDir::diskOffset(int filen) const { assert(filen >= 0); - return HeaderSize + max_objsize*filen; + return HeaderSize + slotSize*filen; +} + +int64_t +Rock::SwapDir::diskOffset(Ipc::Mem::PageId &pageId) const +{ + assert(pageId); + return diskOffset(pageId.number - 1); } int64_t @@ -570,7 +654,63 @@ return diskOffset(map->entryLimit()); } -// tries to open an old or being-written-to entry with swap_filen for reading +int +Rock::SwapDir::entryMaxPayloadSize() const +{ + return slotSize - sizeof(DbCellHeader); +} + +int +Rock::SwapDir::entriesNeeded(const int64_t objSize) const +{ + return (objSize + entryMaxPayloadSize() - 1) / entryMaxPayloadSize(); +} + +bool +Rock::SwapDir::useFreeSlot(Ipc::Mem::PageId &pageId) +{ + if (freeSlots->pop(pageId)) { + debugs(47, 5, "got a previously free slot: " << pageId); + return true; + } + + // catch free slots delivered to noteFreeMapSlice() + assert(!waitingForPage); + waitingForPage = &pageId; + if (map->purgeOne()) { + assert(!waitingForPage); // noteFreeMapSlice() should have cleared it + assert(pageId.set()); + debugs(47, 5, "got a previously busy slot: " << pageId); + return true; + } + assert(waitingForPage == &pageId); + waitingForPage = NULL; + + debugs(47, 3, "cannot get a slot; entries: " << map->entryCount()); + return false; +} + +bool +Rock::SwapDir::validSlotId(const SlotId slotId) const +{ + return 0 <= slotId && slotId < entryLimitAllowed(); +} + +void +Rock::SwapDir::noteFreeMapSlice(const sfileno sliceId) +{ + Ipc::Mem::PageId pageId; + pageId.pool = index+1; + pageId.number = sliceId+1; + if (waitingForPage) { + *waitingForPage = pageId; + waitingForPage = NULL; + } else { + freeSlots->push(pageId); + } +} + +// tries to open an old entry with swap_filen for reading StoreIOState::Pointer Rock::SwapDir::openStoreIO(StoreEntry &e, StoreIOState::STFNCB *cbFile, StoreIOState::STIOCB *cbIo, void *data) { @@ -594,29 +734,28 @@ // There are two ways an entry can get swap_filen: our get() locked it for // reading or our storeSwapOutStart() locked it for writing. Peeking at our - // locked entry is safe, but no support for reading a filling entry. - const Ipc::StoreMapSlot *slot = map->peekAtReader(e.swap_filen); + // locked entry is safe, but no support for reading the entry we swap out.
+ const Ipc::StoreMapAnchor *slot = map->peekAtReader(e.swap_filen); if (!slot) return NULL; // we were writing after all - IoState *sio = new IoState(this, &e, cbFile, cbIo, data); + Rock::SwapDir::Pointer self(this); + IoState *sio = new IoState(self, &e, cbFile, cbIo, data); sio->swap_dirn = index; sio->swap_filen = e.swap_filen; - sio->payloadEnd = sizeof(DbCellHeader) + map->extras(e.swap_filen).payloadSize; - assert(sio->payloadEnd <= max_objsize); // the payload fits the slot + sio->readableAnchor_ = slot; + sio->file(theFile); debugs(47,5, HERE << "dir " << index << " has old filen: " << std::setfill('0') << std::hex << std::uppercase << std::setw(8) << sio->swap_filen); - assert(slot->basics.swap_file_sz > 0); - assert(slot->basics.swap_file_sz == e.swap_file_sz); - - sio->diskOffset = diskOffset(sio->swap_filen); - assert(sio->diskOffset + sio->payloadEnd <= diskOffsetLimit()); - - sio->file(theFile); + assert(slot->sameKey(static_cast(e.key))); + // For collapsed disk hits: e.swap_file_sz and slot->basics.swap_file_sz + // may still be zero and basics.swap_file_sz may grow. + assert(slot->basics.swap_file_sz >= e.swap_file_sz); + return sio; } @@ -652,14 +791,8 @@ if (errflag == DISK_OK && rlen > 0) sio->offset_ += rlen; - assert(sio->diskOffset + sio->offset_ <= diskOffsetLimit()); // post-factum - StoreIOState::STRCB *callb = sio->read.callback; - assert(callb); - sio->read.callback = NULL; - void *cbdata; - if (cbdataReferenceValidDone(sio->read.callback_data, &cbdata)) - callb(cbdata, r->buf, rlen, sio.getRaw()); + sio->callReaderBack(r->buf, rlen); } void @@ -670,25 +803,62 @@ assert(request->sio != NULL); IoState &sio = *request->sio; + // quit if somebody called IoState::close() while we were waiting + if (!sio.stillWaiting()) { + debugs(79, 3, "ignoring closed entry " << sio.swap_filen); + noteFreeMapSlice(request->sidNext); + return; + } + + // TODO: Fail if disk dropped one of the previous write requests. + if (errflag == DISK_OK) { - // close, assuming we only write once; the entry gets the read lock - map->closeForWriting(sio.swap_filen, true); // do not increment sio.offset_ because we do it in sio->write() + + // finalize the shared slice info after writing slice contents to disk + Ipc::StoreMap::Slice &slice = + map->writeableSlice(sio.swap_filen, request->sidCurrent); + slice.size = request->len - sizeof(DbCellHeader); + slice.next = request->sidNext; + + if (request->eof) { + assert(sio.e); + assert(sio.writeableAnchor_); + sio.e->swap_file_sz = sio.writeableAnchor_->basics.swap_file_sz = + sio.offset_; + + // close, the entry gets the read lock + map->closeForWriting(sio.swap_filen, true); + sio.writeableAnchor_ = NULL; + sio.finishedWriting(errflag); + } } else { - // Do not abortWriting here. The entry should keep the write lock - // instead of losing association with the store and confusing core. - map->free(sio.swap_filen); // will mark as unusable, just in case + noteFreeMapSlice(request->sidNext); + + writeError(*sio.e); + sio.finishedWriting(errflag); + // and hope that Core will call disconnect() to close the map entry } - assert(sio.diskOffset + sio.offset_ <= diskOffsetLimit()); // post-factum - - sio.finishedWriting(errflag); + CollapsedForwarding::Broadcast(*sio.e); +} + +void +Rock::SwapDir::writeError(StoreEntry &e) +{ + // Do not abortWriting here. The entry should keep the write lock + // instead of losing association with the store and confusing core.
+ map->freeEntry(e.swap_filen); // will mark as unusable, just in case + + Store::Root().transientsAbandon(e); + + // All callers must also call IoState callback, to propagate the error. } bool Rock::SwapDir::full() const { - return map && map->full(); + return freeSlots != NULL && !freeSlots->size(); } // storeSwapOutFileClosed calls this method on DISK_NO_SPACE_LEFT, @@ -704,49 +874,10 @@ void Rock::SwapDir::maintain() { - debugs(47,3, HERE << "cache_dir[" << index << "] guards: " << - !repl << !map << !full() << StoreController::store_dirs_rebuilding); - - if (!repl) - return; // no means (cannot find a victim) - - if (!map) - return; // no victims (yet) - - if (!full()) - return; // no need (to find a victim) - - // XXX: UFSSwapDir::maintain says we must quit during rebuild - if (StoreController::store_dirs_rebuilding) - return; - - debugs(47,3, HERE << "cache_dir[" << index << "] state: " << map->full() << - ' ' << currentSize() << " < " << diskOffsetLimit()); - - // Hopefully, we find a removable entry much sooner (TODO: use time?) - const int maxProbed = 10000; - RemovalPurgeWalker *walker = repl->PurgeInit(repl, maxProbed); - - // It really should not take that long, but this will stop "infinite" loops - const int maxFreed = 1000; - int freed = 0; - // TODO: should we purge more than needed to minimize overheads? - for (; freed < maxFreed && full(); ++freed) { - if (StoreEntry *e = walker->Next(walker)) - e->release(); // will call our unlink() method - else - break; // no more objects - } - - debugs(47,2, HERE << "Rock cache_dir[" << index << "] freed " << freed << - " scanned " << walker->scanned << '/' << walker->locked); - - walker->Done(walker); - - if (full()) { - debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir[" << index << "] " << - "is still full after freeing " << freed << " entries. A bug?"); - } + // The Store calls this to free some db space, but there is nothing wrong + // with a full() db, except when db has to shrink after reconfigure, and + // we do not support shrinking yet (it would have to purge specific slots). + // TODO: Disable maintain() requests when they are pointless. } void @@ -780,11 +911,18 @@ { debugs(47, 5, HERE << e); ignoreReferences(e); - map->free(e.swap_filen); + map->freeEntry(e.swap_filen); disconnect(e); } void +Rock::SwapDir::markForUnlink(StoreEntry &e) +{ + debugs(47, 5, e); + map->freeEntry(e.swap_filen); +} + +void Rock::SwapDir::trackReferences(StoreEntry &e) { debugs(47, 5, HERE << e); @@ -817,6 +955,12 @@ storeAppendPrintf(&e, "Current entries: %9d %.2f%%\n", entryCount, (100.0 * entryCount / limit)); + const unsigned int slotsFree = !freeSlots ?
0 : freeSlots->size(); + if (slotsFree <= static_cast(limit)) { + const int usedSlots = limit - static_cast(slotsFree); + storeAppendPrintf(&e, "Used slots: %9d %.2f%%\n", + usedSlots, (100.0 * usedSlots / limit)); + } if (limit < 100) { // XXX: otherwise too expensive to count Ipc::ReadWriteLockStats stats; map->updateStats(stats); @@ -840,6 +984,24 @@ } +const char * +Rock::SwapDir::inodeMapPath() const +{ + static String inodesPath; + inodesPath = path; + inodesPath.append("_inodes"); + return inodesPath.termedBuf(); +} + +const char * +Rock::SwapDir::freeSlotsPath() const +{ + static String spacesPath; + spacesPath = path; + spacesPath.append("_spaces"); + return spacesPath.termedBuf(); +} + namespace Rock { RunnerRegistrationEntry(rrAfterConfig, SwapDirRr); @@ -847,18 +1009,36 @@ void Rock::SwapDirRr::create(const RunnerRegistry &) { - Must(owners.empty()); + Must(mapOwners.empty() && freeSlotsOwners.empty()); for (int i = 0; i < Config.cacheSwap.n_configured; ++i) { if (const Rock::SwapDir *const sd = dynamic_cast(INDEXSD(i))) { - Rock::SwapDir::DirMap::Owner *const owner = - Rock::SwapDir::DirMap::Init(sd->path, sd->entryLimitAllowed()); - owners.push_back(owner); + const int64_t capacity = sd->entryLimitAllowed(); + + SwapDir::DirMap::Owner *const mapOwner = + SwapDir::DirMap::Init(sd->inodeMapPath(), capacity); + mapOwners.push_back(mapOwner); + + // TODO: somehow remove pool id and counters from PageStack? + Ipc::Mem::Owner *const freeSlotsOwner = + shm_new(Ipc::Mem::PageStack)(sd->freeSlotsPath(), + i+1, capacity, + sizeof(DbCellHeader)); + freeSlotsOwners.push_back(freeSlotsOwner); + + // TODO: add method to initialize PageStack with no free pages + while (true) { + Ipc::Mem::PageId pageId; + if (!freeSlotsOwner->object()->pop(pageId)) + break; + } } } } Rock::SwapDirRr::~SwapDirRr() { - for (size_t i = 0; i < owners.size(); ++i) - delete owners[i]; + for (size_t i = 0; i < mapOwners.size(); ++i) { + delete mapOwners[i]; + delete freeSlotsOwners[i]; + } } === modified file 'src/fs/rock/RockSwapDir.h' --- src/fs/rock/RockSwapDir.h 2013-10-25 00:13:46 +0000 +++ src/fs/rock/RockSwapDir.h 2013-12-31 18:49:41 +0000 @@ -4,6 +4,9 @@ #include "DiskIO/DiskFile.h" #include "DiskIO/IORequestor.h" #include "fs/rock/RockDbCell.h" +#include "fs/rock/RockForward.h" +#include "ipc/mem/Page.h" +#include "ipc/mem/PageStack.h" #include "ipc/StoreMap.h" #include "SwapDir.h" @@ -14,12 +17,13 @@ namespace Rock { -class Rebuild; - /// \ingroup Rock -class SwapDir: public ::SwapDir, public IORequestor +class SwapDir: public ::SwapDir, public IORequestor, public Ipc::StoreMapCleaner { public: + typedef RefCount Pointer; + typedef Ipc::StoreMap DirMap; + SwapDir(); virtual ~SwapDir(); @@ -28,6 +32,7 @@ virtual StoreSearch *search(String const url, HttpRequest *); virtual StoreEntry *get(const cache_key *key); virtual void get(String const, STOREGETCLIENT, void * cbdata); + virtual void markForUnlink(StoreEntry &e); virtual void disconnect(StoreEntry &e); virtual uint64_t currentSize() const; virtual uint64_t currentCount() const; @@ -36,12 +41,35 @@ virtual void create(); virtual void parse(int index, char *path); + // temporary path to the shared memory map of first slots of cached entries + const char *inodeMapPath() const; + // temporary path to the shared memory stack of free slots + const char *freeSlotsPath() const; + int64_t entryLimitHigh() const { return SwapFilenMax; } ///< Core limit int64_t entryLimitAllowed() const; - typedef Ipc::StoreMapWithExtras DirMap; + /// removes a slot from a list 
of free slots or returns false + bool useFreeSlot(Ipc::Mem::PageId &pageId); + /// whether the given slot ID may point to a slot in this db + bool validSlotId(const SlotId slotId) const; + /// purges one or more entries to make full() false and free some slots + void purgeSome(); + + int64_t diskOffset(Ipc::Mem::PageId &pageId) const; + int64_t diskOffset(int filen) const; + void writeError(StoreEntry &e); + + /* StoreMapCleaner API */ + virtual void noteFreeMapSlice(const sfileno fileno); + + uint64_t slotSize; ///< all db slots are of this size protected: + /* Store API */ + virtual bool anchorCollapsed(StoreEntry &collapsed, bool &inSync); + virtual bool updateCollapsed(StoreEntry &collapsed); + /* protected ::SwapDir API */ virtual bool needsDiskStrand() const; virtual void init(); @@ -70,26 +98,35 @@ void dumpTimeOption(StoreEntry * e) const; bool parseRateOption(char const *option, const char *value, int reconfiguring); void dumpRateOption(StoreEntry * e) const; + bool parseSizeOption(char const *option, const char *value, int reconfiguring); + void dumpSizeOption(StoreEntry * e) const; void rebuild(); ///< starts loading and validating stored entry metadata - ///< used to add entries successfully loaded during rebuild - bool addEntry(const int fileno, const DbCellHeader &header, const StoreEntry &from); bool full() const; ///< no more entries can be stored without purging void trackReferences(StoreEntry &e); ///< add to replacement policy scope void ignoreReferences(StoreEntry &e); ///< delete from repl policy scope - int64_t diskOffset(int filen) const; int64_t diskOffsetLimit() const; int entryLimit() const { return map->entryLimit(); } + int entryMaxPayloadSize() const; + int entriesNeeded(const int64_t objSize) const; + + void anchorEntry(StoreEntry &e, const sfileno filen, const Ipc::StoreMapAnchor &anchor); + bool updateCollapsedWith(StoreEntry &collapsed, const Ipc::StoreMapAnchor &anchor); friend class Rebuild; + friend class IoState; const char *filePath; ///< location of cache storage file inside path/ + DirMap *map; ///< entry key/sfileno to MaxExtras/inode mapping private: + void createError(const char *const msg); + DiskIOStrategy *io; RefCount theFile; ///< cache storage for this cache_dir - DirMap *map; + Ipc::Mem::Pointer freeSlots; ///< all unused slots + Ipc::Mem::PageId *waitingForPage; ///< one-page cache for a "hot" free slot /* configurable options */ DiskFile::Config fileConfig; ///< file-level configuration options @@ -109,7 +146,8 @@ virtual void create(const RunnerRegistry &); private: - Vector owners; + Vector mapOwners; + Vector< Ipc::Mem::Owner *> freeSlotsOwners; }; } // namespace Rock === modified file 'src/fs/ufs/RebuildState.cc' --- src/fs/ufs/RebuildState.cc 2013-05-06 12:19:39 +0000 +++ src/fs/ufs/RebuildState.cc 2013-12-31 18:49:41 +0000 @@ -191,15 +191,25 @@ if (!storeRebuildLoadEntry(fd, sd->index, buf, counts)) return; + const uint64_t expectedSize = sb.st_size > 0 ? + static_cast(sb.st_size) : 0; + StoreEntry tmpe; - const bool loaded = storeRebuildParseEntry(buf, tmpe, key, counts, - (int64_t)sb.st_size); + const bool parsed = storeRebuildParseEntry(buf, tmpe, key, counts, + expectedSize); file_close(fd); --store_open_disk_fd; fd = -1; - if (!loaded) { + bool accepted = parsed && tmpe.swap_file_sz > 0; + if (parsed && !accepted) { + debugs(47, DBG_IMPORTANT, "WARNING: Ignoring ufs cache entry with " << + "unknown size: " << tmpe); + accepted = false; + } + + if (!accepted) { // XXX: shouldn't this be a call to commonUfsUnlink? 
sd->unlinkFile(filn); // should we unlink in all failure cases? return; === modified file 'src/fs/ufs/UFSStoreState.cc' --- src/fs/ufs/UFSStoreState.cc 2013-10-25 00:13:46 +0000 +++ src/fs/ufs/UFSStoreState.cc 2013-12-06 23:52:26 +0000 @@ -177,7 +177,7 @@ * writes and just do the write directly. But for now we'll keep the * code simpler and always go through the pending_writes queue. */ -void +bool Fs::Ufs::UFSStoreState::write(char const *buf, size_t size, off_t aOffset, FREE * free_func) { debugs(79, 3, "UFSStoreState::write: dirn " << swap_dirn << ", fileno "<< @@ -187,11 +187,12 @@ debugs(79, DBG_IMPORTANT,HERE << "avoid write on theFile with error"); debugs(79, DBG_IMPORTANT,HERE << "calling free_func for " << (void*) buf); free_func((void*)buf); - return; + return false; } queueWrite(buf, size, aOffset, free_func); drainWriteQueue(); + return true; } /* === modified file 'src/fs/ufs/UFSStoreState.h' --- src/fs/ufs/UFSStoreState.h 2013-06-27 15:58:46 +0000 +++ src/fs/ufs/UFSStoreState.h 2013-08-15 22:09:07 +0000 @@ -56,8 +56,9 @@ bool closing; bool reading; bool writing; + /* StoreIOState API */ void read_(char *buf, size_t size, off_t offset, STRCB * callback, void *callback_data); - void write(char const *buf, size_t size, off_t offset, FREE * free_func); + virtual bool write(char const *buf, size_t size, off_t offset, FREE * free_func); protected: virtual void doCloseCallback (int errflag); === modified file 'src/fs/ufs/UFSSwapDir.cc' --- src/fs/ufs/UFSSwapDir.cc 2013-10-25 00:13:46 +0000 +++ src/fs/ufs/UFSSwapDir.cc 2013-12-06 23:52:26 +0000 @@ -754,14 +754,12 @@ e->swap_filen = file_number; e->swap_dirn = index; e->swap_file_sz = swap_file_sz; - e->lock_count = 0; e->lastref = lastref; e->timestamp = timestamp; e->expires = expires; e->lastmod = lastmod; e->refcount = refcount; e->flags = newFlags; - EBIT_SET(e->flags, ENTRY_CACHABLE); EBIT_CLR(e->flags, RELEASE_REQUEST); EBIT_CLR(e->flags, KEY_PRIVATE); e->ping_status = PING_NONE; === modified file 'src/ftp.cc' --- src/ftp.cc 2013-12-29 15:56:02 +0000 +++ src/ftp.cc 2014-01-01 19:20:49 +0000 @@ -3263,7 +3263,7 @@ FtpStateData::completedListing() { assert(entry); - entry->lock(); + entry->lock("FtpStateData"); ErrorState ferr(ERR_DIR_LISTING, Http::scOkay, request); ferr.ftp.listing = &listing; ferr.ftp.cwd_msg = xstrdup(cwd_message.size()? cwd_message.termedBuf() : ""); @@ -3272,7 +3272,7 @@ entry->replaceHttpReply( ferr.BuildHttpReply() ); EBIT_CLR(entry->flags, ENTRY_FWD_HDR_WAIT); entry->flush(); - entry->unlock(); + entry->unlock("FtpStateData"); } /// \ingroup ServerProtocolFTPInternal @@ -3690,7 +3690,7 @@ * Authenticated requests can't be cached. 
*/ e->release(); - } else if (EBIT_TEST(e->flags, ENTRY_CACHABLE) && !getCurrentOffset()) { + } else if (!EBIT_TEST(e->flags, RELEASE_REQUEST) && !getCurrentOffset()) { e->setPublicKey(); } else { e->release(); === modified file 'src/gopher.cc' --- src/gopher.cc 2013-06-07 04:35:25 +0000 +++ src/gopher.cc 2013-08-15 22:09:07 +0000 @@ -173,7 +173,7 @@ return; if (gopherState->entry) { - gopherState->entry->unlock(); + gopherState->entry->unlock("gopherState"); } HTTPMSGUNLOCK(gopherState->req); @@ -945,8 +945,7 @@ CommIoCbPtrFun(gopherSendComplete, gopherState)); Comm::Write(gopherState->serverConn, buf, strlen(buf), call, NULL); - if (EBIT_TEST(gopherState->entry->flags, ENTRY_CACHABLE)) - gopherState->entry->setPublicKey(); /* Make it public */ + gopherState->entry->makePublic(); } /// \ingroup ServerProtocolGopherInternal @@ -962,7 +961,7 @@ gopherState = cbdataAlloc(GopherStateData); gopherState->buf = (char *)memAllocate(MEM_4K_BUF); - entry->lock(); + entry->lock("gopherState"); gopherState->entry = entry; gopherState->fwd = fwd; === modified file 'src/http.cc' --- src/http.cc 2013-11-23 05:21:34 +0000 +++ src/http.cc 2013-12-06 23:52:26 +0000 @@ -188,6 +188,8 @@ serverConnection->close(); } +/// Remove an existing public store entry if the incoming response (to be +/// stored in a currently private entry) is going to invalidate it. static void httpMaybeRemovePublic(StoreEntry * e, Http::StatusCode status) { @@ -195,6 +197,8 @@ int forbidden = 0; StoreEntry *pe; + // If the incoming response already goes into a public entry, then there is + // nothing to remove. This protects ready-for-collapsing entries as well. if (!EBIT_TEST(e->flags, KEY_PRIVATE)) return; @@ -255,7 +259,7 @@ if (e->mem_obj->request) pe = storeGetPublicByRequest(e->mem_obj->request); else - pe = storeGetPublic(e->mem_obj->url, e->mem_obj->method); + pe = storeGetPublic(e->mem_obj->storeId(), e->mem_obj->method); if (pe != NULL) { assert(e != pe); @@ -272,7 +276,7 @@ if (e->mem_obj->request) pe = storeGetPublicByRequestMethod(e->mem_obj->request, Http::METHOD_HEAD); else - pe = storeGetPublic(e->mem_obj->url, Http::METHOD_HEAD); + pe = storeGetPublic(e->mem_obj->storeId(), Http::METHOD_HEAD); if (pe != NULL) { assert(e != pe); @@ -335,12 +339,17 @@ * condition */ #define REFRESH_OVERRIDE(flag) \ - ((R = (R ? R : refreshLimits(entry->mem_obj->url))) , \ + ((R = (R ? R : refreshLimits(entry->mem_obj->storeId()))) , \ (R && R->flags.flag)) #else #define REFRESH_OVERRIDE(flag) 0 #endif + if (EBIT_TEST(entry->flags, RELEASE_REQUEST)) { + debugs(22, 3, "NO because " << *entry << " has been released."); + return 0; + } + // Check for Surrogate/1.0 protocol conditions // NP: reverse-proxy traffic our parent server has instructed us never to cache if (surrogateNoStore) { @@ -700,7 +709,7 @@ /** Creates a blank header. 
If this routine is made incremental, this will not do */ /* NP: all exit points to this function MUST call ctx_exit(ctx) */ - Ctx ctx = ctx_enter(entry->mem_obj->url); + Ctx ctx = ctx_enter(entry->mem_obj->urlXXX()); debugs(11, 3, "processReplyHeader: key '" << entry->getMD5Text() << "'"); @@ -910,7 +919,7 @@ { ServerStateData::haveParsedReplyHeaders(); - Ctx ctx = ctx_enter(entry->mem_obj->url); + Ctx ctx = ctx_enter(entry->mem_obj->urlXXX()); HttpReply *rep = finalReply(); entry->timestampsSet(); === modified file 'src/icmp/net_db.cc' --- src/icmp/net_db.cc 2013-06-07 04:35:25 +0000 +++ src/icmp/net_db.cc 2013-12-19 04:53:35 +0000 @@ -887,7 +887,7 @@ debugs(38, 3, "netdbExchangeDone: " << ex->e->url() ); HTTPMSGUNLOCK(ex->r); storeUnregister(ex->sc, ex->e, ex); - ex->e->unlock(); + ex->e->unlock("netdbExchangeDone"); cbdataReferenceDone(ex->p); cbdataFree(ex); } === modified file 'src/ipc/Messages.h' --- src/ipc/Messages.h 2012-09-01 14:38:36 +0000 +++ src/ipc/Messages.h 2013-05-29 16:24:49 +0000 @@ -16,6 +16,7 @@ mtStrandSearchRequest, mtStrandSearchResponse, mtSharedListenRequest, mtSharedListenResponse, mtIpcIoNotification, + mtCollapsedForwardingNotification, mtCacheMgrRequest, mtCacheMgrResponse #if SQUID_SNMP , === modified file 'src/ipc/Queue.cc' --- src/ipc/Queue.cc 2012-10-17 00:14:09 +0000 +++ src/ipc/Queue.cc 2013-12-31 18:49:41 +0000 @@ -9,6 +9,8 @@ #include "globals.h" #include "ipc/Queue.h" +#include + /// constructs Metadata ID from parent queue ID static String MetadataId(String id) @@ -121,6 +123,74 @@ return *reinterpret_cast(queue); } +// BaseMultiQueue + +Ipc::BaseMultiQueue::BaseMultiQueue(const int aLocalProcessId): + theLocalProcessId(aLocalProcessId), + theLastPopProcessId(std::numeric_limits::max() - 1) +{ +} + +void +Ipc::BaseMultiQueue::clearReaderSignal(const int remoteProcessId) +{ + QueueReader &reader = localReader(); + debugs(54, 7, "reader: " << reader.id); + + reader.clearSignal(); + + // we got a hint; we could reposition iteration to try popping from the + // remoteProcessId queue first; but it does not seem to help much and might + // introduce some bias so we do not do that for now: + // theLastPopProcessId = remoteProcessId; +} + +const Ipc::QueueReader::Balance & +Ipc::BaseMultiQueue::balance(const int remoteProcessId) const +{ + const QueueReader &r = remoteReader(remoteProcessId); + return r.balance; +} + +const Ipc::QueueReader::Rate & +Ipc::BaseMultiQueue::rateLimit(const int remoteProcessId) const +{ + const QueueReader &r = remoteReader(remoteProcessId); + return r.rateLimit; +} + +Ipc::OneToOneUniQueue & +Ipc::BaseMultiQueue::inQueue(const int remoteProcessId) +{ + const OneToOneUniQueue &queue = + const_cast(this)->inQueue(remoteProcessId); + return const_cast(queue); +} + +Ipc::OneToOneUniQueue & +Ipc::BaseMultiQueue::outQueue(const int remoteProcessId) +{ + const OneToOneUniQueue &queue = + const_cast(this)->outQueue(remoteProcessId); + return const_cast(queue); +} + +Ipc::QueueReader & +Ipc::BaseMultiQueue::localReader() +{ + const QueueReader &reader = + const_cast(this)->localReader(); + return const_cast(reader); +} + +Ipc::QueueReader & +Ipc::BaseMultiQueue::remoteReader(const int remoteProcessId) +{ + const QueueReader &reader = + const_cast(this)->remoteReader(remoteProcessId); + return const_cast(reader); +} + // FewToFewBiQueue Ipc::FewToFewBiQueue::Owner * @@ -130,17 +200,16 @@ } Ipc::FewToFewBiQueue::FewToFewBiQueue(const String &id, const Group aLocalGroup, const int aLocalProcessId): + BaseMultiQueue(aLocalProcessId), 
metadata(shm_old(Metadata)(MetadataId(id).termedBuf())), queues(shm_old(OneToOneUniQueues)(QueuesId(id).termedBuf())), readers(shm_old(QueueReaders)(ReadersId(id).termedBuf())), - theLocalGroup(aLocalGroup), theLocalProcessId(aLocalProcessId), - theLastPopProcessId(readers->theCapacity) + theLocalGroup(aLocalGroup) { Must(queues->theCapacity == metadata->theGroupASize * metadata->theGroupBSize * 2); Must(readers->theCapacity == metadata->theGroupASize + metadata->theGroupBSize); - const QueueReader &localReader = reader(theLocalGroup, theLocalProcessId); - debugs(54, 7, HERE << "queue " << id << " reader: " << localReader.id); + debugs(54, 7, "queue " << id << " reader: " << localReader().id); } int @@ -185,19 +254,12 @@ return index; } -Ipc::OneToOneUniQueue & -Ipc::FewToFewBiQueue::oneToOneQueue(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId) -{ - return (*queues)[oneToOneQueueIndex(fromGroup, fromProcessId, toGroup, toProcessId)]; -} - const Ipc::OneToOneUniQueue & Ipc::FewToFewBiQueue::oneToOneQueue(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId) const { return (*queues)[oneToOneQueueIndex(fromGroup, fromProcessId, toGroup, toProcessId)]; } -/// incoming queue from a given remote process const Ipc::OneToOneUniQueue & Ipc::FewToFewBiQueue::inQueue(const int remoteProcessId) const { @@ -205,7 +267,6 @@ theLocalGroup, theLocalProcessId); } -/// outgoing queue to a given remote process const Ipc::OneToOneUniQueue & Ipc::FewToFewBiQueue::outQueue(const int remoteProcessId) const { @@ -222,59 +283,30 @@ metadata->theGroupASize + processId - metadata->theGroupBIdOffset; } -Ipc::QueueReader & -Ipc::FewToFewBiQueue::reader(const Group group, const int processId) -{ - return readers->theReaders[readerIndex(group, processId)]; -} - -const Ipc::QueueReader & -Ipc::FewToFewBiQueue::reader(const Group group, const int processId) const -{ - return readers->theReaders[readerIndex(group, processId)]; -} - -void -Ipc::FewToFewBiQueue::clearReaderSignal(const int remoteProcessId) -{ - QueueReader &localReader = reader(theLocalGroup, theLocalProcessId); - debugs(54, 7, HERE << "reader: " << localReader.id); - - Must(validProcessId(remoteGroup(), remoteProcessId)); - localReader.clearSignal(); - - // we got a hint; we could reposition iteration to try popping from the - // remoteProcessId queue first; but it does not seem to help much and might - // introduce some bias so we do not do that for now: - // theLastPopProcessId = remoteProcessId; -} - -Ipc::QueueReader::Balance & -Ipc::FewToFewBiQueue::localBalance() -{ - QueueReader &r = reader(theLocalGroup, theLocalProcessId); - return r.balance; -} - -const Ipc::QueueReader::Balance & -Ipc::FewToFewBiQueue::balance(const int remoteProcessId) const -{ - const QueueReader &r = reader(remoteGroup(), remoteProcessId); - return r.balance; -} - -Ipc::QueueReader::Rate & -Ipc::FewToFewBiQueue::localRateLimit() -{ - QueueReader &r = reader(theLocalGroup, theLocalProcessId); - return r.rateLimit; -} - -const Ipc::QueueReader::Rate & -Ipc::FewToFewBiQueue::rateLimit(const int remoteProcessId) const -{ - const QueueReader &r = reader(remoteGroup(), remoteProcessId); - return r.rateLimit; +const Ipc::QueueReader & +Ipc::FewToFewBiQueue::localReader() const +{ + return readers->theReaders[readerIndex(theLocalGroup, theLocalProcessId)]; +} + +const Ipc::QueueReader & +Ipc::FewToFewBiQueue::remoteReader(const int processId) const +{ + return 
readers->theReaders[readerIndex(remoteGroup(), processId)]; +} + +int +Ipc::FewToFewBiQueue::remotesCount() const +{ + return theLocalGroup == groupA ? metadata->theGroupBSize : + metadata->theGroupASize; +} + +int +Ipc::FewToFewBiQueue::remotesIdOffset() const +{ + return theLocalGroup == groupA ? metadata->theGroupBIdOffset : + metadata->theGroupAIdOffset; } Ipc::FewToFewBiQueue::Metadata::Metadata(const int aGroupASize, const int aGroupAIdOffset, const int aGroupBSize, const int aGroupBIdOffset): @@ -298,3 +330,105 @@ delete queuesOwner; delete readersOwner; } + +// MultiQueue + +Ipc::MultiQueue::Owner * +Ipc::MultiQueue::Init(const String &id, const int processCount, const int processIdOffset, const unsigned int maxItemSize, const int capacity) +{ + return new Owner(id, processCount, processIdOffset, maxItemSize, capacity); +} + +Ipc::MultiQueue::MultiQueue(const String &id, const int localProcessId): + BaseMultiQueue(localProcessId), + metadata(shm_old(Metadata)(MetadataId(id).termedBuf())), + queues(shm_old(OneToOneUniQueues)(QueuesId(id).termedBuf())), + readers(shm_old(QueueReaders)(ReadersId(id).termedBuf())) +{ + Must(queues->theCapacity == metadata->theProcessCount * metadata->theProcessCount); + Must(readers->theCapacity == metadata->theProcessCount); + + debugs(54, 7, "queue " << id << " reader: " << localReader().id); +} + +bool +Ipc::MultiQueue::validProcessId(const int processId) const +{ + return metadata->theProcessIdOffset <= processId && + processId < metadata->theProcessIdOffset + metadata->theProcessCount; +} + +const Ipc::OneToOneUniQueue & +Ipc::MultiQueue::oneToOneQueue(const int fromProcessId, const int toProcessId) const +{ + assert(validProcessId(fromProcessId)); + assert(validProcessId(toProcessId)); + const int fromIndex = fromProcessId - metadata->theProcessIdOffset; + const int toIndex = toProcessId - metadata->theProcessIdOffset; + const int index = fromIndex * metadata->theProcessCount + toIndex; + return (*queues)[index]; +} + +const Ipc::QueueReader & +Ipc::MultiQueue::reader(const int processId) const +{ + assert(validProcessId(processId)); + const int index = processId - metadata->theProcessIdOffset; + return readers->theReaders[index]; +} + +const Ipc::OneToOneUniQueue & +Ipc::MultiQueue::inQueue(const int remoteProcessId) const +{ + return oneToOneQueue(remoteProcessId, theLocalProcessId); +} + +const Ipc::OneToOneUniQueue & +Ipc::MultiQueue::outQueue(const int remoteProcessId) const +{ + return oneToOneQueue(theLocalProcessId, remoteProcessId); +} + +const Ipc::QueueReader & +Ipc::MultiQueue::localReader() const +{ + return reader(theLocalProcessId); +} + +const Ipc::QueueReader & +Ipc::MultiQueue::remoteReader(const int processId) const +{ + return reader(processId); +} + +int +Ipc::MultiQueue::remotesCount() const +{ + return metadata->theProcessCount; +} + +int +Ipc::MultiQueue::remotesIdOffset() const +{ + return metadata->theProcessIdOffset; +} + +Ipc::MultiQueue::Metadata::Metadata(const int aProcessCount, const int aProcessIdOffset): + theProcessCount(aProcessCount), theProcessIdOffset(aProcessIdOffset) +{ + Must(theProcessCount > 0); +} + +Ipc::MultiQueue::Owner::Owner(const String &id, const int processCount, const int processIdOffset, const unsigned int maxItemSize, const int capacity): + metadataOwner(shm_new(Metadata)(MetadataId(id).termedBuf(), processCount, processIdOffset)), + queuesOwner(shm_new(OneToOneUniQueues)(QueuesId(id).termedBuf(), processCount*processCount, maxItemSize, capacity)), + 
readersOwner(shm_new(QueueReaders)(ReadersId(id).termedBuf(), processCount)) +{ +} + +Ipc::MultiQueue::Owner::~Owner() +{ + delete metadataOwner; + delete queuesOwner; + delete readersOwner; +} === modified file 'src/ipc/Queue.h' --- src/ipc/Queue.h 2013-10-25 00:13:46 +0000 +++ src/ipc/Queue.h 2013-12-06 23:52:26 +0000 @@ -141,6 +141,70 @@ }; /** + * Base class for lockless fixed-capacity bidirectional queues for a + * limited number processes. + */ +class BaseMultiQueue +{ +public: + BaseMultiQueue(const int aLocalProcessId); + + /// clears the reader notification received by the local process from the remote process + void clearReaderSignal(const int remoteProcessId); + + /// picks a process and calls OneToOneUniQueue::pop() using its queue + template bool pop(int &remoteProcessId, Value &value); + + /// calls OneToOneUniQueue::push() using the given process queue + template bool push(const int remoteProcessId, const Value &value); + + /// peeks at the item likely to be pop()ed next + template bool peek(int &remoteProcessId, Value &value) const; + + /// returns local reader's balance + QueueReader::Balance &localBalance() { return localReader().balance; } + + /// returns reader's balance for a given remote process + const QueueReader::Balance &balance(const int remoteProcessId) const; + + /// returns local reader's rate limit + QueueReader::Rate &localRateLimit() { return localReader().rateLimit; } + + /// returns reader's rate limit for a given remote process + const QueueReader::Rate &rateLimit(const int remoteProcessId) const; + + /// number of items in incoming queue from a given remote process + int inSize(const int remoteProcessId) const { return inQueue(remoteProcessId).size(); } + + /// number of items in outgoing queue to a given remote process + int outSize(const int remoteProcessId) const { return outQueue(remoteProcessId).size(); } + +protected: + /// incoming queue from a given remote process + virtual const OneToOneUniQueue &inQueue(const int remoteProcessId) const = 0; + OneToOneUniQueue &inQueue(const int remoteProcessId); + + /// outgoing queue to a given remote process + virtual const OneToOneUniQueue &outQueue(const int remoteProcessId) const = 0; + OneToOneUniQueue &outQueue(const int remoteProcessId); + + virtual const QueueReader &localReader() const = 0; + QueueReader &localReader(); + + virtual const QueueReader &remoteReader(const int remoteProcessId) const = 0; + QueueReader &remoteReader(const int remoteProcessId); + + virtual int remotesCount() const = 0; + virtual int remotesIdOffset() const = 0; + +protected: + const int theLocalProcessId; ///< process ID of this queue + +private: + int theLastPopProcessId; ///< the ID of the last process we tried to pop() from +}; + +/** * Lockless fixed-capacity bidirectional queue for a limited number * processes. Allows communication between two groups of processes: * any process in one group may send data to and receive from any @@ -148,7 +212,7 @@ * communicate. Process in each group has a unique integer ID in * [groupIdOffset, groupIdOffset + groupSize) range. */ -class FewToFewBiQueue +class FewToFewBiQueue: public BaseMultiQueue { public: typedef OneToOneUniQueue::Full Full; @@ -188,55 +252,25 @@ /// maximum number of items in the queue static int MaxItemsCount(const int groupASize, const int groupBSize, const int capacity); - Group localGroup() const { return theLocalGroup; } - Group remoteGroup() const { return theLocalGroup == groupA ? 
groupB : groupA; } - - /// clears the reader notification received by the local process from the remote process - void clearReaderSignal(const int remoteProcessId); - - /// picks a process and calls OneToOneUniQueue::pop() using its queue - template bool pop(int &remoteProcessId, Value &value); - - /// calls OneToOneUniQueue::push() using the given process queue - template bool push(const int remoteProcessId, const Value &value); - /// finds the oldest item in incoming and outgoing queues between /// us and the given remote process template bool findOldest(const int remoteProcessId, Value &value) const; - /// peeks at the item likely to be pop()ed next - template bool peek(int &remoteProcessId, Value &value) const; - - /// returns local reader's balance - QueueReader::Balance &localBalance(); - - /// returns reader's balance for a given remote process - const QueueReader::Balance &balance(const int remoteProcessId) const; - - /// returns local reader's rate limit - QueueReader::Rate &localRateLimit(); - - /// returns reader's rate limit for a given remote process - const QueueReader::Rate &rateLimit(const int remoteProcessId) const; - - /// number of items in incoming queue from a given remote process - int inSize(const int remoteProcessId) const { return inQueue(remoteProcessId).size(); } - - /// number of items in outgoing queue to a given remote process - int outSize(const int remoteProcessId) const { return outQueue(remoteProcessId).size(); } +protected: + virtual const OneToOneUniQueue &inQueue(const int remoteProcessId) const; + virtual const OneToOneUniQueue &outQueue(const int remoteProcessId) const; + virtual const QueueReader &localReader() const; + virtual const QueueReader &remoteReader(const int processId) const; + virtual int remotesCount() const; + virtual int remotesIdOffset() const; private: bool validProcessId(const Group group, const int processId) const; int oneToOneQueueIndex(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId) const; const OneToOneUniQueue &oneToOneQueue(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId) const; - OneToOneUniQueue &oneToOneQueue(const Group fromGroup, const int fromProcessId, const Group toGroup, const int toProcessId); - const OneToOneUniQueue &inQueue(const int remoteProcessId) const; - const OneToOneUniQueue &outQueue(const int remoteProcessId) const; - QueueReader &reader(const Group group, const int processId); - const QueueReader &reader(const Group group, const int processId) const; int readerIndex(const Group group, const int processId) const; - int remoteGroupSize() const { return theLocalGroup == groupA ? metadata->theGroupBSize : metadata->theGroupASize; } - int remoteGroupIdOffset() const { return theLocalGroup == groupA ? metadata->theGroupBIdOffset : metadata->theGroupAIdOffset; } + Group localGroup() const { return theLocalGroup; } + Group remoteGroup() const { return theLocalGroup == groupA ? groupB : groupA; } private: const Mem::Pointer metadata; ///< shared metadata @@ -244,8 +278,65 @@ const Mem::Pointer readers; ///< readers array const Group theLocalGroup; ///< group of this queue - const int theLocalProcessId; ///< process ID of this queue - int theLastPopProcessId; ///< the ID of the last process we tried to pop() from +}; + +/** + * Lockless fixed-capacity bidirectional queue for a limited number + * processes. Any process may send data to and receive from any other + * process (including itself). 
Each process has a unique integer ID in + * [processIdOffset, processIdOffset + processCount) range. + */ +class MultiQueue: public BaseMultiQueue +{ +public: + typedef OneToOneUniQueue::Full Full; + typedef OneToOneUniQueue::ItemTooLarge ItemTooLarge; + +private: + /// Shared metadata for MultiQueue + struct Metadata { + Metadata(const int aProcessCount, const int aProcessIdOffset); + size_t sharedMemorySize() const { return sizeof(*this); } + static size_t SharedMemorySize(const int, const int) { return sizeof(Metadata); } + + const int theProcessCount; + const int theProcessIdOffset; + }; + +public: + class Owner + { + public: + Owner(const String &id, const int processCount, const int processIdOffset, const unsigned int maxItemSize, const int capacity); + ~Owner(); + + private: + Mem::Owner *const metadataOwner; + Mem::Owner *const queuesOwner; + Mem::Owner *const readersOwner; + }; + + static Owner *Init(const String &id, const int processCount, const int processIdOffset, const unsigned int maxItemSize, const int capacity); + + MultiQueue(const String &id, const int localProcessId); + +protected: + virtual const OneToOneUniQueue &inQueue(const int remoteProcessId) const; + virtual const OneToOneUniQueue &outQueue(const int remoteProcessId) const; + virtual const QueueReader &localReader() const; + virtual const QueueReader &remoteReader(const int remoteProcessId) const; + virtual int remotesCount() const; + virtual int remotesIdOffset() const; + +private: + bool validProcessId(const int processId) const; + const OneToOneUniQueue &oneToOneQueue(const int fromProcessId, const int toProcessId) const; + const QueueReader &reader(const int processId) const; + +private: + const Mem::Pointer metadata; ///< shared metadata + const Mem::Pointer queues; ///< unidirection one-to-one queues + const Mem::Pointer readers; ///< readers array }; // OneToOneUniQueue @@ -306,10 +397,9 @@ if (full()) throw Full(); - const bool wasEmpty = empty(); const unsigned int pos = theIn++ % theCapacity * theMaxItemSize; memcpy(theBuffer + pos, &value, sizeof(value)); - ++theSize; + const bool wasEmpty = !theSize++; return wasEmpty && (!reader || reader->raiseSignal()); } @@ -330,19 +420,18 @@ return *reinterpret_cast(queue); } -// FewToFewBiQueue +// BaseMultiQueue template bool -FewToFewBiQueue::pop(int &remoteProcessId, Value &value) +BaseMultiQueue::pop(int &remoteProcessId, Value &value) { - // iterate all remote group processes, starting after the one we visited last - QueueReader &localReader = reader(theLocalGroup, theLocalProcessId); - for (int i = 0; i < remoteGroupSize(); ++i) { - if (++theLastPopProcessId >= remoteGroupIdOffset() + remoteGroupSize()) - theLastPopProcessId = remoteGroupIdOffset(); - OneToOneUniQueue &queue = oneToOneQueue(remoteGroup(), theLastPopProcessId, theLocalGroup, theLocalProcessId); - if (queue.pop(value, &localReader)) { + // iterate all remote processes, starting after the one we visited last + for (int i = 0; i < remotesCount(); ++i) { + if (++theLastPopProcessId >= remotesIdOffset() + remotesCount()) + theLastPopProcessId = remotesIdOffset(); + OneToOneUniQueue &queue = inQueue(theLastPopProcessId); + if (queue.pop(value, &localReader())) { remoteProcessId = theLastPopProcessId; debugs(54, 7, HERE << "popped from " << remoteProcessId << " to " << theLocalProcessId << " at " << queue.size()); return true; @@ -353,13 +442,33 @@ template bool -FewToFewBiQueue::push(const int remoteProcessId, const Value &value) +BaseMultiQueue::push(const int remoteProcessId, const Value 
&value) { - OneToOneUniQueue &remoteQueue = oneToOneQueue(theLocalGroup, theLocalProcessId, remoteGroup(), remoteProcessId); - QueueReader &remoteReader = reader(remoteGroup(), remoteProcessId); + OneToOneUniQueue &remoteQueue = outQueue(remoteProcessId); + QueueReader &reader = remoteReader(remoteProcessId); debugs(54, 7, HERE << "pushing from " << theLocalProcessId << " to " << remoteProcessId << " at " << remoteQueue.size()); - return remoteQueue.push(value, &remoteReader); -} + return remoteQueue.push(value, &reader); +} + +template +bool +BaseMultiQueue::peek(int &remoteProcessId, Value &value) const +{ + // mimic FewToFewBiQueue::pop() but quit just before popping + int popProcessId = theLastPopProcessId; // preserve for future pop() + for (int i = 0; i < remotesCount(); ++i) { + if (++popProcessId >= remotesIdOffset() + remotesCount()) + popProcessId = remotesIdOffset(); + const OneToOneUniQueue &queue = inQueue(popProcessId); + if (queue.peek(value)) { + remoteProcessId = popProcessId; + return true; + } + } + return false; // most likely, no process had anything to pop +} + +// FewToFewBiQueue template bool @@ -383,26 +492,6 @@ return out.peek(value); } -template -bool -FewToFewBiQueue::peek(int &remoteProcessId, Value &value) const -{ - // mimic FewToFewBiQueue::pop() but quit just before popping - int popProcessId = theLastPopProcessId; // preserve for future pop() - for (int i = 0; i < remoteGroupSize(); ++i) { - if (++popProcessId >= remoteGroupIdOffset() + remoteGroupSize()) - popProcessId = remoteGroupIdOffset(); - const OneToOneUniQueue &queue = - oneToOneQueue(remoteGroup(), popProcessId, - theLocalGroup, theLocalProcessId); - if (queue.peek(value)) { - remoteProcessId = popProcessId; - return true; - } - } - return false; // most likely, no process had anything to pop -} - } // namespace Ipc #endif // SQUID_IPC_QUEUE_H === modified file 'src/ipc/ReadWriteLock.cc' --- src/ipc/ReadWriteLock.cc 2013-10-25 00:13:46 +0000 +++ src/ipc/ReadWriteLock.cc 2013-12-06 23:52:26 +0000 @@ -9,52 +9,71 @@ bool Ipc::ReadWriteLock::lockShared() { - ++readers; // this locks "new" writers out - if (!writers) // there are no old writers + ++readLevel; // this locks "new" writers out + if (!writeLevel || appending) { // nobody is writing, or sharing is OK + ++readers; return true; - --readers; + } + --readLevel; return false; } bool Ipc::ReadWriteLock::lockExclusive() { - if (!writers++) { // we are the first writer + this locks "new" readers out - if (!readers) // there are no old readers + if (!writeLevel++) { // we are the first writer + lock "new" readers out + if (!readLevel) { // no old readers and nobody is becoming one + writing = true; return true; + } } - --writers; + --writeLevel; return false; } void Ipc::ReadWriteLock::unlockShared() { - assert(readers-- > 0); + assert(readers > 0); + --readers; + --readLevel; } void Ipc::ReadWriteLock::unlockExclusive() { - assert(writers-- > 0); + assert(writing); + appending = false; + writing = false; + --writeLevel; } void Ipc::ReadWriteLock::switchExclusiveToShared() { - ++readers; // must be done before we release exclusive control + assert(writing); + ++readLevel; // must be done before we release exclusive control + ++readers; unlockExclusive(); } void +Ipc::ReadWriteLock::startAppending() +{ + assert(writing); + appending = true; +} + +void Ipc::ReadWriteLock::updateStats(ReadWriteLockStats &stats) const { if (readers) { ++stats.readable; stats.readers += readers; - } else if (writers) { + } else if (writing) { ++stats.writeable; - 
stats.writers += writers; + ++stats.writers; + stats.appenders += appending; } else { ++stats.idle; } @@ -87,7 +106,9 @@ const int locked = readers + writers; storeAppendPrintf(&e, "Readers: %9d %6.2f%%\n", readers, (100.0 * readers / locked)); - storeAppendPrintf(&e, "Writers: %9d %6.2f%%\n", - writers, (100.0 * writers / locked)); + const double appPerc = writers ? (100.0 * appenders / writers) : 0.0; + storeAppendPrintf(&e, "Writers: %9d %6.2f%% including Appenders: %9d %6.2f%%\n", + writers, (100.0 * writers / locked), + appenders, appPerc); } } === modified file 'src/ipc/ReadWriteLock.h' --- src/ipc/ReadWriteLock.h 2012-08-28 13:00:30 +0000 +++ src/ipc/ReadWriteLock.h 2013-06-21 22:04:04 +0000 @@ -11,6 +11,9 @@ class ReadWriteLockStats; /// an atomic readers-writer or shared-exclusive lock suitable for maps/tables +/// Also supports reading-while-appending mode when readers and writer are +/// allowed to access the same locked object because the writer promises +/// to only append new data and all size-related object properties are atomic. class ReadWriteLock { public: @@ -22,12 +25,19 @@ void unlockExclusive(); ///< undo successful exclusiveLock() void switchExclusiveToShared(); ///< stop writing, start reading + void startAppending(); ///< writer keeps its lock but also allows reading + /// adds approximate current stats to the supplied ones void updateStats(ReadWriteLockStats &stats) const; public: - mutable Atomic::Word readers; ///< number of users trying to read - Atomic::Word writers; ///< number of writers trying to modify protected data + mutable Atomic::Word readers; ///< number of reading users + Atomic::Word writing; ///< there is a writing user (there can be at most 1) + Atomic::Word appending; ///< the writer has promised to only append + +private: + mutable Atomic::Word readLevel; ///< number of users reading (or trying to) + Atomic::Word writeLevel; ///< number of users writing (or trying to write) }; /// approximate stats of a set of ReadWriteLocks @@ -44,6 +54,7 @@ int idle; ///< number of unlocked locks int readers; ///< sum of lock.readers int writers; ///< sum of lock.writers + int appenders; ///< number of appending writers }; } // namespace Ipc === modified file 'src/ipc/StoreMap.cc' --- src/ipc/StoreMap.cc 2013-10-25 00:13:46 +0000 +++ src/ipc/StoreMap.cc 2013-12-31 18:49:41 +0000 @@ -31,178 +31,354 @@ shared->limit); } -Ipc::StoreMap::Slot * +int +Ipc::StoreMap::compareVersions(const sfileno fileno, time_t newVersion) const +{ + assert(valid(fileno)); + Anchor &inode = shared->slots[fileno].anchor; + + // note: we do not lock, so comparison may be inaccurate + + if (inode.empty()) + return +2; + + if (const time_t diff = newVersion - inode.basics.timestamp) + return diff < 0 ? 
-1 : +1; + + return 0; +} + +void +Ipc::StoreMap::forgetWritingEntry(sfileno fileno) +{ + assert(valid(fileno)); + Anchor &inode = shared->slots[fileno].anchor; + + assert(inode.writing()); + + // we do not iterate slices because we were told to forget about + // them; the caller is responsible for freeing them (most likely + // our slice list is incomplete or has holes) + + inode.waitingToBeFreed = false; + inode.rewind(); + + inode.lock.unlockExclusive(); + --shared->count; + + debugs(54, 8, "closed entry " << fileno << " for writing " << path); +} + +Ipc::StoreMap::Anchor * Ipc::StoreMap::openForWriting(const cache_key *const key, sfileno &fileno) { - debugs(54, 5, HERE << " trying to open slot for key " << storeKeyText(key) - << " for writing in map [" << path << ']'); - const int idx = slotIndexByKey(key); - - Slot &s = shared->slots[idx]; + debugs(54, 5, "opening entry with key " << storeKeyText(key) + << " for writing " << path); + const int idx = anchorIndexByKey(key); + + if (Anchor *anchor = openForWritingAt(idx)) { + fileno = idx; + return anchor; + } + + return NULL; +} + +Ipc::StoreMap::Anchor * +Ipc::StoreMap::openForWritingAt(const sfileno fileno, bool overwriteExisting) +{ + Anchor &s = shared->slots[fileno].anchor; ReadWriteLock &lock = s.lock; if (lock.lockExclusive()) { - assert(s.state != Slot::Writeable); // until we start breaking locks + assert(s.writing() && !s.reading()); + + // bail if we cannot empty this position + if (!s.waitingToBeFreed && !s.empty() && !overwriteExisting) { + lock.unlockExclusive(); + debugs(54, 5, "cannot open existing entry " << fileno << + " for writing " << path); + return NULL; + } // free if the entry was used, keeping the entry locked - if (s.waitingToBeFreed || s.state == Slot::Readable) - freeLocked(s, true); + if (s.waitingToBeFreed || !s.empty()) + freeChain(fileno, s, true); - assert(s.state == Slot::Empty); + assert(s.empty()); + s.start = -1; // we have not allocated any slices yet ++shared->count; - s.state = Slot::Writeable; - fileno = idx; + //s.setKey(key); // XXX: the caller should do that - debugs(54, 5, HERE << " opened slot at " << idx << - " for writing in map [" << path << ']'); + debugs(54, 5, "opened entry " << fileno << " for writing " << path); return &s; // and keep the entry locked } - debugs(54, 5, HERE << " failed to open slot at " << idx << - " for writing in map [" << path << ']'); + debugs(54, 5, "cannot open busy entry " << fileno << + " for writing " << path); return NULL; } void +Ipc::StoreMap::startAppending(const sfileno fileno) +{ + assert(valid(fileno)); + Anchor &s = shared->slots[fileno].anchor; + assert(s.writing()); + s.lock.startAppending(); + debugs(54, 5, "restricted entry " << fileno << " to appending " << path); +} + +void Ipc::StoreMap::closeForWriting(const sfileno fileno, bool lockForReading) { - debugs(54, 5, HERE << " closing slot at " << fileno << " for writing and " - "openning for reading in map [" << path << ']'); assert(valid(fileno)); - Slot &s = shared->slots[fileno]; - assert(s.state == Slot::Writeable); - s.state = Slot::Readable; - if (lockForReading) + Anchor &s = shared->slots[fileno].anchor; + assert(s.writing()); + if (lockForReading) { s.lock.switchExclusiveToShared(); - else + debugs(54, 5, "switched entry " << fileno << + " from writing to reading " << path); + assert(s.complete()); + } else { s.lock.unlockExclusive(); -} - -/// terminate writing the entry, freeing its slot for others to use + debugs(54, 5, "closed entry " << fileno << " for writing " << path); + // 
cannot assert completeness here because we have no lock + } +} + +Ipc::StoreMap::Slice & +Ipc::StoreMap::writeableSlice(const AnchorId anchorId, const SliceId sliceId) +{ + assert(valid(anchorId)); + assert(shared->slots[anchorId].anchor.writing()); + assert(valid(sliceId)); + return shared->slots[sliceId].slice; +} + +const Ipc::StoreMap::Slice & +Ipc::StoreMap::readableSlice(const AnchorId anchorId, const SliceId sliceId) const +{ + assert(valid(anchorId)); + assert(shared->slots[anchorId].anchor.reading()); + assert(valid(sliceId)); + return shared->slots[sliceId].slice; +} + +Ipc::StoreMap::Anchor & +Ipc::StoreMap::writeableEntry(const AnchorId anchorId) +{ + assert(valid(anchorId)); + assert(shared->slots[anchorId].anchor.writing()); + return shared->slots[anchorId].anchor; +} + +const Ipc::StoreMap::Anchor & +Ipc::StoreMap::readableEntry(const AnchorId anchorId) const +{ + assert(valid(anchorId)); + assert(shared->slots[anchorId].anchor.reading()); + return shared->slots[anchorId].anchor; +} + void Ipc::StoreMap::abortWriting(const sfileno fileno) { - debugs(54, 5, HERE << " abort writing slot at " << fileno << - " in map [" << path << ']'); - assert(valid(fileno)); - Slot &s = shared->slots[fileno]; - assert(s.state == Slot::Writeable); - freeLocked(s, false); -} - -void -Ipc::StoreMap::abortIo(const sfileno fileno) -{ - debugs(54, 5, HERE << " abort I/O for slot at " << fileno << - " in map [" << path << ']'); - assert(valid(fileno)); - Slot &s = shared->slots[fileno]; - - // The caller is a lock holder. Thus, if we are Writeable, then the - // caller must be the writer; otherwise the caller must be the reader. - if (s.state == Slot::Writeable) - abortWriting(fileno); - else - closeForReading(fileno); -} - -const Ipc::StoreMap::Slot * + debugs(54, 5, "aborting entry " << fileno << " for writing " << path); + assert(valid(fileno)); + Anchor &s = shared->slots[fileno].anchor; + assert(s.writing()); + s.lock.appending = false; // locks out any new readers + if (!s.lock.readers) { + freeChain(fileno, s, false); + debugs(54, 5, "closed clean entry " << fileno << " for writing " << path); + } else { + s.waitingToBeFreed = true; + s.lock.unlockExclusive(); + debugs(54, 5, "closed dirty entry " << fileno << " for writing " << path); + } +} + +const Ipc::StoreMap::Anchor * Ipc::StoreMap::peekAtReader(const sfileno fileno) const { assert(valid(fileno)); - const Slot &s = shared->slots[fileno]; - switch (s.state) { - case Slot::Readable: + const Anchor &s = shared->slots[fileno].anchor; + if (s.reading()) return &s; // immediate access by lock holder so no locking - case Slot::Writeable: - return NULL; // cannot read the slot when it is being written - case Slot::Empty: - assert(false); // must be locked for reading or writing - } - assert(false); // not reachable + if (s.writing()) + return NULL; // the caller is not a read lock holder + assert(false); // must be locked for reading or writing return NULL; } +const Ipc::StoreMap::Anchor & +Ipc::StoreMap::peekAtEntry(const sfileno fileno) const +{ + assert(valid(fileno)); + return shared->slots[fileno].anchor; +} + void -Ipc::StoreMap::free(const sfileno fileno) +Ipc::StoreMap::freeEntry(const sfileno fileno) { - debugs(54, 5, HERE << " marking slot at " << fileno << " to be freed in" - " map [" << path << ']'); + debugs(54, 5, "marking entry " << fileno << " to be freed in " << path); assert(valid(fileno)); - Slot &s = shared->slots[fileno]; + Anchor &s = shared->slots[fileno].anchor; if (s.lock.lockExclusive()) - freeLocked(s, false); + 
freeChain(fileno, s, false); else s.waitingToBeFreed = true; // mark to free it later } -const Ipc::StoreMap::Slot * +void +Ipc::StoreMap::freeEntryByKey(const cache_key *const key) +{ + debugs(54, 5, "marking entry with key " << storeKeyText(key) + << " to be freed in " << path); + + const int idx = anchorIndexByKey(key); + Anchor &s = shared->slots[idx].anchor; + if (s.lock.lockExclusive()) { + if (s.sameKey(key)) + freeChain(idx, s, true); + s.lock.unlockExclusive(); + } else if (s.lock.lockShared()) { + if (s.sameKey(key)) + s.waitingToBeFreed = true; // mark to free it later + s.lock.unlockShared(); + } else { + // we cannot be sure that the entry we found is ours because we do not + // have a lock on it, but we still check to minimize false deletions + if (s.sameKey(key)) + s.waitingToBeFreed = true; // mark to free it later + } +} + +/// unconditionally frees an already locked chain of slots, unlocking if needed +void +Ipc::StoreMap::freeChain(const sfileno fileno, Anchor &inode, const bool keepLocked) +{ + debugs(54, 7, "freeing entry " << fileno << + " in " << path); + if (!inode.empty()) { + sfileno sliceId = inode.start; + debugs(54, 8, "first slice " << sliceId); + while (sliceId >= 0) { + Slice &slice = shared->slots[sliceId].slice; + const sfileno nextId = slice.next; + slice.size = 0; + slice.next = -1; + if (cleaner) + cleaner->noteFreeMapSlice(sliceId); // might change slice state + sliceId = nextId; + } + } + + inode.waitingToBeFreed = false; + inode.rewind(); + + if (!keepLocked) + inode.lock.unlockExclusive(); + --shared->count; + debugs(54, 5, "freed entry " << fileno << " in " << path); +} + +const Ipc::StoreMap::Anchor * Ipc::StoreMap::openForReading(const cache_key *const key, sfileno &fileno) { - debugs(54, 5, HERE << " trying to open slot for key " << storeKeyText(key) - << " for reading in map [" << path << ']'); - const int idx = slotIndexByKey(key); - if (const Slot *slot = openForReadingAt(idx)) { + debugs(54, 5, "opening entry with key " << storeKeyText(key) + << " for reading " << path); + const int idx = anchorIndexByKey(key); + if (const Anchor *slot = openForReadingAt(idx)) { if (slot->sameKey(key)) { fileno = idx; - debugs(54, 5, HERE << " opened slot at " << fileno << " for key " - << storeKeyText(key) << " for reading in map [" << path << - ']'); return slot; // locked for reading } slot->lock.unlockShared(); + debugs(54, 7, "closed entry " << idx << " for reading " << path); } - debugs(54, 5, HERE << " failed to open slot for key " << storeKeyText(key) - << " for reading in map [" << path << ']'); return NULL; } -const Ipc::StoreMap::Slot * +const Ipc::StoreMap::Anchor * Ipc::StoreMap::openForReadingAt(const sfileno fileno) { - debugs(54, 5, HERE << " trying to open slot at " << fileno << " for " - "reading in map [" << path << ']'); + debugs(54, 5, "opening entry " << fileno << " for reading " << path); assert(valid(fileno)); - Slot &s = shared->slots[fileno]; + Anchor &s = shared->slots[fileno].anchor; if (!s.lock.lockShared()) { - debugs(54, 5, HERE << " failed to lock slot at " << fileno << " for " - "reading in map [" << path << ']'); + debugs(54, 5, "cannot open busy entry " << fileno << + " for reading " << path); return NULL; } - if (s.state == Slot::Empty) { + if (s.empty()) { s.lock.unlockShared(); - debugs(54, 7, HERE << " empty slot at " << fileno << " for " - "reading in map [" << path << ']'); + debugs(54, 7, "cannot open empty entry " << fileno << + " for reading " << path); return NULL; } if (s.waitingToBeFreed) { 
s.lock.unlockShared(); - debugs(54, 7, HERE << " dirty slot at " << fileno << " for " - "reading in map [" << path << ']'); + debugs(54, 7, "cannot open marked entry " << fileno << + " for reading " << path); return NULL; } - // cannot be Writing here if we got shared lock and checked Empty above - assert(s.state == Slot::Readable); - debugs(54, 5, HERE << " opened slot at " << fileno << " for reading in" - " map [" << path << ']'); + debugs(54, 5, "opened entry " << fileno << " for reading " << path); return &s; } void Ipc::StoreMap::closeForReading(const sfileno fileno) { - debugs(54, 5, HERE << " closing slot at " << fileno << " for reading in " - "map [" << path << ']'); assert(valid(fileno)); - Slot &s = shared->slots[fileno]; - assert(s.state == Slot::Readable); + Anchor &s = shared->slots[fileno].anchor; + assert(s.reading()); s.lock.unlockShared(); + debugs(54, 5, "closed entry " << fileno << " for reading " << path); +} + +bool +Ipc::StoreMap::purgeOne() +{ + // Hopefully, we find a removable entry much sooner (TODO: use time?). + // The min() will protect us from division by zero inside the loop. + const int searchLimit = min(10000, entryLimit()); + int tries = 0; + for (; tries < searchLimit; ++tries) { + const sfileno fileno = static_cast(++shared->victim % shared->limit); + assert(valid(fileno)); + Anchor &s = shared->slots[fileno].anchor; + if (s.lock.lockExclusive()) { + // the caller wants a free slice; empty anchor is not enough + if (!s.empty() && s.start >= 0) { + // this entry may be marked for deletion, and that is OK + freeChain(fileno, s, false); + debugs(54, 5, "purged entry " << fileno << " from " << path); + return true; + } + s.lock.unlockExclusive(); + } + } + debugs(54, 5, "no entries to purge from " << path << "; tried: " << tries); + return false; +} + +void +Ipc::StoreMap::importSlice(const SliceId sliceId, const Slice &slice) +{ + // Slices are imported into positions that should not be available via + // "get free slice" API. This is not something we can double check + // reliably because the anchor for the imported slice may not have been + // imported yet. 
+ assert(valid(sliceId)); + shared->slots[sliceId].slice = slice; } int @@ -217,17 +393,11 @@ return shared->count; } -bool -Ipc::StoreMap::full() const -{ - return entryCount() >= entryLimit(); -} - void Ipc::StoreMap::updateStats(ReadWriteLockStats &stats) const { for (int i = 0; i < shared->limit; ++i) - shared->slots[i].lock.updateStats(stats); + shared->slots[i].anchor.lock.updateStats(stats); } bool @@ -236,62 +406,47 @@ return 0 <= pos && pos < entryLimit(); } -int -Ipc::StoreMap::slotIndexByKey(const cache_key *const key) const +sfileno +Ipc::StoreMap::anchorIndexByKey(const cache_key *const key) const { const uint64_t *const k = reinterpret_cast(key); // TODO: use a better hash function return (k[0] + k[1]) % shared->limit; } -Ipc::StoreMap::Slot & -Ipc::StoreMap::slotByKey(const cache_key *const key) -{ - return shared->slots[slotIndexByKey(key)]; -} - -/// unconditionally frees the already exclusively locked slot and releases lock -void -Ipc::StoreMap::freeLocked(Slot &s, bool keepLocked) -{ - if (s.state == Slot::Readable && cleaner) - cleaner->cleanReadable(&s - shared->slots.raw()); - - s.waitingToBeFreed = false; - s.state = Slot::Empty; - if (!keepLocked) - s.lock.unlockExclusive(); - --shared->count; - debugs(54, 5, HERE << " freed slot at " << (&s - shared->slots.raw()) << - " in map [" << path << ']'); -} - -/* Ipc::StoreMapSlot */ - -Ipc::StoreMapSlot::StoreMapSlot(): state(Empty) +Ipc::StoreMap::Anchor & +Ipc::StoreMap::anchorByKey(const cache_key *const key) +{ + return shared->slots[anchorIndexByKey(key)].anchor; +} + +/* Ipc::StoreMapAnchor */ + +Ipc::StoreMapAnchor::StoreMapAnchor(): start(0) { memset(&key, 0, sizeof(key)); memset(&basics, 0, sizeof(basics)); + // keep in sync with rewind() } void -Ipc::StoreMapSlot::setKey(const cache_key *const aKey) +Ipc::StoreMapAnchor::setKey(const cache_key *const aKey) { memcpy(key, aKey, sizeof(key)); } bool -Ipc::StoreMapSlot::sameKey(const cache_key *const aKey) const +Ipc::StoreMapAnchor::sameKey(const cache_key *const aKey) const { const uint64_t *const k = reinterpret_cast(aKey); return k[0] == key[0] && k[1] == key[1]; } void -Ipc::StoreMapSlot::set(const StoreEntry &from) +Ipc::StoreMapAnchor::set(const StoreEntry &from) { + assert(writing() && !reading()); memcpy(key, from.key, sizeof(key)); - // XXX: header = aHeader; basics.timestamp = from.timestamp; basics.lastref = from.lastref; basics.expires = from.expires; @@ -301,10 +456,21 @@ basics.flags = from.flags; } +void +Ipc::StoreMapAnchor::rewind() +{ + assert(writing()); + start = 0; + memset(&key, 0, sizeof(key)); + memset(&basics, 0, sizeof(basics)); + // but keep the lock +} + /* Ipc::StoreMap::Shared */ Ipc::StoreMap::Shared::Shared(const int aLimit, const size_t anExtrasSize): - limit(aLimit), extrasSize(anExtrasSize), count(0), slots(aLimit) + limit(aLimit), extrasSize(anExtrasSize), count(0), victim(0), + slots(aLimit) { } @@ -317,6 +483,6 @@ size_t Ipc::StoreMap::Shared::SharedMemorySize(const int limit, const size_t extrasSize) { - return sizeof(Shared) + limit * (sizeof(Slot) + extrasSize); + return sizeof(Shared) + limit * (sizeof(StoreMapSlot) + extrasSize); } === modified file 'src/ipc/StoreMap.h' --- src/ipc/StoreMap.h 2013-10-25 00:13:46 +0000 +++ src/ipc/StoreMap.h 2013-12-31 18:49:41 +0000 @@ -9,22 +9,52 @@ namespace Ipc { -/// a StoreMap element, holding basic shareable StoreEntry info -class StoreMapSlot -{ -public: - StoreMapSlot(); - - /// store StoreEntry key and basics +typedef int32_t StoreMapSliceId; + +/// a piece of Store entry, linked 
to other pieces, forming a chain +/// slices may be appended by writers while readers read the entry +class StoreMapSlice +{ +public: + typedef uint32_t Size; + + StoreMapSlice(): size(0), next(-1) {} + + Atomic::WordT size; ///< slice contents size + Atomic::WordT next; ///< ID of the next entry slice +}; + +/// Maintains shareable information about a StoreEntry as a whole. +/// An anchor points to one or more StoreEntry slices. This is the +/// only lockable part of shared StoreEntry information, providing +/// protection for all StoreEntry slices. +class StoreMapAnchor +{ +public: + StoreMapAnchor(); + + /// store StoreEntry key and basics for an inode slot void set(const StoreEntry &anEntry); void setKey(const cache_key *const aKey); bool sameKey(const cache_key *const aKey) const; + /// undo the effects of set(), setKey(), etc., but keep locks and state + void rewind(); + + /* entry state may change immediately after calling these methods unless + * the caller holds an appropriate lock */ + bool empty() const { return !key[0] && !key[1]; } + bool reading() const { return lock.readers; } + bool writing() const { return lock.writing; } + bool complete() const { return !empty() && !writing(); } + public: mutable ReadWriteLock lock; ///< protects slot data below Atomic::WordT waitingToBeFreed; ///< may be accessed w/o a lock + // fields marked with [app] can be modified when appending-while-reading + uint64_t key[2]; ///< StoreEntry key // STORE_META_STD TLV field from StoreEntry @@ -33,11 +63,15 @@ time_t lastref; time_t expires; time_t lastmod; - uint64_t swap_file_sz; + Atomic::WordT swap_file_sz; // [app] uint16_t refcount; uint16_t flags; } basics; + /// where the chain of StoreEntry slices begins [app] + Atomic::WordT start; + +#if 0 /// possible persistent states typedef enum { Empty, ///< ready for writing, with nothing of value @@ -45,16 +79,30 @@ Readable, ///< ready for reading } State; State state; ///< current state +#endif +}; + +/// A hack to allocate one shared array for both anchors and slices. +/// Anchors are indexed by store entry ID and are independent from each other. +/// Slices are indexed by slice IDs and form entry chains using slice.next. 
+class StoreMapSlot +{ +public: + StoreMapAnchor anchor; ///< information about store entry as a whole + StoreMapSlice slice; ///< information about one stored entry piece }; class StoreMapCleaner; -/// map of StoreMapSlots indexed by their keys, with read/write slot locking +/// map of StoreMapSlots indexed by their keys, with read/write slice locking /// kids extend to store custom data class StoreMap { public: - typedef StoreMapSlot Slot; + typedef StoreMapAnchor Anchor; + typedef sfileno AnchorId; + typedef StoreMapSlice Slice; + typedef StoreMapSliceId SliceId; /// data shared across maps in different processes class Shared @@ -64,10 +112,11 @@ size_t sharedMemorySize() const; static size_t SharedMemorySize(const int limit, const size_t anExtrasSize); - const int limit; ///< maximum number of map slots - const size_t extrasSize; ///< size of slot extra data - Atomic::Word count; ///< current number of map slots - Ipc::Mem::FlexibleArray slots; ///< slots storage + const int limit; ///< maximum number of store entries + const size_t extrasSize; ///< size of slice extra data + Atomic::Word count; ///< current number of entries + Atomic::WordT victim; ///< starting point for purge search + Ipc::Mem::FlexibleArray slots; ///< storage }; public: @@ -78,31 +127,68 @@ StoreMap(const char *const aPath); - /// finds, reservers space for writing a new entry or returns nil - Slot *openForWriting(const cache_key *const key, sfileno &fileno); - /// successfully finish writing the entry + /// computes map entry position for a given entry key + sfileno anchorIndexByKey(const cache_key *const key) const; + + /// Like strcmp(mapped, new), but for store entry versions/timestamps. + /// Returns +2 if the mapped entry does not exist; -1/0/+1 otherwise. + /// Comparison may be inaccurate unless the caller is a lock holder. 
+ int compareVersions(const sfileno oldFileno, time_t newVersion) const; + + /// finds, locks, and returns an anchor for an empty key position, + /// erasing the old entry (if any) + Anchor *openForWriting(const cache_key *const key, sfileno &fileno); + /// locks and returns an anchor for the empty fileno position; if + /// overwriteExisting is false and the position is not empty, returns nil + Anchor *openForWritingAt(sfileno fileno, bool overwriteExisting = true); + /// restrict opened for writing entry to appending operations; allow reads + void startAppending(const sfileno fileno); + /// successfully finish creating or updating the entry at fileno pos void closeForWriting(const sfileno fileno, bool lockForReading = false); - - /// only works on locked entries; returns nil unless the slot is readable - const Slot *peekAtReader(const sfileno fileno) const; - - /// mark the slot as waiting to be freed and, if possible, free it - void free(const sfileno fileno); - - /// open slot for reading, increments read level - const Slot *openForReading(const cache_key *const key, sfileno &fileno); - /// open slot for reading, increments read level - const Slot *openForReadingAt(const sfileno fileno); - /// close slot after reading, decrements read level + /// unlock and "forget" openForWriting entry, making it Empty again + /// this call does not free entry slices so the caller has to do that + void forgetWritingEntry(const sfileno fileno); + + /// only works on locked entries; returns nil unless the slice is readable + const Anchor *peekAtReader(const sfileno fileno) const; + + /// only works on locked entries; returns the corresponding Anchor + const Anchor &peekAtEntry(const sfileno fileno) const; + + /// free the entry if possible or mark it as waiting to be freed if not + void freeEntry(const sfileno fileno); + /// free the entry if possible or mark it as waiting to be freed if not + /// does nothing if we cannot check that the key matches the cached entry + void freeEntryByKey(const cache_key *const key); + + /// opens entry (identified by key) for reading, increments read level + const Anchor *openForReading(const cache_key *const key, sfileno &fileno); + /// opens entry (identified by sfileno) for reading, increments read level + const Anchor *openForReadingAt(const sfileno fileno); + /// closes open entry after reading, decrements read level void closeForReading(const sfileno fileno); - /// called by lock holder to terminate either slot writing or reading - void abortIo(const sfileno fileno); - - bool full() const; ///< there are no empty slots left - bool valid(const int n) const; ///< whether n is a valid slot coordinate - int entryCount() const; ///< number of used slots - int entryLimit() const; ///< maximum number of slots that can be used + /// writeable slice within an entry chain created by openForWriting() + Slice &writeableSlice(const AnchorId anchorId, const SliceId sliceId); + /// readable slice within an entry chain opened by openForReading() + const Slice &readableSlice(const AnchorId anchorId, const SliceId sliceId) const; + /// writeable anchor for the entry created by openForWriting() + Anchor &writeableEntry(const AnchorId anchorId); + /// readable anchor for the entry created by openForReading() + const Anchor &readableEntry(const AnchorId anchorId) const; + + /// stop writing the entry, freeing its slot for others to use if possible + void abortWriting(const sfileno fileno); + + /// either finds and frees an entry with at least 1 slice or returns false + bool purgeOne(); 
+ + /// copies slice to its designated position + void importSlice(const SliceId sliceId, const Slice &slice); + + bool valid(const int n) const; ///< whether n is a valid slice coordinate + int entryCount() const; ///< number of writeable and readable entries + int entryLimit() const; ///< maximum entryCount() possible /// adds approximate current stats to the supplied ones void updateStats(ReadWriteLockStats &stats) const; @@ -112,20 +198,18 @@ protected: static Owner *Init(const char *const path, const int limit, const size_t extrasSize); - const String path; ///< cache_dir path, used for logging + const String path; ///< cache_dir path or similar cache name; for logging Mem::Pointer shared; private: - int slotIndexByKey(const cache_key *const key) const; - Slot &slotByKey(const cache_key *const key); - - Slot *openForReading(Slot &s); - void abortWriting(const sfileno fileno); - void freeIfNeeded(Slot &s); - void freeLocked(Slot &s, bool keepLocked); + Anchor &anchorByKey(const cache_key *const key); + + Anchor *openForReading(Slice &s); + + void freeChain(const sfileno fileno, Anchor &inode, const bool keepLock); }; -/// StoreMap with extra slot data +/// StoreMap with extra slice data /// Note: ExtrasT must be POD, it is initialized with zeroes, no /// constructors or destructors are called template @@ -149,14 +233,14 @@ ExtrasT *sharedExtras; ///< pointer to extras in shared memory }; -/// API for adjusting external state when dirty map slot is being freed +/// API for adjusting external state when dirty map slice is being freed class StoreMapCleaner { public: virtual ~StoreMapCleaner() {} - /// adjust slot-linked state before a locked Readable slot is erased - virtual void cleanReadable(const sfileno fileno) = 0; + /// adjust slice-linked state before a locked Readable slice is erased + virtual void noteFreeMapSlice(const sfileno sliceId) = 0; }; // StoreMapWithExtras implementation === modified file 'src/ipc/Strand.cc' --- src/ipc/Strand.cc 2013-10-25 00:13:46 +0000 +++ src/ipc/Strand.cc 2013-12-06 23:52:26 +0000 @@ -7,6 +7,7 @@ #include "base/Subscription.h" #include "base/TextException.h" #include "CacheManager.h" +#include "CollapsedForwarding.h" #include "comm/Connection.h" #include "globals.h" #include "ipc/Kids.h" @@ -89,6 +90,10 @@ } break; + case mtCollapsedForwardingNotification: + CollapsedForwarding::HandleNotification(message); + break; + #if SQUID_SNMP case mtSnmpRequest: { const Snmp::Request req(message); === modified file 'src/ipc/mem/Page.h' --- src/ipc/mem/Page.h 2012-09-01 14:38:36 +0000 +++ src/ipc/mem/Page.h 2013-01-07 17:14:28 +0000 @@ -20,7 +20,12 @@ public: PageId(): pool(0), number(0), purpose(maxPurpose) {} - operator bool() const { return pool && number; } + /// true if and only if both critical components have been initialized + bool set() const { return pool && number; } + + // safer than bool which would enable silent casts to int + typedef const uint32_t PageId::*SaferBool; + operator SaferBool() const { return set() ? 
&PageId::number : NULL; } uint32_t pool; ///< page pool ID within Squid // uint32_t segment; ///< memory segment ID within the pool; unused for now === modified file 'src/ipc/mem/PageStack.cc' --- src/ipc/mem/PageStack.cc 2012-10-16 00:26:06 +0000 +++ src/ipc/mem/PageStack.cc 2013-01-07 17:16:17 +0000 @@ -6,6 +6,7 @@ #include "squid.h" #include "base/TextException.h" +#include "Debug.h" #include "ipc/mem/Page.h" #include "ipc/mem/PageStack.h" @@ -56,6 +57,7 @@ theFirstWritable = idx; // may lie page.pool = thePoolId; page.number = value; + debugs(54, 9, page << " at " << idx << " size: " << theSize); return true; } // TODO: report suspiciously long loops @@ -68,6 +70,8 @@ void Ipc::Mem::PageStack::push(PageId &page) { + debugs(54, 9, page); + if (!page) return; @@ -87,6 +91,7 @@ // the enqueued value may already by gone, but that is OK theLastReadable = idx; // may lie ++theSize; + debugs(54, 9, page << " at " << idx << " size: " << theSize); page = PageId(); return; } === modified file 'src/ipc/mem/Pointer.h' --- src/ipc/mem/Pointer.h 2012-10-29 04:59:58 +0000 +++ src/ipc/mem/Pointer.h 2013-01-20 18:54:42 +0000 @@ -32,6 +32,9 @@ ~Owner(); + /// Raw access; handy to finalize initiatization, but avoid if possible. + Class *object() { return theObject; } + private: Owner(const char *const id, const off_t sharedSize); === modified file 'src/mgr/Forwarder.cc' --- src/mgr/Forwarder.cc 2013-10-25 00:13:46 +0000 +++ src/mgr/Forwarder.cc 2013-12-06 23:52:26 +0000 @@ -31,7 +31,7 @@ Must(entry != NULL); HTTPMSGLOCK(httpRequest); - entry->lock(); + entry->lock("Mgr::Forwarder"); EBIT_SET(entry->flags, ENTRY_FWD_HDR_WAIT); closer = asyncCall(16, 5, "Mgr::Forwarder::noteCommClosed", @@ -47,7 +47,7 @@ HTTPMSGUNLOCK(httpRequest); entry->unregisterAbort(); - entry->unlock(); + entry->unlock("Mgr::Forwarder"); cleanup(); } === modified file 'src/mgr/StoreToCommWriter.cc' --- src/mgr/StoreToCommWriter.cc 2013-10-25 00:13:46 +0000 +++ src/mgr/StoreToCommWriter.cc 2013-12-19 04:53:35 +0000 @@ -138,7 +138,7 @@ sc = NULL; } entry->unregisterAbort(); - entry->unlock(); + entry->unlock("Mgr::StoreToCommWriter::swanSong"); entry = NULL; } close(); === modified file 'src/mime.cc' --- src/mime.cc 2013-03-23 11:47:20 +0000 +++ src/mime.cc 2013-12-19 04:53:35 +0000 @@ -440,7 +440,7 @@ e->flush(); e->complete(); e->timestampsSet(); - e->unlock(); + e->unlock("MimeIcon::created"); memFree(buf, MEM_4K_BUF); debugs(25, 3, "Loaded icon " << url_); } === modified file 'src/neighbors.cc' --- src/neighbors.cc 2013-11-18 11:40:56 +0000 +++ src/neighbors.cc 2013-12-19 04:53:35 +0000 @@ -1025,7 +1025,7 @@ return; } - if (entry->lock_count == 0) { + if (!entry->locked()) { // TODO: many entries are unlocked; why is this reported at level 1? debugs(12, DBG_IMPORTANT, "neighborsUdpAck: '" << storeKeyText(key) << "' has no locks"); neighborCountIgnored(p); @@ -1426,7 +1426,7 @@ fake->abort(); // sets ENTRY_ABORTED and initiates releated cleanup HTTPMSGUNLOCK(fake->mem_obj->request); - fake->unlock(); + fake->unlock("peerCountMcastPeersDone"); HTTPMSGUNLOCK(psstate->request); cbdataFree(psstate); } @@ -1732,7 +1732,7 @@ return; } - if (e->lock_count == 0) { + if (!e->locked()) { // TODO: many entries are unlocked; why is this reported at level 1? 
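Most of the mechanical churn in the hunks above and below follows a single pattern: StoreEntry::lock() and unlock() now take a caller-identifying context string, reference-time updates moved into a separate touch(), and raw lock_count tests become locked(). A minimal sketch of the resulting calling convention (the context label is simply whatever string best identifies the caller):

    entry->lock("MyComponent::start");    // counts a reference; no longer updates lastref
    entry->touch();                       // optionally refresh lastref / replacement policy
    // ... use the entry ...
    if (entry->locked())                  // instead of testing lock_count directly
        debugs(20, 3, "still referenced: " << *entry);
    entry->unlock("MyComponent::start");  // may destroy the entry; do not touch it afterwards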
debugs(12, DBG_IMPORTANT, "neighborsUdpAck: '" << storeKeyText(key) << "' has no locks"); neighborCountIgnored(p); === modified file 'src/peer_digest.cc' --- src/peer_digest.cc 2013-10-25 00:13:46 +0000 +++ src/peer_digest.cc 2013-12-19 04:53:35 +0000 @@ -371,8 +371,8 @@ if (old_e) { debugs(72, 5, "peerDigestRequest: found old entry"); - old_e->lock(); - old_e->createMemObject(url, url); + old_e->lock("peerDigestRequest"); + old_e->createMemObject(url, url, req->method); fetch->old_sc = storeClientListAdd(old_e, fetch); } @@ -561,7 +561,7 @@ /* get rid of 304 reply */ storeUnregister(fetch->sc, fetch->entry, fetch); - fetch->entry->unlock(); + fetch->entry->unlock("peerDigestFetchReply 304"); fetch->entry = fetch->old_entry; @@ -577,7 +577,7 @@ debugs(72, 3, "peerDigestFetchReply: got new digest, releasing old one"); storeUnregister(fetch->old_sc, fetch->old_entry, fetch); fetch->old_entry->releaseRequest(); - fetch->old_entry->unlock(); + fetch->old_entry->unlock("peerDigestFetchReply 200"); fetch->old_entry = NULL; } } else { @@ -910,7 +910,7 @@ debugs(72, 3, "peerDigestFetchFinish: deleting old entry"); storeUnregister(fetch->old_sc, fetch->old_entry, fetch); fetch->old_entry->releaseRequest(); - fetch->old_entry->unlock(); + fetch->old_entry->unlock("peerDigestFetchFinish old"); fetch->old_entry = NULL; } @@ -926,7 +926,7 @@ /* unlock everything */ storeUnregister(fetch->sc, fetch->entry, fetch); - fetch->entry->unlock(); + fetch->entry->unlock("peerDigestFetchFinish new"); HTTPMSGUNLOCK(fetch->request); === modified file 'src/peer_select.cc' --- src/peer_select.cc 2013-12-06 14:59:47 +0000 +++ src/peer_select.cc 2013-12-06 23:52:26 +0000 @@ -108,7 +108,7 @@ if (entry) { assert(entry->ping_status != PING_WAITING); - entry->unlock(); + entry->unlock("peerSelect"); entry = NULL; } @@ -173,7 +173,7 @@ #endif if (psstate->entry) - psstate->entry->lock(); + psstate->entry->lock("peerSelect"); peerSelectFoo(psstate); } === modified file 'src/refresh.cc' --- src/refresh.cc 2013-10-25 00:13:46 +0000 +++ src/refresh.cc 2013-12-06 23:52:26 +0000 @@ -240,7 +240,7 @@ stale_flags sf; if (entry->mem_obj) - uri = entry->mem_obj->url; + uri = entry->mem_obj->storeId(); else if (request) uri = urlCanonical(request); === modified file 'src/repl/heap/store_heap_replacement.cc' --- src/repl/heap/store_heap_replacement.cc 2013-10-25 00:13:46 +0000 +++ src/repl/heap/store_heap_replacement.cc 2013-12-19 23:13:24 +0000 @@ -90,8 +90,8 @@ " refcnt=" << e->refcount << " lastref=" << e->lastref << " heap_age=" << heap_age << " tie=" << tie << " -> " << key); - if (e->mem_obj && e->mem_obj->url) - debugs(81, 3, "HeapKeyGen_StoreEntry_LFUDA: url=" << e->mem_obj->url); + if (e->mem_obj) + debugs(81, 3, "storeId=" << e->mem_obj->storeId()); return (double) key; } @@ -128,8 +128,8 @@ e->lastref << " heap_age=" << heap_age << " tie=" << tie << " -> " << key); - if (e->mem_obj && e->mem_obj->url) - debugs(81, 3, "HeapKeyGen_StoreEntry_GDSF: url=" << e->mem_obj->url); + if (e->mem_obj) + debugs(81, 3, "storeId=" << e->mem_obj->storeId()); return key; } @@ -149,8 +149,8 @@ e->getMD5Text() << " heap_age=" << heap_age << " lastref=" << (double) e->lastref ); - if (e->mem_obj && e->mem_obj->url) - debugs(81, 3, "HeapKeyGen_StoreEntry_LRU: url=" << e->mem_obj->url); + if (e->mem_obj) + debugs(81, 3, "storeId=" << e->mem_obj->storeId()); return (heap_key) e->lastref; } === modified file 'src/repl/heap/store_repl_heap.cc' --- src/repl/heap/store_repl_heap.cc 2013-10-25 00:13:46 +0000 +++ src/repl/heap/store_repl_heap.cc 
2013-12-19 04:53:35 +0000 @@ -230,7 +230,7 @@ if (entry->locked()) { - entry->lock(); + entry->lock("heap_purgeNext"); linklistPush(&heap_walker->locked_entries, entry); goto try_again; @@ -263,7 +263,7 @@ while ((entry = (StoreEntry *)linklistShift(&heap_walker->locked_entries))) { heap_node *node = heap_insert(h->theHeap, entry); h->setPolicyNode(entry, node); - entry->unlock(); + entry->unlock("heap_purgeDone"); } safe_free(walker->_data); === modified file 'src/ssl/ServerBump.cc' --- src/ssl/ServerBump.cc 2013-06-07 04:35:25 +0000 +++ src/ssl/ServerBump.cc 2013-08-15 22:09:07 +0000 @@ -22,7 +22,7 @@ const char *uri = urlCanonical(request.getRaw()); if (e) { entry = e; - entry->lock(); + entry->lock("Ssl::ServerBump"); } else entry = storeCreateEntry(uri, uri, request->flags, request->method); // We do not need to be a client because the error contents will be used @@ -36,7 +36,7 @@ if (entry) { debugs(33, 4, HERE << *entry); storeUnregister(sc, entry, this); - entry->unlock(); + entry->unlock("Ssl::ServerBump"); } cbdataReferenceDone(sslErrors); } === modified file 'src/stat.cc' --- src/stat.cc 2013-12-05 11:04:45 +0000 +++ src/stat.cc 2013-12-06 23:52:26 +0000 @@ -320,9 +320,6 @@ if (EBIT_TEST(flags, REFRESH_REQUEST)) strcat(buf, "REFRESH_REQUEST,"); - if (EBIT_TEST(flags, ENTRY_CACHABLE)) - strcat(buf, "CACHABLE,"); - if (EBIT_TEST(flags, ENTRY_DISPATCHED)) strcat(buf, "DISPATCHED,"); @@ -371,7 +368,7 @@ mb->Printf("\t%s\n", storeEntryFlags(e)); mb->Printf("\t%s\n", describeTimestamps(e)); mb->Printf("\t%d locks, %d clients, %d refs\n", - (int) e->lock_count, + (int) e->locks(), storePendingNClients(e), (int) e->refcount); mb->Printf("\tSwap Dir %d, File %#08X\n", @@ -394,11 +391,11 @@ if (UsingSmp()) storeAppendPrintf(state->sentry, "} by kid%d\n\n", KidIdentifier); state->sentry->complete(); - state->sentry->unlock(); + state->sentry->unlock("statObjects+isDone"); cbdataFree(state); return; } else if (EBIT_TEST(state->sentry->flags, ENTRY_ABORTED)) { - state->sentry->unlock(); + state->sentry->unlock("statObjects+aborted"); cbdataFree(state); return; } else if (state->sentry->checkDeferRead(-1)) { @@ -436,7 +433,7 @@ state->sentry = sentry; state->filter = filter; - sentry->lock(); + sentry->lock("statObjects"); state->theSearch = Store::Root().search(NULL, NULL); eventAdd("statObjects", statObjects, state, 0.0, 1); === modified file 'src/stmem.cc' --- src/stmem.cc 2013-03-11 00:30:26 +0000 +++ src/stmem.cc 2013-05-22 21:25:39 +0000 @@ -313,7 +313,7 @@ return true; } - return false; + return !range.size(); // empty range is contigous } bool === modified file 'src/store.cc' --- src/store.cc 2013-12-11 22:44:59 +0000 +++ src/store.cc 2014-01-01 19:20:49 +0000 @@ -74,8 +74,6 @@ #include #endif -static STMCB storeWriteComplete; - #define REBUILD_TIMESTAMP_DELTA_MAX 2 #define STORE_IN_MEM_BUCKETS (229) @@ -193,7 +191,7 @@ { /* This object can be cached for a long time */ - if (EBIT_TEST(flags, ENTRY_CACHABLE)) + if (!EBIT_TEST(flags, RELEASE_REQUEST)) setPublicKey(); } @@ -203,7 +201,6 @@ /* This object should never be cached at all */ expireNow(); releaseRequest(); /* delete object when not used */ - /* releaseRequest clears ENTRY_CACHABLE flag */ } void @@ -211,9 +208,7 @@ { /* This object may be negatively cached */ negativeCache(); - - if (EBIT_TEST(flags, ENTRY_CACHABLE)) - setPublicKey(); + makePublic(); } size_t @@ -314,6 +309,11 @@ mem_obj->setNoDelay(newValue); } +// XXX: Type names mislead. 
STORE_DISK_CLIENT actually means that we should +// open swapin file, aggressively trim memory, and ignore read-ahead gap. +// It does not mean we will read from disk exclusively (or at all!). +// XXX: May create STORE_DISK_CLIENT with no disk caching configured. +// XXX: Collapsed clients cannot predict their type. store_client_t StoreEntry::storeClientType() const { @@ -323,11 +323,6 @@ * offset 0 in the memory object is the HTTP headers. */ - if (mem_status == IN_MEMORY && Config.memShared && IamWorkerProcess()) { - // clients of an object cached in shared memory are memory clients - return STORE_MEM_CLIENT; - } - assert(mem_obj); if (mem_obj->inmem_lo) @@ -346,7 +341,7 @@ if (swap_status == SWAPOUT_DONE) { debugs(20,7, HERE << mem_obj << " lo: " << mem_obj->inmem_lo << " hi: " << mem_obj->endOffset() << " size: " << mem_obj->object_sz); if (mem_obj->endOffset() == mem_obj->object_sz) { - /* hot object fully swapped in */ + /* hot object fully swapped in (XXX: or swapped out?) */ return STORE_MEM_CLIENT; } } else { @@ -382,54 +377,27 @@ StoreEntry::StoreEntry() : mem_obj(NULL), - hidden_mem_obj(NULL), - timestamp(-1), - lastref(-1), - expires(-1), - lastmod(-1), - swap_file_sz(0), - refcount(0), - flags(0), - swap_filen(-1), - swap_dirn(-1), - lock_count(0), - mem_status(NOT_IN_MEMORY), - ping_status(PING_NONE), - store_status(STORE_PENDING), - swap_status(SWAPOUT_NONE) -{ - debugs(20, 3, HERE << "new StoreEntry " << this); -} - -StoreEntry::StoreEntry(const char *aUrl, const char *aLogUrl) : - mem_obj(NULL), - hidden_mem_obj(NULL), - timestamp(-1), - lastref(-1), - expires(-1), - lastmod(-1), - swap_file_sz(0), - refcount(0), - flags(0), - swap_filen(-1), - swap_dirn(-1), - lock_count(0), - mem_status(NOT_IN_MEMORY), - ping_status(PING_NONE), - store_status(STORE_PENDING), - swap_status(SWAPOUT_NONE) -{ - debugs(20, 3, HERE << "new StoreEntry " << this); - mem_obj = new MemObject(aUrl, aLogUrl); + timestamp(-1), + lastref(-1), + expires(-1), + lastmod(-1), + swap_file_sz(0), + refcount(0), + flags(0), + swap_filen(-1), + swap_dirn(-1), + mem_status(NOT_IN_MEMORY), + ping_status(PING_NONE), + store_status(STORE_PENDING), + swap_status(SWAPOUT_NONE), + lock_count(0) +{ + debugs(20, 5, "StoreEntry constructed, this=" << this); } StoreEntry::~StoreEntry() { - if (swap_filen >= 0) { - SwapDir &sd = dynamic_cast(*store()); - sd.disconnect(*this); - } - delete hidden_mem_obj; + debugs(20, 5, "StoreEntry destructed, this=" << this); } #if USE_ADAPTATION @@ -457,22 +425,18 @@ StoreEntry::destroyMemObject() { debugs(20, 3, HERE << "destroyMemObject " << mem_obj); - setMemStatus(NOT_IN_MEMORY); - MemObject *mem = mem_obj; - mem_obj = NULL; - delete mem; - delete hidden_mem_obj; - hidden_mem_obj = NULL; -} - -void -StoreEntry::hideMemObject() -{ - debugs(20, 3, HERE << "hiding " << mem_obj); - assert(mem_obj); - assert(!hidden_mem_obj); - hidden_mem_obj = mem_obj; - mem_obj = NULL; + + if (MemObject *mem = mem_obj) { + // Store::Root() is FATALly missing during shutdown + if (mem->xitTable.index >= 0 && !shutting_down) + Store::Root().transientsDisconnect(*mem); + if (mem->memCache.index >= 0 && !shutting_down) + Store::Root().memoryDisconnect(*this); + + setMemStatus(NOT_IN_MEMORY); + mem_obj = NULL; + delete mem; + } } void @@ -485,6 +449,12 @@ if (e == NullStoreEntry::getInstance()) return; + // Store::Root() is FATALly missing during shutdown + if (e->swap_filen >= 0 && !shutting_down) { + SwapDir &sd = dynamic_cast(*e->store()); + sd.disconnect(*e); + } + e->destroyMemObject(); 
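The ENTRY_CACHABLE flag is dropped throughout these store.cc hunks; the touched code paths now treat an entry as cachable unless a release has been requested. A small sketch of the equivalent check after this change (the helper name is illustrative only):

    // before: if (EBIT_TEST(flags, ENTRY_CACHABLE)) setPublicKey();
    // after:  "cachable" means no release has been requested yet
    static bool stillCachable(const StoreEntry &e)
    {
        return !EBIT_TEST(e.flags, RELEASE_REQUEST);
    }

    if (stillCachable(*e))
        e->setPublicKey();   // as StoreEntry::makePublic() now does

Note that setReleaseFlag() now also calls Store::Root().markForUnlink(), so requesting a release propagates to the memory cache, the transients table, and the owning cache_dir.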
e->hashDelete(); @@ -499,7 +469,7 @@ void StoreEntry::hashInsert(const cache_key * someKey) { - debugs(20, 3, "StoreEntry::hashInsert: Inserting Entry " << this << " key '" << storeKeyText(someKey) << "'"); + debugs(20, 3, "StoreEntry::hashInsert: Inserting Entry " << *this << " key '" << storeKeyText(someKey) << "'"); key = storeKeyDup(someKey); hash_join(store_table, this); } @@ -507,9 +477,11 @@ void StoreEntry::hashDelete() { - hash_remove_link(store_table, this); - storeKeyFree((const cache_key *)key); - key = NULL; + if (key) { // some test cases do not create keys and do not hashInsert() + hash_remove_link(store_table, this); + storeKeyFree((const cache_key *)key); + key = NULL; + } } /* -------------------------------------------------------------------------- */ @@ -523,26 +495,22 @@ debugs(20, 3, "StoreEntry::purgeMem: Freeing memory-copy of " << getMD5Text()); - destroyMemObject(); + Store::Root().memoryUnlink(*this); if (swap_status != SWAPOUT_DONE) release(); } -/* RBC 20050104 this is wrong- memory ref counting - * is not at all equivalent to the store 'usage' concept - * which the replacement policies should be acting upon. - * specifically, object iteration within stores needs - * memory ref counting to prevent race conditions, - * but this should not influence store replacement. - */ void - -StoreEntry::lock() +StoreEntry::lock(const char *context) { ++lock_count; - debugs(20, 3, "StoreEntry::lock: key '" << getMD5Text() <<"' count=" << - lock_count ); + debugs(20, 3, context << " locked key " << getMD5Text() << ' ' << *this); +} + +void +StoreEntry::touch() +{ lastref = squid_curtime; Store::Root().reference(*this); } @@ -556,6 +524,8 @@ debugs(20, 3, "StoreEntry::setReleaseFlag: '" << getMD5Text() << "'"); EBIT_SET(flags, RELEASE_REQUEST); + + Store::Root().markForUnlink(*this); } void @@ -564,25 +534,18 @@ if (EBIT_TEST(flags, RELEASE_REQUEST)) return; - setReleaseFlag(); - - /* - * Clear cachable flag here because we might get called before - * anyone else even looks at the cachability flag. Also, this - * prevents httpMakePublic from really setting a public key. - */ - EBIT_CLR(flags, ENTRY_CACHABLE); + setReleaseFlag(); // makes validToSend() false, preventing future hits setPrivateKey(); } -/* unlock object, return -1 if object get released after unlock - * otherwise lock_count */ int -StoreEntry::unlock() +StoreEntry::unlock(const char *context) { + debugs(20, 3, (context ? 
context : "somebody") << + " unlocking key " << getMD5Text() << ' ' << *this); + assert(lock_count > 0); --lock_count; - debugs(20, 3, "StoreEntry::unlock: key '" << getMD5Text() << "' count=" << lock_count); if (lock_count) return (int) lock_count; @@ -693,15 +656,18 @@ return; /* is already private */ if (key) { + setReleaseFlag(); // will markForUnlink(); all caches/workers will know + + // TODO: move into SwapDir::markForUnlink() already called by Root() if (swap_filen > -1) storeDirSwapLog(this, SWAP_LOG_DEL); hashDelete(); } - if (mem_obj != NULL) { + if (mem_obj && mem_obj->hasUris()) { mem_obj->id = getKeyCounter(); - newkey = storeKeyPrivate(mem_obj->url, mem_obj->method, mem_obj->id); + newkey = storeKeyPrivate(mem_obj->storeId(), mem_obj->method, mem_obj->id); } else { newkey = storeKeyPrivate("JUNK", Http::METHOD_NONE, getKeyCounter()); } @@ -714,7 +680,6 @@ void StoreEntry::setPublicKey() { - StoreEntry *e2 = NULL; const cache_key *newkey; if (key && !EBIT_TEST(flags, KEY_PRIVATE)) @@ -729,8 +694,7 @@ * store clients won't be able to access object data which has * been freed from memory. * - * If RELEASE_REQUEST is set, then ENTRY_CACHABLE should not - * be set, and StoreEntry::setPublicKey() should not be called. + * If RELEASE_REQUEST is set, setPublicKey() should not be called. */ #if MORE_DEBUG_OUTPUT @@ -753,9 +717,7 @@ * to record the new variance key */ safe_free(request->vary_headers); /* free old "bad" variance key */ - StoreEntry *pe = storeGetPublic(mem_obj->url, mem_obj->method); - - if (pe) + if (StoreEntry *pe = storeGetPublic(mem_obj->storeId(), mem_obj->method)) pe->release(); } @@ -770,10 +732,10 @@ // TODO: storeGetPublic() calls below may create unlocked entries. // We should add/use storeHas() API or lock/unlock those entries. 
- if (mem_obj->vary_headers && !storeGetPublic(mem_obj->url, mem_obj->method)) { + if (mem_obj->vary_headers && !storeGetPublic(mem_obj->storeId(), mem_obj->method)) { /* Create "vary" base object */ String vary; - StoreEntry *pe = storeCreateEntry(mem_obj->url, mem_obj->log_url, request->flags, request->method); + StoreEntry *pe = storeCreateEntry(mem_obj->storeId(), mem_obj->logUri(), request->flags, request->method); /* We are allowed to do this typecast */ HttpReply *rep = new HttpReply; rep->setHeaders(Http::scOkay, "Internal marker object", "x-squid-internal/vary", -1, -1, squid_curtime + 100000); @@ -805,22 +767,22 @@ pe->complete(); - pe->unlock(); + pe->unlock("StoreEntry::setPublicKey+Vary"); } newkey = storeKeyPublicByRequest(mem_obj->request); } else - newkey = storeKeyPublic(mem_obj->url, mem_obj->method); + newkey = storeKeyPublic(mem_obj->storeId(), mem_obj->method); - if ((e2 = (StoreEntry *) hash_lookup(store_table, newkey))) { - debugs(20, 3, "StoreEntry::setPublicKey: Making old '" << mem_obj->url << "' private."); + if (StoreEntry *e2 = (StoreEntry *)hash_lookup(store_table, newkey)) { + debugs(20, 3, "Making old " << *e2 << " private."); e2->setPrivateKey(); e2->release(); if (mem_obj->request) newkey = storeKeyPublicByRequest(mem_obj->request); else - newkey = storeKeyPublic(mem_obj->url, mem_obj->method); + newkey = storeKeyPublic(mem_obj->storeId(), mem_obj->method); } if (key) @@ -835,32 +797,22 @@ } StoreEntry * -storeCreateEntry(const char *url, const char *log_url, const RequestFlags &flags, const HttpRequestMethod& method) +storeCreatePureEntry(const char *url, const char *log_url, const RequestFlags &flags, const HttpRequestMethod& method) { StoreEntry *e = NULL; - MemObject *mem = NULL; debugs(20, 3, "storeCreateEntry: '" << url << "'"); - e = new StoreEntry(url, log_url); - e->lock_count = 1; /* Note lock here w/o calling storeLock() */ - mem = e->mem_obj; - mem->method = method; - - if (neighbors_do_private_keys || !flags.hierarchical) - e->setPrivateKey(); - else - e->setPublicKey(); + e = new StoreEntry(); + e->makeMemObject(); + e->mem_obj->setUris(url, log_url, method); if (flags.cachable) { - EBIT_SET(e->flags, ENTRY_CACHABLE); EBIT_CLR(e->flags, RELEASE_REQUEST); } else { - /* StoreEntry::releaseRequest() clears ENTRY_CACHABLE */ e->releaseRequest(); } e->store_status = STORE_PENDING; - e->setMemStatus(NOT_IN_MEMORY); e->refcount = 0; e->lastref = squid_curtime; e->timestamp = -1; /* set in StoreEntry::timestampsSet() */ @@ -869,6 +821,20 @@ return e; } +StoreEntry * +storeCreateEntry(const char *url, const char *logUrl, const RequestFlags &flags, const HttpRequestMethod& method) +{ + StoreEntry *e = storeCreatePureEntry(url, logUrl, flags, method); + e->lock("storeCreateEntry"); + + if (neighbors_do_private_keys || !flags.hierarchical) + e->setPrivateKey(); + else + e->setPublicKey(); + + return e; +} + /* Mark object as expired */ void StoreEntry::expireNow() @@ -878,21 +844,6 @@ } void -storeWriteComplete (void *data, StoreIOBuffer wroteBuffer) -{ - PROF_start(storeWriteComplete); - StoreEntry *e = (StoreEntry *)data; - - if (EBIT_TEST(e->flags, DELAY_SENDING)) { - PROF_stop(storeWriteComplete); - return; - } - - e->invokeHandlers(); - PROF_stop(storeWriteComplete); -} - -void StoreEntry::write (StoreIOBuffer writeBuffer) { assert(mem_obj != NULL); @@ -900,10 +851,17 @@ PROF_start(StoreEntry_write); assert(store_status == STORE_PENDING); + // XXX: caller uses content offset, but we also store headers + if (const HttpReply *reply = 
mem_obj->getReply()) + writeBuffer.offset += reply->hdr_sz; + debugs(20, 5, "storeWrite: writing " << writeBuffer.length << " bytes for '" << getMD5Text() << "'"); PROF_stop(StoreEntry_write); storeGetMemSpace(writeBuffer.length); - mem_obj->write (writeBuffer, storeWriteComplete, this); + mem_obj->write(writeBuffer); + + if (!EBIT_TEST(flags, DELAY_SENDING)) + invokeHandlers(); } /* Append incoming data from a primary server to an entry. */ @@ -1007,9 +965,9 @@ if (store_status == STORE_OK && EBIT_TEST(flags, ENTRY_BAD_LENGTH)) { debugs(20, 2, "StoreEntry::checkCachable: NO: wrong content-length"); ++store_check_cachable_hist.no.wrong_content_length; - } else if (!EBIT_TEST(flags, ENTRY_CACHABLE)) { + } else if (EBIT_TEST(flags, RELEASE_REQUEST)) { debugs(20, 2, "StoreEntry::checkCachable: NO: not cachable"); - ++store_check_cachable_hist.no.not_entry_cachable; + ++store_check_cachable_hist.no.not_entry_cachable; // TODO: rename? } else if (EBIT_TEST(flags, ENTRY_NEGCACHED)) { debugs(20, 3, "StoreEntry::checkCachable: NO: negative cached"); ++store_check_cachable_hist.no.negative_cached; @@ -1044,7 +1002,6 @@ } releaseRequest(); - /* StoreEntry::releaseRequest() cleared ENTRY_CACHABLE */ return 0; } @@ -1135,7 +1092,7 @@ assert(mem_obj != NULL); debugs(20, 6, "storeAbort: " << getMD5Text()); - lock(); /* lock while aborting */ + lock("StoreEntry::abort"); /* lock while aborting */ negativeCache(); releaseRequest(); @@ -1172,7 +1129,7 @@ // abort swap out, invalidating what was created so far (release follows) swapOutFileClose(StoreIOState::writerGone); - unlock(); /* unlock */ + unlock("StoreEntry::abort"); /* unlock */ } /** @@ -1275,7 +1232,7 @@ StoreEntry::release() { PROF_start(storeRelease); - debugs(20, 3, "storeRelease: Releasing: '" << getMD5Text() << "'"); + debugs(20, 3, "releasing " << *this << ' ' << getMD5Text()); /* If, for any reason we can't discard this object because of an * outstanding request, mark it for pending release */ @@ -1287,18 +1244,14 @@ return; } + Store::Root().memoryUnlink(*this); + if (StoreController::store_dirs_rebuilding && swap_filen > -1) { setPrivateKey(); - if (mem_obj) - destroyMemObject(); - if (swap_filen > -1) { - /* - * Fake a call to StoreEntry->lock() When rebuilding is done, - * we'll just call StoreEntry->unlock() on these. - */ - ++lock_count; + // lock the entry until rebuilding is done + lock("storeLateRelease"); setReleaseFlag(); LateReleaseStack.push_back(this); } else { @@ -1320,7 +1273,6 @@ unlink(); } - setMemStatus(NOT_IN_MEMORY); destroyStoreEntry(static_cast(this)); PROF_stop(storeRelease); } @@ -1346,7 +1298,7 @@ return; } - e->unlock(); + e->unlock("storeLateRelease"); ++n; } @@ -1360,14 +1312,9 @@ if (lock_count) return 1; - if (swap_status == SWAPOUT_WRITING) - return 1; - - if (store_status == STORE_PENDING) - return 1; - /* - * SPECIAL, PUBLIC entries should be "locked" + * SPECIAL, PUBLIC entries should be "locked"; + * XXX: Their owner should lock them then instead of relying on this hack. 
*/ if (EBIT_TEST(flags, ENTRY_SPECIAL)) if (!EBIT_TEST(flags, KEY_PRIVATE)) @@ -1542,6 +1489,25 @@ if (EBIT_TEST(flags, ENTRY_ABORTED)) return 0; + // now check that the entry has a cache backing or is collapsed + if (swap_filen > -1) // backed by a disk cache + return 1; + + if (swappingOut()) // will be backed by a disk cache + return 1; + + if (!mem_obj) // not backed by a memory cache and not collapsed + return 0; + + if (mem_obj->memCache.index >= 0) // backed by a shared memory cache + return 0; + + // StoreEntry::storeClientType() assumes DISK_CLIENT here, but there is no + // disk cache backing so we should not rely on the store cache at all. This + // is wrong for range requests that could feed off nibbled memory (XXX). + if (mem_obj->inmem_lo) // in local memory cache, but got nibbled at + return 0; + return 1; } @@ -1645,8 +1611,6 @@ // are we using a shared memory cache? if (Config.memShared && IamWorkerProcess()) { - // enumerate calling cases if shared memory is enabled - assert(new_status != IN_MEMORY || EBIT_TEST(flags, ENTRY_SPECIAL)); // This method was designed to update replacement policy, not to // actually purge something from the memory cache (TODO: rename?). // Shared memory cache does not have a policy that needs updates. @@ -1660,19 +1624,19 @@ assert(mem_obj->inmem_lo == 0); if (EBIT_TEST(flags, ENTRY_SPECIAL)) { - debugs(20, 4, "StoreEntry::setMemStatus: not inserting special " << mem_obj->url << " into policy"); + debugs(20, 4, "not inserting special " << *this << " into policy"); } else { mem_policy->Add(mem_policy, this, &mem_obj->repl); - debugs(20, 4, "StoreEntry::setMemStatus: inserted mem node " << mem_obj->url << " key: " << getMD5Text()); + debugs(20, 4, "inserted " << *this << " key: " << getMD5Text()); } ++hot_obj_count; // TODO: maintain for the shared hot cache as well } else { if (EBIT_TEST(flags, ENTRY_SPECIAL)) { - debugs(20, 4, "StoreEntry::setMemStatus: special entry " << mem_obj->url); + debugs(20, 4, "not removing special " << *this << " from policy"); } else { mem_policy->Remove(mem_policy, this, &mem_obj->repl); - debugs(20, 4, "StoreEntry::setMemStatus: removed mem node " << mem_obj->url); + debugs(20, 4, "removed " << *this); } --hot_obj_count; @@ -1689,26 +1653,22 @@ else if (mem_obj == NULL) return "[null_mem_obj]"; else - return mem_obj->url; + return mem_obj->storeId(); +} + +MemObject * +StoreEntry::makeMemObject() +{ + if (!mem_obj) + mem_obj = new MemObject(); + return mem_obj; } void -StoreEntry::createMemObject(const char *aUrl, const char *aLogUrl) +StoreEntry::createMemObject(const char *aUrl, const char *aLogUrl, const HttpRequestMethod &aMethod) { - debugs(20, 3, "A mem_obj create attempted using : " << aUrl); - - if (mem_obj) - return; - - if (hidden_mem_obj) { - debugs(20, 3, HERE << "restoring " << hidden_mem_obj); - mem_obj = hidden_mem_obj; - hidden_mem_obj = NULL; - mem_obj->resetUrls(aUrl, aLogUrl); - return; - } - - mem_obj = new MemObject(aUrl, aLogUrl); + makeMemObject(); + mem_obj->setUris(aUrl, aLogUrl, aMethod); } /* this just sets DELAY_SENDING */ @@ -1881,6 +1841,7 @@ rep->packHeadersInto(&p); mem_obj->markEndOfReplyHeaders(); + EBIT_CLR(flags, ENTRY_FWD_HDR_WAIT); rep->body.packInto(&p); @@ -1913,21 +1874,12 @@ if (EBIT_TEST(flags, ENTRY_SPECIAL)) return; // cannot trim because we do not load them again - if (!preserveSwappable) { - if (mem_obj->policyLowestOffsetToKeep(0) == 0) { - /* Nothing to do */ - return; - } - /* - * Its not swap-able, and we're about to delete a chunk, - * so we must make it PRIVATE. 
This is tricky/ugly because - * for the most part, we treat swapable == cachable here. - */ - releaseRequest(); - mem_obj->trimUnSwappable (); - } else { - mem_obj->trimSwappable (); - } + if (preserveSwappable) + mem_obj->trimSwappable(); + else + mem_obj->trimUnSwappable(); + + debugs(88, 7, *this << " inmem_lo=" << mem_obj->inmem_lo); } bool @@ -2060,9 +2012,50 @@ std::ostream &operator <<(std::ostream &os, const StoreEntry &e) { - return os << e.swap_filen << '@' << e.swap_dirn << '=' << - e.mem_status << '/' << e.ping_status << '/' << e.store_status << '/' << - e.swap_status; + os << "e:"; + + if (e.mem_obj) { + if (e.mem_obj->xitTable.index > -1) + os << 't' << e.mem_obj->xitTable.index; + if (e.mem_obj->memCache.index > -1) + os << 'm' << e.mem_obj->memCache.index; + } + if (e.swap_filen > -1 || e.swap_dirn > -1) + os << 'd' << e.swap_filen << '@' << e.swap_dirn; + + os << '='; + + // print only non-default status values, using unique letters + if (e.mem_status != NOT_IN_MEMORY || + e.store_status != STORE_PENDING || + e.swap_status != SWAPOUT_NONE || + e.ping_status != PING_NONE) { + if (e.mem_status != NOT_IN_MEMORY) os << 'm'; + if (e.store_status != STORE_PENDING) os << 's'; + if (e.swap_status != SWAPOUT_NONE) os << 'w' << e.swap_status; + if (e.ping_status != PING_NONE) os << 'p' << e.ping_status; + } + + // print only set flags, using unique letters + if (e.flags) { + if (EBIT_TEST(e.flags, ENTRY_SPECIAL)) os << 'S'; + if (EBIT_TEST(e.flags, ENTRY_REVALIDATE)) os << 'R'; + if (EBIT_TEST(e.flags, DELAY_SENDING)) os << 'P'; + if (EBIT_TEST(e.flags, RELEASE_REQUEST)) os << 'X'; + if (EBIT_TEST(e.flags, REFRESH_REQUEST)) os << 'F'; + if (EBIT_TEST(e.flags, ENTRY_DISPATCHED)) os << 'D'; + if (EBIT_TEST(e.flags, KEY_PRIVATE)) os << 'I'; + if (EBIT_TEST(e.flags, ENTRY_FWD_HDR_WAIT)) os << 'W'; + if (EBIT_TEST(e.flags, ENTRY_NEGCACHED)) os << 'N'; + if (EBIT_TEST(e.flags, ENTRY_VALIDATED)) os << 'V'; + if (EBIT_TEST(e.flags, ENTRY_BAD_LENGTH)) os << 'L'; + if (EBIT_TEST(e.flags, ENTRY_ABORTED)) os << 'A'; + } + + if (e.mem_obj && e.mem_obj->smpCollapsed) + os << 'O'; + + return os << '/' << &e << '*' << e.locks(); } /* NullStoreEntry */ === modified file 'src/store_client.cc' --- src/store_client.cc 2013-10-25 00:13:46 +0000 +++ src/store_client.cc 2013-12-31 18:49:41 +0000 @@ -34,6 +34,7 @@ #include "squid.h" #include "event.h" +#include "globals.h" #include "HttpReply.h" #include "HttpRequest.h" #include "MemBuf.h" @@ -249,46 +250,50 @@ PROF_stop(storeClient_kickReads); copying = false; - // XXX: storeClientCopy2 calls doCopy() whose callback may free 'this'! - // We should make store copying asynchronous, to avoid worrying about - // 'this' being secretly deleted while we are still inside the object. - // For now, lock and use on-stack objects after storeClientCopy2(). - ++anEntry->lock_count; + anEntry->lock("store_client::copy"); // see deletion note below storeClientCopy2(entry, this); + // Bug 3480: This store_client object may be deleted now if, for example, + // the client rejects the hit response copied above. Use on-stack pointers! + #if USE_ADAPTATION anEntry->kickProducer(); #endif + anEntry->unlock("store_client::copy"); - anEntry->unlock(); // after the "++enEntry->lock_count" above // Add no code here. This object may no longer exist. } -/* - * This function is used below to decide if we have any more data to - * send to the client. If the store_status is STORE_PENDING, then we - * do have more data to send. If its STORE_OK, then - * we continue checking. 
If the object length is negative, then we - * don't know the real length and must open the swap file to find out. - * If the length is >= 0, then we compare it to the requested copy - * offset. - */ -static int -storeClientNoMoreToSend(StoreEntry * e, store_client * sc) +/// Whether there is (or will be) more entry data for us. +bool +store_client::moreToSend() const { - int64_t len; - - if (e->store_status == STORE_PENDING) - return 0; - - if ((len = e->objectLen()) < 0) - return 0; - - if (sc->copyInto.offset < len) - return 0; - - return 1; + if (entry->store_status == STORE_PENDING) + return true; // there may be more coming + + /* STORE_OK, including aborted entries: no more data is coming */ + + const int64_t len = entry->objectLen(); + + // If we do not know the entry length, then we have to open the swap file, + // which is only possible if there is one AND if we are allowed to use it. + const bool canSwapIn = entry->swap_filen >= 0 && + getType() == STORE_DISK_CLIENT; + if (len < 0) + return canSwapIn; + + if (copyInto.offset >= len) + return false; // sent everything there is + + if (canSwapIn) + return true; // if we lack prefix, we can swap it in + + // If we cannot swap in, make sure we have what we want in RAM. Otherwise, + // scheduleRead calls scheduleDiskRead which asserts on STORE_MEM_CLIENTs. + const MemObject *mem = entry->mem_obj; + return mem && + mem->inmem_lo <= copyInto.offset && copyInto.offset < mem->endOffset(); } static void @@ -348,7 +353,7 @@ copyInto.offset << ", hi: " << mem->endOffset()); - if (storeClientNoMoreToSend(entry, this)) { + if (!moreToSend()) { /* There is no more to send! */ debugs(33, 3, HERE << "There is no more to send!"); callback(0); @@ -475,12 +480,6 @@ this); } -static void -storeClientMemWriteComplete(void *data, StoreIOBuffer wroteBuffer) -{ - // Nothin to do here but callback is needed -} - void store_client::readBody(const char *buf, ssize_t len) { @@ -508,15 +507,11 @@ // The above may start to free our object so we need to check again if (entry->mem_obj->inmem_lo == 0) { /* Copy read data back into memory. - * but first we need to adjust offset.. some parts of the code - * counts offset including headers, some parts count offset as - * withing the body.. copyInto is including headers, but the mem - * cache expects offset without headers (using negative for headers) - * eventually not storing packed headers in memory at all. 
+ * copyInto.offset includes headers, which is what mem cache needs */ int64_t mem_offset = entry->mem_obj->endOffset(); if ((copyInto.offset == mem_offset) || (parsed_header && mem_offset == rep->hdr_sz)) { - entry->mem_obj->write(StoreIOBuffer(len, copyInto.offset - rep->hdr_sz, copyInto.data), storeClientMemWriteComplete, this); + entry->mem_obj->write(StoreIOBuffer(len, copyInto.offset, copyInto.data)); } } } @@ -596,10 +591,11 @@ storeSwapTLVFree(tlv_list); assert(swap_hdr_sz >= 0); - assert(entry->swap_file_sz > 0); - assert(entry->swap_file_sz >= static_cast(swap_hdr_sz)); entry->mem_obj->swap_hdr_sz = swap_hdr_sz; - entry->mem_obj->object_sz = entry->swap_file_sz - swap_hdr_sz; + if (entry->swap_file_sz > 0) { // collapsed hits may not know swap_file_sz + assert(entry->swap_file_sz >= static_cast(swap_hdr_sz)); + entry->mem_obj->object_sz = entry->swap_file_sz - swap_hdr_sz; + } debugs(90, 5, "store_client::unpackHeader: swap_file_sz=" << entry->swap_file_sz << "( " << swap_hdr_sz << " + " << entry->mem_obj->object_sz << ")"); @@ -712,7 +708,7 @@ if (sc->_callback.pending()) { /* callback with ssize = -1 to indicate unexpected termination */ - debugs(90, 3, "storeUnregister: store_client for " << mem->url << " has a callback"); + debugs(90, 3, "store_client for " << *e << " has a callback"); sc->fail(); } @@ -723,14 +719,10 @@ delete sc; - // This old assert seemed to imply that a locked entry cannot be deleted, - // but this entry may be deleted because StoreEntry::abort() unlocks it. - assert(e->lock_count > 0); - // Since lock_count of 1 is not sufficient to prevent entry destruction, - // we must lock again so that we can dereference e after CheckQuickAbort(). - // Do not call expensive StoreEntry::lock() here; e "use" has been counted. - // TODO: Separate entry locking from "use" counting to make locking cheap. - ++e->lock_count; + assert(e->locked()); + // An entry locked by others may be unlocked (and destructed) by others, so + // we must lock again to safely dereference e after CheckQuickAbort(). + e->lock("storeUnregister"); if (mem->nclients == 0) CheckQuickAbort(e); @@ -741,7 +733,7 @@ e->kickProducer(); #endif - e->unlock(); // after the "++e->lock_count" above + e->unlock("storeUnregister"); return 1; } @@ -778,6 +770,7 @@ PROF_stop(InvokeHandlers); } +// Does not account for remote readers/clients. int storePendingNClients(const StoreEntry * e) { @@ -855,6 +848,8 @@ return true; } +/// Aborts a swapping-out entry if nobody needs it any more _and_ +/// continuing swap out is not reasonable per CheckQuickAbortIsReasonable(). static void CheckQuickAbort(StoreEntry * entry) { @@ -863,6 +858,9 @@ if (storePendingNClients(entry) > 0) return; + if (!shutting_down && Store::Root().transientReaders(*entry)) + return; + if (entry->store_status != STORE_PENDING) return; === modified file 'src/store_digest.cc' --- src/store_digest.cc 2013-03-16 04:57:43 +0000 +++ src/store_digest.cc 2013-12-19 04:53:35 +0000 @@ -224,11 +224,6 @@ /* check various entry flags (mimics StoreEntry::checkCachable XXX) */ - if (!EBIT_TEST(e->flags, ENTRY_CACHABLE)) { - debugs(71, 6, "storeDigestAddable: NO: not cachable"); - return 0; - } - if (EBIT_TEST(e->flags, KEY_PRIVATE)) { debugs(71, 6, "storeDigestAddable: NO: private key"); return 0; @@ -447,7 +442,7 @@ " (" << std::showpos << (int) (e->expires - squid_curtime) << ")"); /* is this the write order? 
@?@ */ e->mem_obj->unlinkRequest(); - e->unlock(); + e->unlock("storeDigestRewriteFinish"); sd_state.rewrite_lock = NULL; ++sd_state.rewrite_count; eventAdd("storeDigestRewriteStart", storeDigestRewriteStart, NULL, (double) === modified file 'src/store_dir.cc' --- src/store_dir.cc 2013-10-25 00:13:46 +0000 +++ src/store_dir.cc 2013-12-31 18:49:41 +0000 @@ -46,6 +46,7 @@ #include "swap_log_op.h" #include "SwapDir.h" #include "tools.h" +#include "Transients.h" #if HAVE_STATVFS #if HAVE_SYS_STATVFS_H @@ -86,12 +87,13 @@ int StoreController::store_dirs_rebuilding = 1; StoreController::StoreController() : swapDir (new StoreHashIndex()) - , memStore(NULL) + , memStore(NULL), transients(NULL) {} StoreController::~StoreController() { delete memStore; + delete transients; } /* @@ -117,6 +119,11 @@ storeDirSelectSwapDir = storeDirSelectSwapDirLeastLoad; debugs(47, DBG_IMPORTANT, "Using Least Load store dir selection"); } + + if (UsingSmp() && IamWorkerProcess() && Config.onoff.collapsed_forwarding) { + transients = new Transients; + transients->init(); + } } void @@ -747,6 +754,19 @@ StoreEntry * StoreController::get(const cache_key *key) { + if (StoreEntry *e = find(key)) { + // this is not very precise: some get()s are not initiated by clients + e->touch(); + return e; + } + return NULL; +} + +/// Internal method to implements the guts of the Store::get() API: +/// returns an in-transit or cached object with a given key, if any. +StoreEntry * +StoreController::find(const cache_key *key) +{ if (StoreEntry *e = swapDir->get(key)) { // TODO: ignore and maybe handleIdleEntry() unlocked intransit entries // because their backing store slot may be gone already. @@ -754,6 +774,20 @@ return e; } + // Must search transients before caches because we must sync those we find. + if (transients) { + if (StoreEntry *e = transients->get(key)) { + debugs(20, 3, "got shared in-transit entry: " << *e); + bool inSync = false; + const bool found = anchorCollapsed(*e, inSync); + if (!found || inSync) + return e; + assert(!e->locked()); // ensure release will destroyStoreEntry() + e->release(); // do not let others into the same trap + return NULL; + } + } + if (memStore) { if (StoreEntry *e = memStore->get(key)) { debugs(20, 3, HERE << "got mem-cached entry: " << *e); @@ -793,6 +827,45 @@ fatal("not implemented"); } +/// updates the collapsed entry with the corresponding on-disk entry, if any +/// In other words, the SwapDir::anchorCollapsed() API applied to all disks. +bool +StoreController::anchorCollapsedOnDisk(StoreEntry &collapsed, bool &inSync) +{ + // TODO: move this loop to StoreHashIndex, just like the one in get(). 
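The StoreController hunk above splits the public get() from an internal find() and adds the transients table to the lookup chain, so that shared in-transit (collapsed) entries are located and synchronized before the regular caches are consulted. Condensed, the resulting flow is roughly the following (error handling and the memory/disk details trimmed; see the actual hunks for the full logic):

    StoreEntry *
    StoreController::get(const cache_key *key)
    {
        if (StoreEntry *e = find(key)) {
            e->touch();   // update reference time and replacement policy
            return e;
        }
        return NULL;
    }
    // find(): the local store index first, then the shared in-transit (transients)
    // table -- syncing any collapsed entry via anchorCollapsed() -- and then the
    // shared memory cache.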
+ if (const int cacheDirs = Config.cacheSwap.n_configured) { + // ask each cache_dir until the entry is found; use static starting + // point to avoid asking the same subset of disks more often + // TODO: coordinate with put() to be able to guess the right disk often + static int idx = 0; + for (int n = 0; n < cacheDirs; ++n) { + idx = (idx + 1) % cacheDirs; + SwapDir *sd = dynamic_cast(INDEXSD(idx)); + if (!sd->active()) + continue; + + if (sd->anchorCollapsed(collapsed, inSync)) { + debugs(20, 3, "cache_dir " << idx << " anchors " << collapsed); + return true; + } + } + } + + debugs(20, 4, "none of " << Config.cacheSwap.n_configured << + " cache_dirs have " << collapsed); + return false; +} + +void StoreController::markForUnlink(StoreEntry &e) +{ + if (transients && e.mem_obj && e.mem_obj->xitTable.index >= 0) + transients->markForUnlink(e); + if (memStore && e.mem_obj && e.mem_obj->memCache.index >= 0) + memStore->markForUnlink(e); + if (e.swap_filen >= 0) + e.store()->markForUnlink(e); +} + // move this into [non-shared] memory cache class when we have one /// whether e should be kept in local RAM for possible future caching bool @@ -813,11 +886,11 @@ } void -StoreController::maybeTrimMemory(StoreEntry &e, const bool preserveSwappable) +StoreController::memoryOut(StoreEntry &e, const bool preserveSwappable) { bool keepInLocalMemory = false; if (memStore) - keepInLocalMemory = memStore->keepInLocalMemory(e); + memStore->write(e); // leave keepInLocalMemory false else keepInLocalMemory = keepForLocalMemoryCache(e); @@ -828,6 +901,57 @@ } void +StoreController::memoryUnlink(StoreEntry &e) +{ + if (memStore) + memStore->unlink(e); + else // TODO: move into [non-shared] memory cache class when we have one + e.destroyMemObject(); +} + +void +StoreController::memoryDisconnect(StoreEntry &e) +{ + if (memStore) + memStore->disconnect(e); + // else nothing to do for non-shared memory cache +} + +void +StoreController::transientsAbandon(StoreEntry &e) +{ + if (transients) { + assert(e.mem_obj); + if (e.mem_obj->xitTable.index >= 0) + transients->abandon(e); + } +} + +void +StoreController::transientsCompleteWriting(StoreEntry &e) +{ + if (transients) { + assert(e.mem_obj); + if (e.mem_obj->xitTable.index >= 0) + transients->completeWriting(e); + } +} + +int +StoreController::transientReaders(const StoreEntry &e) const +{ + return (transients && e.mem_obj && e.mem_obj->xitTable.index >= 0) ? + transients->readers(e) : 0; +} + +void +StoreController::transientsDisconnect(MemObject &mem_obj) +{ + if (transients) + transients->disconnect(mem_obj); +} + +void StoreController::handleIdleEntry(StoreEntry &e) { bool keepInLocalMemory = false; @@ -838,7 +962,6 @@ // They are not managed [well] by any specific Store handled below. keepInLocalMemory = true; } else if (memStore) { - memStore->considerKeeping(e); // leave keepInLocalMemory false; memStore maintains its own cache } else { keepInLocalMemory = keepForLocalMemoryCache(e) && // in good shape and @@ -865,6 +988,97 @@ } } +void +StoreController::allowCollapsing(StoreEntry *e, const RequestFlags &reqFlags, + const HttpRequestMethod &reqMethod) +{ + e->makePublic(); // this is needed for both local and SMP collapsing + if (transients) + transients->startWriting(e, reqFlags, reqMethod); + debugs(20, 3, "may " << (transients && e->mem_obj->xitTable.index >= 0 ? 
+ "SMP-" : "locally-") << "collapse " << *e); +} + +void +StoreController::syncCollapsed(const sfileno xitIndex) +{ + assert(transients); + + StoreEntry *collapsed = transients->findCollapsed(xitIndex); + if (!collapsed) { // the entry is no longer locally active, ignore update + debugs(20, 7, "not SMP-syncing not-transient " << xitIndex); + return; + } + assert(collapsed->mem_obj); + assert(collapsed->mem_obj->smpCollapsed); + + debugs(20, 7, "syncing " << *collapsed); + + bool abandoned = transients->abandoned(*collapsed); + bool found = false; + bool inSync = false; + if (memStore && collapsed->mem_obj->memCache.io == MemObject::ioDone) { + found = true; + inSync = true; + debugs(20, 7, "fully mem-loaded " << *collapsed); + } else if (memStore && collapsed->mem_obj->memCache.index >= 0) { + found = true; + inSync = memStore->updateCollapsed(*collapsed); + } else if (collapsed->swap_filen >= 0) { + found = true; + inSync = collapsed->store()->updateCollapsed(*collapsed); + } else { + found = anchorCollapsed(*collapsed, inSync); + } + + if (abandoned && collapsed->store_status == STORE_PENDING) { + debugs(20, 3, "aborting abandoned but STORE_PENDING " << *collapsed); + collapsed->abort(); + return; + } + + if (inSync) { + debugs(20, 5, "synced " << *collapsed); + collapsed->invokeHandlers(); + } else if (found) { // unrecoverable problem syncing this entry + debugs(20, 3, "aborting unsyncable " << *collapsed); + collapsed->abort(); + } else { // the entry is still not in one of the caches + debugs(20, 7, "waiting " << *collapsed); + } +} + +/// Called for in-transit entries that are not yet anchored to a cache. +/// For cached entries, return true after synchronizing them with their cache +/// (making inSync true on success). For not-yet-cached entries, return false. +bool +StoreController::anchorCollapsed(StoreEntry &collapsed, bool &inSync) +{ + // this method is designed to work with collapsed transients only + assert(collapsed.mem_obj); + assert(collapsed.mem_obj->xitTable.index >= 0); + assert(collapsed.mem_obj->smpCollapsed); + + debugs(20, 7, "anchoring " << collapsed); + + bool found = false; + if (memStore) + found = memStore->anchorCollapsed(collapsed, inSync); + else if (Config.cacheSwap.n_configured) + found = anchorCollapsedOnDisk(collapsed, inSync); + + if (found) { + if (inSync) + debugs(20, 7, "anchored " << collapsed); + else + debugs(20, 5, "failed to anchor " << collapsed); + } else { + debugs(20, 7, "skipping not yet cached " << collapsed); + } + + return found; +} + StoreHashIndex::StoreHashIndex() { if (store_table) === modified file 'src/store_key_md5.cc' --- src/store_key_md5.cc 2013-02-20 05:59:41 +0000 +++ src/store_key_md5.cc 2013-06-27 21:04:01 +0000 @@ -43,6 +43,9 @@ const char * storeKeyText(const cache_key *key) { + if (!key) + return "[null_store_key]"; + static char buf[SQUID_MD5_DIGEST_LENGTH * 2+1]; int i; === modified file 'src/store_log.cc' --- src/store_log.cc 2013-10-31 19:13:17 +0000 +++ src/store_log.cc 2013-12-06 23:52:26 +0000 @@ -70,12 +70,6 @@ ++storeLogTagsCounts[tag]; if (mem != NULL) { - if (mem->log_url == NULL) { - debugs(20, DBG_IMPORTANT, "storeLog: NULL log_url for " << mem->url); - mem->dump(); - mem->log_url = xstrdup(mem->url); - } - reply = e->getReply(); /* * XXX Ok, where should we print the dir number here? @@ -101,7 +95,7 @@ reply->content_length, e->contentLen(), RequestMethodStr(mem->method), - mem->log_url); + mem->logUri()); logfileLineEnd(storelog); } else { /* no mem object. 
Most RELEASE cases */ === modified file 'src/store_rebuild.cc' --- src/store_rebuild.cc 2013-10-25 00:13:46 +0000 +++ src/store_rebuild.cc 2013-12-30 23:58:33 +0000 @@ -74,6 +74,7 @@ static int store_errors = 0; static StoreSearchPointer currentSearch; static int validated = 0; + static int seen = 0; if (currentSearch == NULL || currentSearch->isDone()) currentSearch = Store::Root().search(NULL, NULL); @@ -86,6 +87,8 @@ e = currentSearch->currentItem(); + ++seen; + if (EBIT_TEST(e->flags, ENTRY_VALIDATED)) continue; @@ -113,6 +116,7 @@ } if (currentSearch->isDone()) { + debugs(20, 2, "Seen: " << seen << " entries"); debugs(20, DBG_IMPORTANT, " Completed Validation Procedure"); debugs(20, DBG_IMPORTANT, " Validated " << validated << " Entries"); debugs(20, DBG_IMPORTANT, " store_swap_size = " << Store::Root().currentSize() / 1024.0 << " KB"); @@ -340,7 +344,7 @@ // TODO: consume parsed metadata? - debugs(47,7, HERE << "successful swap meta unpacking"); + debugs(47,7, "successful swap meta unpacking; swap_file_sz=" << tmpe.swap_file_sz); memset(key, '\0', SQUID_MD5_DIGEST_LENGTH); InitStoreEntry visitor(&tmpe, key); @@ -367,9 +371,8 @@ return false; } } else if (tmpe.swap_file_sz <= 0) { - debugs(47, DBG_IMPORTANT, "WARNING: Ignoring cache entry with " << - "unknown size: " << tmpe); - return false; + // if caller cannot handle unknown sizes, it must check after the call. + debugs(47, 7, "unknown size: " << tmpe); } if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) { @@ -410,8 +413,8 @@ // For some stores, get() creates/unpacks a store entry. Signal // such stores that we will no longer use the get() result: - e->lock(); - e->unlock(); + e->lock("storeRebuildKeepEntry"); + e->unlock("storeRebuildKeepEntry"); return false; } else { === modified file 'src/store_swapout.cc' --- src/store_swapout.cc 2013-10-25 00:13:46 +0000 +++ src/store_swapout.cc 2013-12-31 18:49:41 +0000 @@ -69,6 +69,7 @@ e->swap_dirn << ", fileno " << std::hex << std::setw(8) << std::setfill('0') << std::uppercase << e->swap_filen); e->swap_status = SWAPOUT_WRITING; + mem->swapout.decision = MemObject::SwapOut::swStarted; /* If we start swapping out objects with OutOfBand Metadata, * then this code needs changing */ @@ -98,7 +99,7 @@ /* Don't lock until after create, or the replacement * code might get confused */ - e->lock(); + e->lock("storeSwapOutStart"); /* Pick up the file number if it was assigned immediately */ e->swap_filen = mem->swapout.sio->swap_filen; @@ -123,7 +124,7 @@ e->swap_dirn = mem->swapout.sio->swap_dirn; } -static void +static bool doPages(StoreEntry *anEntry) { MemObject *mem = anEntry->mem_obj; @@ -134,7 +135,7 @@ mem->data_hdr.getBlockContainingLocation(mem->swapout.queue_offset); if (!page) - return; // wait for more data to become available + break; // wait for more data to become available // memNodeWriteComplete() and absence of buffer offset math below // imply that we always write from the very beginning of the page @@ -158,15 +159,16 @@ mem->swapout.queue_offset += swap_buf_len; - storeIOWrite(mem->swapout.sio, - mem->data_hdr.NodeGet(page), - swap_buf_len, - -1, - memNodeWriteComplete); + // Quit if write() fails. Sio is going to call our callback, and that + // will cleanup, but, depending on the fs, that call may be async. 
+ const bool ok = mem->swapout.sio->write( + mem->data_hdr.NodeGet(page), + swap_buf_len, + -1, + memNodeWriteComplete); - /* the storeWrite() call might generate an error */ - if (anEntry->swap_status != SWAPOUT_WRITING) - break; + if (!ok || anEntry->swap_status != SWAPOUT_WRITING) + return false; int64_t swapout_size = mem->endOffset() - mem->swapout.queue_offset; @@ -175,8 +177,11 @@ break; if (swapout_size <= 0) - return; + break; } while (true); + + // either wait for more data or call swapOutFileClose() + return true; } /* This routine is called every time data is sent to the client side. @@ -198,9 +203,9 @@ const bool weAreOrMayBeSwappingOut = swappingOut() || mayStartSwapOut(); - Store::Root().maybeTrimMemory(*this, weAreOrMayBeSwappingOut); + Store::Root().memoryOut(*this, weAreOrMayBeSwappingOut); - if (mem_obj->swapout.decision != MemObject::SwapOut::swPossible) + if (mem_obj->swapout.decision < MemObject::SwapOut::swPossible) return; // nothing else to do // Aborted entries have STORE_OK, but swapoutPossible rejects them. Thus, @@ -267,9 +272,7 @@ if (mem_obj->swapout.sio == NULL) return; - doPages(this); - - if (mem_obj->swapout.sio == NULL) + if (!doPages(this)) /* oops, we're not swapping out any more */ return; @@ -354,7 +357,7 @@ debugs(20, 3, "storeSwapOutFileClosed: " << __FILE__ << ":" << __LINE__); mem->swapout.sio = NULL; - e->unlock(); + e->unlock("storeSwapOutFileClosed"); } bool @@ -370,25 +373,32 @@ assert(mem_obj); MemObject::SwapOut::Decision &decision = mem_obj->swapout.decision; - // if we decided that swapout is not possible, do not repeat same checks + // if we decided that starting is not possible, do not repeat same checks if (decision == MemObject::SwapOut::swImpossible) { debugs(20, 3, HERE << " already rejected"); return false; } + // if we swapped out already, do not start over + if (swap_status == SWAPOUT_DONE) { + debugs(20, 3, "already did"); + decision = MemObject::SwapOut::swImpossible; + return false; + } + + // if we stared swapping out already, do not start over + if (decision == MemObject::SwapOut::swStarted) { + debugs(20, 3, "already started"); + decision = MemObject::SwapOut::swImpossible; + return false; + } + // if we decided that swapout is possible, do not repeat same checks if (decision == MemObject::SwapOut::swPossible) { - debugs(20, 3, HERE << "already allowed"); + debugs(20, 3, "already allowed"); return true; } - // if we swapped out already, do not start over - if (swap_status == SWAPOUT_DONE) { - debugs(20, 3, HERE << "already did"); - decision = MemObject::SwapOut::swImpossible; - return false; - } - if (!checkCachable()) { debugs(20, 3, HERE << "not cachable"); decision = MemObject::SwapOut::swImpossible; === added file 'src/tests/stub_CollapsedForwarding.cc' --- src/tests/stub_CollapsedForwarding.cc 1970-01-01 00:00:00 +0000 +++ src/tests/stub_CollapsedForwarding.cc 2014-01-01 17:51:10 +0000 @@ -0,0 +1,7 @@ +#include "squid.h" +#include "CollapsedForwarding.h" + +#define STUB_API "CollapsedForwarding.cc" +#include "tests/STUB.h" + +void CollapsedForwarding::Broadcast(StoreEntry const&) STUB === modified file 'src/tests/stub_MemObject.cc' --- src/tests/stub_MemObject.cc 2013-10-25 00:13:46 +0000 +++ src/tests/stub_MemObject.cc 2013-12-29 05:30:26 +0000 @@ -21,14 +21,12 @@ void MemObject::trimSwappable() STUB void MemObject::trimUnSwappable() STUB int64_t MemObject::policyLowestOffsetToKeep(bool swap) const STUB_RETVAL(-1) -MemObject::MemObject(char const *, char const *) : - url(NULL), +MemObject::MemObject() : inmem_lo(0), 
nclients(0), request(NULL), ping_reply_callback(NULL), ircb_data(NULL), - log_url(NULL), id(0), object_sz(-1), swap_hdr_sz(0), @@ -45,6 +43,9 @@ // XXX: required by testStore return NULL; } +const char *MemObject::storeId() const STUB_RETVAL(NULL) +const char *MemObject::logUri() const STUB_RETVAL(NULL) +void MemObject::setUris(char const *aStoreId, char const *aLogUri, const HttpRequestMethod &aMethod) STUB void MemObject::reset() STUB void MemObject::delayRead(DeferredRead const &aRead) STUB bool MemObject::readAheadPolicyCanRead() const STUB_RETVAL(false) @@ -55,14 +56,13 @@ DelayId MemObject::mostBytesAllowed() const STUB_RETVAL(DelayId()) #endif void MemObject::unlinkRequest() STUB -void MemObject::write(StoreIOBuffer writeBuffer, STMCB *callback, void *callbackData) STUB +void MemObject::write(const StoreIOBuffer &writeBuffer) STUB void MemObject::replaceHttpReply(HttpReply *newrep) STUB int64_t MemObject::lowestMemReaderOffset() const STUB_RETVAL(0) void MemObject::kickReads() STUB int64_t MemObject::objectBytesOnDisk() const STUB_RETVAL(0) bool MemObject::isContiguous() const STUB_RETVAL(false) int64_t MemObject::expectedReplySize() const STUB_RETVAL(0) -void MemObject::resetUrls(char const*, char const*) STUB void MemObject::markEndOfReplyHeaders() STUB size_t MemObject::inUseCount() STUB_RETVAL(0) int64_t MemObject::availableForSwapOut() const STUB_RETVAL(0) === modified file 'src/tests/stub_MemStore.cc' --- src/tests/stub_MemStore.cc 2012-10-16 00:18:09 +0000 +++ src/tests/stub_MemStore.cc 2013-08-16 14:44:40 +0000 @@ -12,10 +12,13 @@ MemStore::MemStore() STUB MemStore::~MemStore() STUB bool MemStore::keepInLocalMemory(const StoreEntry &) const STUB_RETVAL(false) -void MemStore::considerKeeping(StoreEntry &) STUB +void MemStore::write(StoreEntry &e) STUB +void MemStore::completeWriting(StoreEntry &e) STUB +void MemStore::unlink(StoreEntry &e) STUB +void MemStore::disconnect(StoreEntry &e) STUB void MemStore::reference(StoreEntry &) STUB void MemStore::maintain() STUB -void MemStore::cleanReadable(const sfileno) STUB +void MemStore::noteFreeMapSlice(const sfileno) STUB void MemStore::get(String const, STOREGETCLIENT, void *) STUB void MemStore::init() STUB void MemStore::getStats(StoreInfoStats&) const STUB @@ -29,3 +32,6 @@ int64_t MemStore::maxObjectSize() const STUB_RETVAL(0) StoreSearch *MemStore::search(String const, HttpRequest *) STUB_RETVAL(NULL) bool MemStore::dereference(StoreEntry &, bool) STUB_RETVAL(false) +void MemStore::markForUnlink(StoreEntry&) STUB +bool MemStore::anchorCollapsed(StoreEntry&, bool&) STUB_RETVAL(false) +bool MemStore::updateCollapsed(StoreEntry&) STUB_RETVAL(false) === modified file 'src/tests/stub_stat.cc' --- src/tests/stub_stat.cc 2012-09-01 14:42:17 +0000 +++ src/tests/stub_stat.cc 2013-12-29 05:30:26 +0000 @@ -32,3 +32,8 @@ #include "squid.h" +#define STUB_API "stat.cc" +#include "tests/STUB.h" + +class StoreEntry; +const char *storeEntryFlags(const StoreEntry *) STUB_RETVAL(NULL) === modified file 'src/tests/stub_store.cc' --- src/tests/stub_store.cc 2013-10-30 18:37:05 +0000 +++ src/tests/stub_store.cc 2013-12-29 05:30:26 +0000 @@ -21,7 +21,6 @@ bool StoreEntry::checkDeferRead(int fd) const STUB_RETVAL(false) const char *StoreEntry::getMD5Text() const STUB_RETVAL(NULL) StoreEntry::StoreEntry() STUB -StoreEntry::StoreEntry(const char *, const char *) STUB StoreEntry::~StoreEntry() STUB HttpReply const *StoreEntry::getReply() const STUB_RETVAL(NULL) void StoreEntry::write(StoreIOBuffer) STUB @@ -43,7 +42,6 @@ void 
StoreEntry::releaseRequest() STUB void StoreEntry::negativeCache() STUB void StoreEntry::cacheNegatively() STUB -void StoreEntry::invokeHandlers() STUB void StoreEntry::purgeMem() STUB void StoreEntry::swapOut() STUB void StoreEntry::swapOutFileClose(int how) STUB @@ -53,8 +51,8 @@ int StoreEntry::locked() const STUB_RETVAL(0) int StoreEntry::validToSend() const STUB_RETVAL(0) bool StoreEntry::memoryCachable() const STUB_RETVAL(false) -void StoreEntry::createMemObject(const char *, const char *) STUB -void StoreEntry::hideMemObject() STUB +MemObject *StoreEntry::makeMemObject() STUB_RETVAL(NULL) +void StoreEntry::createMemObject(const char *, const char *, const HttpRequestMethod &aMethod) STUB void StoreEntry::dump(int debug_lvl) const STUB void StoreEntry::hashDelete() STUB void StoreEntry::hashInsert(const cache_key *) STUB @@ -88,10 +86,11 @@ void StoreEntry::append(char const *, int len) STUB void StoreEntry::buffer() STUB void StoreEntry::flush() STUB -int StoreEntry::unlock() STUB_RETVAL(0) +int StoreEntry::unlock(const char *) STUB_RETVAL(0) int64_t StoreEntry::objectLen() const STUB_RETVAL(0) int64_t StoreEntry::contentLen() const STUB_RETVAL(0) -void StoreEntry::lock() STUB +void StoreEntry::lock(const char *) STUB +void StoreEntry::touch() STUB void StoreEntry::release() STUB NullStoreEntry *NullStoreEntry::getInstance() STUB_RETVAL(NULL) @@ -116,12 +115,12 @@ } size_t storeEntryInUse() STUB_RETVAL(0) -const char *storeEntryFlags(const StoreEntry *) STUB_RETVAL(NULL) void storeEntryReplaceObject(StoreEntry *, HttpReply *) STUB StoreEntry *storeGetPublic(const char *uri, const HttpRequestMethod& method) STUB_RETVAL(NULL) StoreEntry *storeGetPublicByRequest(HttpRequest * request) STUB_RETVAL(NULL) StoreEntry *storeGetPublicByRequestMethod(HttpRequest * request, const HttpRequestMethod& method) STUB_RETVAL(NULL) StoreEntry *storeCreateEntry(const char *, const char *, const RequestFlags &, const HttpRequestMethod&) STUB_RETVAL(NULL) +StoreEntry *storeCreatePureEntry(const char *storeId, const char *logUrl, const RequestFlags &, const HttpRequestMethod&) STUB_RETVAL(NULL) void storeInit(void) STUB void storeConfigure(void) STUB void storeFreeMemory(void) STUB @@ -138,6 +137,13 @@ // in Packer.cc !? 
void packerToStoreInit(Packer * p, StoreEntry * e) STUB void storeGetMemSpace(int size) STUB -#if !_USE_INLINE_ -#include "Store.cci" -#endif +#if !_USE_INLINE_ /* stubs for Store.cci */ +bool StoreEntry::isEmpty () const STUB_RETVAL(true) +HttpReply const *NullStoreEntry::getReply() const STUB_RETVAL(NULL) + +Store &Store::Root() +{ + CPPUNIT_ASSERT(CurrentRoot != NULL); + return *CurrentRoot; +} +#endif /* !_USE_INLINE_ */ === modified file 'src/tests/stub_store_client.cc' --- src/tests/stub_store_client.cc 2013-10-25 00:13:46 +0000 +++ src/tests/stub_store_client.cc 2013-12-29 05:30:26 +0000 @@ -25,7 +25,6 @@ void storeLogOpen(void) STUB void storeDigestInit(void) STUB void storeRebuildStart(void) STUB -const char *storeEntryFlags(const StoreEntry *) STUB_RETVAL(NULL) void storeReplSetup(void) STUB bool store_client::memReaderHasLowerOffset(int64_t anOffset) const STUB_RETVAL(false) void store_client::dumpStats(MemBuf * output, int clientNumber) const STUB === modified file 'src/tests/stub_store_rebuild.cc' --- src/tests/stub_store_rebuild.cc 2012-09-06 14:22:03 +0000 +++ src/tests/stub_store_rebuild.cc 2013-12-31 18:49:41 +0000 @@ -33,15 +33,32 @@ #include "squid.h" #include "MemBuf.h" #include "store_rebuild.h" +#include "SwapDir.h" +#if HAVE_STRING_H +#include <string.h> +#endif #define STUB_API "stub_store_rebuild.cc" #include "tests/STUB.h" void storeRebuildProgress(int sd_index, int total, int sofar) STUB -void storeRebuildComplete(StoreRebuildData *dc) STUB_NOP -bool storeRebuildLoadEntry(int, int, MemBuf&, StoreRebuildData&) -{ - return false; -} bool storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key, StoreRebuildData &counts) STUB_RETVAL(false) bool storeRebuildParseEntry(MemBuf &, StoreEntry &, cache_key *, StoreRebuildData &, uint64_t) STUB_RETVAL(false) + +void storeRebuildComplete(StoreRebuildData *) +{ + --StoreController::store_dirs_rebuilding; +} + +bool +storeRebuildLoadEntry(int fd, int diskIndex, MemBuf &buf, StoreRebuildData &) +{ + if (fd < 0) + return false; + + assert(buf.hasSpace()); // caller must allocate + // this stub simulates reading an empty entry + memset(buf.space(), 0, buf.spaceSize()); + buf.appended(buf.spaceSize()); + return true; +} === modified file 'src/tests/testRock.cc' --- src/tests/testRock.cc 2013-11-18 17:03:55 +0000 +++ src/tests/testRock.cc 2013-12-27 18:37:26 +0000 @@ -167,7 +167,16 @@ */ /* nothing left to rebuild */ - CPPUNIT_ASSERT_EQUAL(1, StoreController::store_dirs_rebuilding); + CPPUNIT_ASSERT_EQUAL(0, StoreController::store_dirs_rebuilding); +} + +static const char * +storeId(const int i) +{ + static char buf[64]; + snprintf(buf, sizeof(buf), "dummy url %i", i); + buf[sizeof(buf) - 1] = '\0'; + return buf; } StoreEntry * @@ -175,11 +184,8 @@ { RequestFlags flags; flags.cachable = true; - char url[64]; - snprintf(url, sizeof(url), "dummy url %i", i); - url[sizeof(url) - 1] = '\0'; StoreEntry *const pe = - storeCreateEntry(url, "dummy log url", flags, Http::METHOD_GET); + storeCreateEntry(storeId(i), "dummy log url", flags, Http::METHOD_GET); HttpReply *const rep = const_cast<HttpReply *>(pe->getReply()); rep->setHeaders(Http::scOkay, "dummy test object", "x-squid-internal/test", 0, -1, squid_curtime + 100000); @@ -213,8 +219,7 @@ StoreEntry * testRock::getEntry(const int i) { - StoreEntry *const pe = createEntry(i); - return store->get(reinterpret_cast<const cache_key *>(pe->key)); + return storeGetPublic(storeId(i), Http::METHOD_GET); } void @@ -251,7 +256,7 @@ CPPUNIT_ASSERT_EQUAL(SWAPOUT_DONE, pe->swap_status); - pe->unlock(); +
pe->unlock("testRock::testRockSwapOut priming"); } CPPUNIT_ASSERT_EQUAL((uint64_t)5, store->currentCount()); @@ -268,6 +273,8 @@ loop.run(); CPPUNIT_ASSERT_EQUAL(SWAPOUT_DONE, pe->swap_status); + + pe->unlock("testRock::testRockSwapOut e#4"); } // try to swap out entry to a used locked slot @@ -287,16 +294,29 @@ StockEventLoop loop; loop.run(); + + pe->unlock("testRock::testRockSwapOut e#5.1"); + pe2->unlock("testRock::testRockSwapOut e#5.2"); + + // pe2 has the same public key as pe so it marks old pe for release + // here, we add another entry #5 into the now-available slot + StoreEntry *const pe3 = addEntry(5); + CPPUNIT_ASSERT_EQUAL(SWAPOUT_WRITING, pe3->swap_status); + CPPUNIT_ASSERT_EQUAL(0, pe3->swap_dirn); + CPPUNIT_ASSERT(pe3->swap_filen >= 0); + loop.run(); + CPPUNIT_ASSERT_EQUAL(SWAPOUT_DONE, pe3->swap_status); + pe3->unlock("testRock::testRockSwapOut e#5.3"); } CPPUNIT_ASSERT_EQUAL((uint64_t)6, store->currentCount()); - // try to get and unlink entries + // try to get and release all entries for (int i = 0; i < 6; ++i) { StoreEntry *const pe = getEntry(i); CPPUNIT_ASSERT(pe != NULL); - pe->unlink(); + pe->release(); // destroys pe StoreEntry *const pe2 = getEntry(i); CPPUNIT_ASSERT_EQUAL(static_cast(NULL), pe2); === modified file 'src/tests/testStoreController.cc' --- src/tests/testStoreController.cc 2013-11-18 17:03:55 +0000 +++ src/tests/testStoreController.cc 2013-12-19 23:13:24 +0000 @@ -2,6 +2,7 @@ #include "squid.h" #include "Mem.h" +#include "MemObject.h" #include "SquidConfig.h" #include "SquidTime.h" #include "Store.h" @@ -23,7 +24,9 @@ void testStoreController::testStats() { - StoreEntry * logEntry = new StoreEntry("dummy_url", "dummy_log_url"); + StoreEntry *logEntry = new StoreEntry; + logEntry->makeMemObject(); + logEntry->mem_obj->setUris("dummy_storeId", NULL, HttpRequestMethod()); logEntry->store_status = STORE_PENDING; StorePointer aRoot (new StoreController); Store::Root(aRoot); @@ -63,7 +66,9 @@ testStoreController::testMaxSize() { commonInit(); - StoreEntry * logEntry = new StoreEntry("dummy_url", "dummy_log_url"); + StoreEntry *logEntry = new StoreEntry; + logEntry->makeMemObject(); + logEntry->mem_obj->setUris("dummy_storeId", NULL, HttpRequestMethod()); logEntry->store_status = STORE_PENDING; StorePointer aRoot (new StoreController); Store::Root(aRoot); @@ -99,13 +104,11 @@ CPPUNIT_ASSERT (e->swap_dirn != -1); e->swap_file_sz = 0; /* garh lower level */ - e->lock_count = 0; e->lastref = squid_curtime; e->timestamp = squid_curtime; e->expires = squid_curtime; e->lastmod = squid_curtime; e->refcount = 1; - EBIT_SET(e->flags, ENTRY_CACHABLE); EBIT_CLR(e->flags, RELEASE_REQUEST); EBIT_CLR(e->flags, KEY_PRIVATE); e->ping_status = PING_NONE; === modified file 'src/tests/testStoreEntryStream.cc' --- src/tests/testStoreEntryStream.cc 2013-10-25 00:13:46 +0000 +++ src/tests/testStoreEntryStream.cc 2013-12-19 23:13:24 +0000 @@ -32,7 +32,7 @@ CapturingStoreEntry * anEntry = new CapturingStoreEntry(); { - StoreEntryStream stream(anEntry); + StoreEntryStream stream(anEntry); // locks and unlocks/deletes anEntry CPPUNIT_ASSERT_EQUAL(1, anEntry->_buffer_calls); CPPUNIT_ASSERT_EQUAL(0, anEntry->_flush_calls); @@ -53,8 +53,5 @@ CPPUNIT_ASSERT_EQUAL(String("12345677.7 some text !."), anEntry->_appended_text); } - - delete anEntry; - Store::Root(NULL); } === modified file 'src/tests/testStoreHashIndex.cc' --- src/tests/testStoreHashIndex.cc 2013-11-18 17:03:55 +0000 +++ src/tests/testStoreHashIndex.cc 2013-12-19 23:13:24 +0000 @@ -2,6 +2,7 @@ #include "squid.h" #include 
"Mem.h" +#include "MemObject.h" #include "SquidConfig.h" #include "SquidTime.h" #include "Store.h" @@ -24,7 +25,9 @@ void testStoreHashIndex::testStats() { - StoreEntry * logEntry = new StoreEntry("dummy_url", "dummy_log_url"); + StoreEntry *logEntry = new StoreEntry; + logEntry->makeMemObject(); + logEntry->mem_obj->setUris("dummy_storeId", NULL, HttpRequestMethod()); logEntry->store_status = STORE_PENDING; StorePointer aRoot (new StoreHashIndex()); Store::Root(aRoot); @@ -44,7 +47,9 @@ void testStoreHashIndex::testMaxSize() { - StoreEntry * logEntry = new StoreEntry("dummy_url", "dummy_log_url"); + StoreEntry *logEntry = new StoreEntry; + logEntry->makeMemObject(); + logEntry->mem_obj->setUris("dummy_storeId", NULL, HttpRequestMethod()); logEntry->store_status = STORE_PENDING; StorePointer aRoot (new StoreHashIndex()); Store::Root(aRoot); @@ -80,13 +85,11 @@ CPPUNIT_ASSERT (e->swap_dirn != -1); e->swap_file_sz = 0; /* garh lower level */ - e->lock_count = 0; e->lastref = squid_curtime; e->timestamp = squid_curtime; e->expires = squid_curtime; e->lastmod = squid_curtime; e->refcount = 1; - EBIT_SET(e->flags, ENTRY_CACHABLE); EBIT_CLR(e->flags, RELEASE_REQUEST); EBIT_CLR(e->flags, KEY_PRIVATE); e->ping_status = PING_NONE; === modified file 'src/tests/testUfs.cc' --- src/tests/testUfs.cc 2013-11-18 17:03:55 +0000 +++ src/tests/testUfs.cc 2013-12-27 18:37:26 +0000 @@ -129,7 +129,7 @@ /* rebuild is a scheduled event */ StockEventLoop loop; - while (StoreController::store_dirs_rebuilding > 1) + while (StoreController::store_dirs_rebuilding) loop.runOnce(); /* cannot use loop.run(); as the loop will never idle: the store-dir @@ -137,7 +137,7 @@ */ /* nothing left to rebuild */ - CPPUNIT_ASSERT_EQUAL(1, StoreController::store_dirs_rebuilding); + CPPUNIT_ASSERT_EQUAL(0, StoreController::store_dirs_rebuilding); /* add an entry */ { @@ -165,7 +165,7 @@ pe->swapOut(); CPPUNIT_ASSERT_EQUAL(0, pe->swap_dirn); CPPUNIT_ASSERT_EQUAL(0, pe->swap_filen); - pe->unlock(); + pe->unlock("testUfs::testUfsSearch vary"); } storeDirWriteCleanLogs(0); === modified file 'src/urn.cc' --- src/urn.cc 2013-06-19 04:59:56 +0000 +++ src/urn.cc 2013-08-15 22:09:07 +0000 @@ -225,7 +225,7 @@ entry = e; request = r; - entry->lock(); + entry->lock("UrnState::start"); setUriResFromRequest(r); if (urlres_r == NULL) @@ -244,7 +244,7 @@ sc = storeClientListAdd(urlres_e, this); FwdState::fwdStart(Comm::ConnectionPointer(), urlres_e, urlres_r.getRaw()); } else { - urlres_e->lock(); + urlres_e->lock("UrnState::created"); sc = storeClientListAdd(urlres_e, this); } @@ -285,8 +285,8 @@ static void urnHandleReplyError(UrnState *urnState, StoreEntry *urlres_e) { - urlres_e->unlock(); - urnState->entry->unlock(); + urlres_e->unlock("urnHandleReplyError+res"); + urnState->entry->unlock("urnHandleReplyError+prime"); delete urnState; } === modified file 'src/whois.cc' --- src/whois.cc 2013-10-24 17:27:28 +0000 +++ src/whois.cc 2013-12-06 23:52:26 +0000 @@ -89,7 +89,7 @@ p->fwd = fwd; p->dataWritten = false; - p->entry->lock(); + p->entry->lock("whoisStart"); comm_add_close_handler(fwd->serverConnection()->fd, whoisClose, p); l = p->request->urlpath.size() + 3; @@ -185,8 +185,7 @@ entry->timestampsSet(); entry->flush(); - if (!EBIT_TEST(entry->flags, RELEASE_REQUEST)) - entry->setPublicKey(); + entry->makePublic(); fwd->complete(); debugs(75, 3, "whoisReadReply: Done: " << entry->url()); @@ -198,6 +197,6 @@ { WhoisState *p = (WhoisState *)params.data; debugs(75, 3, "whoisClose: FD " << params.fd); - p->entry->unlock(); + 
p->entry->unlock("whoisClose"); delete p; }
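
Usage note on the swapout write path changed above: StoreIOState::write() now reports whether the write was accepted, and doPages() propagates that result so that StoreEntry::swapOut() can stop right after a failed write instead of probing sio for side effects. A minimal sketch of the caller pattern, under the assumption that sio, buf, len and entry stand in for the corresponding doPages() state (these names are illustrative, not part of the patch):

    // attempt to queue one page; sio->write() now returns a success flag
    const bool ok = sio->write(buf, len, -1, memNodeWriteComplete);
    if (!ok || entry->swap_status != SWAPOUT_WRITING)
        return false; // quit; sio will run our callback (possibly asynchronously) to clean up
    return true;      // otherwise, wait for more data or close the swapout file

and, on the swapOut() side, the boolean result replaces the old "did sio disappear?" re-check:

    if (!doPages(this))
        return; // oops, we're not swapping out any more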
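
Usage note on the tagged locking API used throughout this patch (store_swapout.cc, urn.cc, whois.cc, the test suites): StoreEntry::lock() and StoreEntry::unlock() now take a caller-supplied context string, which the improved entry lock/unlock debugging reports alongside each lock-count change. A minimal caller sketch, assuming a hypothetical function myEntryUser that is not part of the patch:

    static void
    myEntryUser(StoreEntry *e)
    {
        e->lock("myEntryUser");   // pin the entry; the tag identifies the lock holder in cache.log
        // ... work with *e ...
        e->unlock("myEntryUser"); // dropping the last lock may let an idle entry be destroyed
    }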
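
Usage note on the unit-test changes above: the two-argument StoreEntry constructor is gone, so tests now build a bare entry and attach its URIs through the MemObject, as in the testStoreController and testStoreHashIndex hunks:

    StoreEntry *logEntry = new StoreEntry;
    logEntry->makeMemObject();  // attach a MemObject to carry the store and log URIs
    logEntry->mem_obj->setUris("dummy_storeId", NULL, HttpRequestMethod());
    logEntry->store_status = STORE_PENDING;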