------------------------------------------------------------
revno: 13190
revision-id: rousskov@measurement-factory.com-20131218171900-dz28iirj5y215tys
parent: kinkie@squid-cache.org-20131218170315-ajv8rc9icsddy88x
committer: Alex Rousskov <rousskov@measurement-factory.com>
branch nick: trunk
timestamp: Wed 2013-12-18 10:19:00 -0700
message:
  Added send_hit and store_miss squid.conf directives
  to control caching of responses using response info.
  
  The existing "cache" directive is checked before Squid has access to the
  response and, hence, could not use response-based ACLs such as http_status.
  Response-based ACLs may be essential when fine-tuning caching. Squid Bug 3937
  (StoreID can lead to 302 infinite loop) is a good use case.
  
  Updated old "cache" directive documentation to provide more information, to
  help folks distinguish the three related directives, and to polish for
  clarity.
  
  TODO: Support lookup_hit and possibly deprecate/remove "cache".
------------------------------------------------------------
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: rousskov@measurement-factory.com-20131218171900-\
#   dz28iirj5y215tys
# target_branch: http://bzr.squid-cache.org/bzr/squid3/trunk/
# testament_sha1: 7c641c99e871a0c1ac71f2ea5ae12c03e3f6f59e
# timestamp: 2013-12-18 17:54:24 +0000
# source_branch: http://bzr.squid-cache.org/bzr/squid3/trunk/
# base_revision_id: kinkie@squid-cache.org-20131218170315-\
#   ajv8rc9icsddy88x
# 
# Begin patch
=== modified file 'src/Server.cc'
--- src/Server.cc	2013-12-05 11:04:45 +0000
+++ src/Server.cc	2013-12-18 17:19:00 +0000
@@ -31,6 +31,7 @@
  */
 
 #include "squid.h"
+#include "acl/FilledChecklist.h"
 #include "acl/Gadgets.h"
 #include "base/TextException.h"
 #include "comm/Connection.h"
@@ -174,6 +175,8 @@
     // give entry the reply because haveParsedReplyHeaders() expects it there
     entry->replaceHttpReply(theFinalReply, false); // but do not write yet
     haveParsedReplyHeaders(); // update the entry/reply (e.g., set timestamps)
+    if (EBIT_TEST(entry->flags, ENTRY_CACHABLE) && blockCaching())
+        entry->release();
     entry->startWriting(); // write the updated entry to store
 
     return theFinalReply;
@@ -533,6 +536,24 @@
     currentOffset = partial ? theFinalReply->content_range->spec.offset : 0;
 }
 
+/// whether squid.conf store_miss prevents caching of an otherwise cachable response
+bool
+ServerStateData::blockCaching()
+{
+    if (const Acl::Tree *acl = Config.accessList.storeMiss) { // skipped when store_miss is not configured
+        // This relatively expensive check is not in StoreEntry::checkCachable
+        // because that method lacks HttpRequest and may be called too many times.
+        ACLFilledChecklist ch(acl, originalRequest(), NULL);
+        ch.reply = const_cast<HttpReply*>(entry->getReply()); // const_cast works around an ACLFilledChecklist API bug
+        HTTPMSGLOCK(ch.reply); // NOTE(review): presumably unlocked when ch is destroyed -- confirm
+        if (ch.fastCheck() != ACCESS_ALLOWED) { // when in doubt, block: anything but an explicit allow
+            debugs(20, 3, "store_miss prohibits caching");
+            return true;
+        }
+    }
+    return false; // store_miss is unset or it explicitly allowed caching
+}
+
 HttpRequest *
 ServerStateData::originalRequest()
 {

=== modified file 'src/Server.h'
--- src/Server.h	2013-10-25 00:13:46 +0000
+++ src/Server.h	2013-12-18 17:19:00 +0000
@@ -131,6 +131,8 @@
     /// Entry-dependent callbacks use this check to quit if the entry went bad
     bool abortOnBadEntry(const char *abortReason);
 
+    bool blockCaching();
+
 #if USE_ADAPTATION
     void startAdaptation(const Adaptation::ServiceGroupPointer &group, HttpRequest *cause);
     void adaptVirginReplyBody(const char *buf, ssize_t len);

=== modified file 'src/SquidConfig.h'
--- src/SquidConfig.h	2013-10-13 17:55:11 +0000
+++ src/SquidConfig.h	2013-12-18 17:19:00 +0000
@@ -364,6 +364,8 @@
         acl_access *AlwaysDirect;
         acl_access *ASlists;
         acl_access *noCache;
+        acl_access *sendHit;
+        acl_access *storeMiss;
         acl_access *stats_collection;
 #if SQUID_SNMP
 

=== modified file 'src/cf.data.pre'
--- src/cf.data.pre	2013-12-11 21:50:06 +0000
+++ src/cf.data.pre	2013-12-18 17:19:00 +0000
@@ -4851,18 +4851,97 @@
 NAME: cache no_cache
 TYPE: acl_access
 DEFAULT: none
-DEFAULT_DOC: Allow caching, unless rules exist in squid.conf.
+DEFAULT_DOC: By default, this directive is unused and has no effect.
 LOC: Config.accessList.noCache
 DOC_START
-	A list of ACL elements which, if matched and denied, cause the request to
-	not be satisfied from the cache and the reply to not be cached.
-	In other words, use this to force certain objects to never be cached.
-
-	You must use the words 'allow' or 'deny' to indicate whether items
-	matching the ACL should be allowed or denied into the cache.
+	Requests denied by this directive will not be served from the cache
+	and their responses will not be stored in the cache. This directive
+	has no effect on other transactions and on already cached responses.
 
 	This clause supports both fast and slow acl types.
 	See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details.
+
+	This and the two other similar caching directives listed below are
+	checked at different transaction processing stages, have different
+	access to response information, affect different cache operations,
+	and differ in slow ACLs support:
+
+	* cache: Checked before Squid makes a hit/miss determination.
+		No access to reply information!
+		Denies both serving a hit and storing a miss.
+		Supports both fast and slow ACLs.
+	* send_hit: Checked after a hit was detected.
+		Has access to reply (hit) information.
+		Denies serving a hit only.
+		Supports fast ACLs only.
+	* store_miss: Checked before storing a cachable miss.
+		Has access to reply (miss) information.
+		Denies storing a miss only.
+		Supports fast ACLs only.
+
+	If you are not sure which of the three directives to use, apply the
+	following decision logic:
+
+	* If your ACL(s) are of slow type _and_ need response info, redesign.
+	  Squid does not support that particular combination at this time.
+	Otherwise:
+	* If your directive ACL(s) are of slow type, use "cache"; and/or
+	* if your directive ACL(s) need no response info, use "cache".
+	Otherwise:
+	* If you do not want the response cached, use store_miss; and/or
+	* if you do not want a hit on a cached response, use send_hit.
+DOC_END
+
+NAME: send_hit
+TYPE: acl_access
+DEFAULT: none
+DEFAULT_DOC: By default, this directive is unused and has no effect.
+LOC: Config.accessList.sendHit
+DOC_START
+	Responses denied by this directive will not be served from the cache
+	(but may still be cached, see store_miss). This directive has no
+	effect on the responses it allows and on the cached objects.
+
+	Please see the "cache" directive for a summary of differences among
+	store_miss, send_hit, and cache directives.
+
+	Unlike the "cache" directive, send_hit only supports fast acl
+	types.  See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details.
+
+	For example:
+
+		# apply custom Store ID mapping to some URLs
+		acl MapMe dstdomain .c.example.com
+		store_id_program ...
+		store_id_access allow MapMe
+
+		# but prevent caching of special responses
+		# such as 302 redirects that cause StoreID loops
+		acl Ordinary http_status 200-299
+		store_miss deny MapMe !Ordinary
+
+		# and do not serve any previously stored special responses
+		# from the cache (in case they were already cached before
+		# the above store_miss rule was in effect).
+		send_hit deny MapMe !Ordinary
+DOC_END
+
+NAME: store_miss
+TYPE: acl_access
+DEFAULT: none
+DEFAULT_DOC: By default, this directive is unused and has no effect.
+LOC: Config.accessList.storeMiss
+DOC_START
+	Responses denied by this directive will not be cached (but may still
+	be served from the cache, see send_hit). This directive has no
+	effect on the responses it allows and on the already cached responses.
+
+	Please see the "cache" directive for a summary of differences among
+	store_miss, send_hit, and cache directives. See the
+	send_hit directive for a usage example.
+
+	Unlike the "cache" directive, store_miss only supports fast acl
+	types.  See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details.
 DOC_END
 
 NAME: max_stale

=== modified file 'src/client_side_reply.cc'
--- src/client_side_reply.cc	2013-12-11 22:44:59 +0000
+++ src/client_side_reply.cc	2013-12-18 17:19:00 +0000
@@ -536,6 +536,11 @@
        ) {
         http->logType = LOG_TCP_NEGATIVE_HIT;
         sendMoreData(result);
+    } else if (blockedHit()) {
+        debugs(88, 5, "send_hit forces a MISS");
+        http->logType = LOG_TCP_MISS;
+        processMiss();
+        return;
     } else if (!http->flags.internal && refreshCheckHTTP(e, r)) {
         debugs(88, 5, "clientCacheHit: in refreshCheck() block");
         /*
@@ -764,6 +769,30 @@
     }
 }
 
+/// whether squid.conf send_hit prevents us from serving this hit
+bool
+clientReplyContext::blockedHit() const
+{
+    if (!Config.accessList.sendHit)
+        return false; // hits are not blocked by default
+
+    if (http->flags.internal)
+        return false; // internal content "hits" cannot be blocked
+
+    if (const HttpReply *rep = http->storeEntry()->getReply()) {
+        std::auto_ptr<ACLFilledChecklist> chl(clientAclChecklistCreate(Config.accessList.sendHit, http)); // deleted by auto_ptr on return
+        chl->reply = const_cast<HttpReply*>(rep); // const_cast works around an ACLFilledChecklist API bug
+        HTTPMSGLOCK(chl->reply); // NOTE(review): presumably unlocked when the checklist is destroyed -- confirm
+        return chl->fastCheck() != ACCESS_ALLOWED; // when in doubt, block: anything but an explicit allow
+    }
+
+    // This does not happen, I hope, because we are called from CacheHit, which
+    // is called via a storeClientCopy() callback, and store should initialize
+    // the reply before calling that callback.
+    debugs(88, 3, "Missing reply!");
+    return false; // without a reply to check, the hit is not blocked
+}
+
 void
 clientReplyContext::purgeRequestFindObjectToPurge()
 {

=== modified file 'src/client_side_reply.h'
--- src/client_side_reply.h	2013-06-27 15:58:46 +0000
+++ src/client_side_reply.h	2013-12-18 17:19:00 +0000
@@ -140,6 +140,7 @@
     void triggerInitialStoreRead();
     void sendClientOldEntry();
     void purgeAllCached();
+    bool blockedHit() const;
 
     void sendBodyTooLargeError();
     void sendPreconditionFailedError();