------------------------------------------------------------ revno: 12369 revision-id: squid3@treenet.co.nz-20121020062943-wk6o41x6jbov20vx parent: squidadm@squid-cache.org-20121017025909-9agu2odpl0je9891 fixes bug(s): http://bugs.squid-cache.org/show_bug.cgi?id=3670 committer: Amos Jeffries branch nick: 3.3 timestamp: Sat 2012-10-20 00:29:43 -0600 message: HTTP/1.1: Cache-Control compliance upgrade trunk rev 11361 converted Cache-Control header from using a single mask bitmap (shared by request and response) to separate CC header objects in the request and response. This conversion contained several regressions like the one bug 3670 reports. This patch: * documents HttpStateData::cacheableReply() clarifying the overall method action and what each individual check is doing. * resolves several visible regressions, including bug 3670. * extends the caching to handle the "no-cache" controls as per HTTP/1.1 (MAY store, but MUST revalidate before use). * extends the caching for several lesser known cases of "MAY store" exemptions handling authenticated transactions. * removes an old hack handling Pragma:no-cache (undefined in HTTP/1.1) One side effect of now caching transactions utilizing "no-cache" is that hacks around Pragma:no-cache are reduced to only having any effect when Cache-Control is absent, reducing their performance cost. They also now require that --enable-http-violations is built in, since HTTP/1.1 states that response Pragma directives are not defined and thus SHOULD be ignored. 
------------------------------------------------------------ # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: squid3@treenet.co.nz-20121020062943-wk6o41x6jbov20vx # target_branch: http://bzr.squid-cache.org/bzr/squid3/3.3 # testament_sha1: 83fcefd78266ead61f6e6ec5022d69f475c88ae3 # timestamp: 2012-10-20 06:30:31 +0000 # source_branch: http://bzr.squid-cache.org/bzr/squid3/3.3 # base_revision_id: squidadm@squid-cache.org-20121017025909-\ # 9agu2odpl0je9891 # # Begin patch === modified file 'src/http.cc' --- src/http.cc 2012-10-13 06:06:27 +0000 +++ src/http.cc 2012-10-20 06:29:43 +0000 @@ -354,67 +354,103 @@ #define REFRESH_OVERRIDE(flag) 0 #endif - if (surrogateNoStore) - return 0; - - // RFC 2616: do not cache replies to responses with no-store CC directive - if (request && request->cache_control && - request->cache_control->noStore() && - !REFRESH_OVERRIDE(ignore_no_store)) - return 0; - - if (!ignoreCacheControl && request->cache_control != NULL) { - const HttpHdrCc* cc=request->cache_control; - if (cc->Private()) { - if (!REFRESH_OVERRIDE(ignore_private)) - return 0; - } - - if (cc->noCache()) { - if (!REFRESH_OVERRIDE(ignore_no_cache)) - return 0; - } - - if (cc->noStore()) { - if (!REFRESH_OVERRIDE(ignore_no_store)) - return 0; - } - } - - if (request->flags.auth || request->flags.authSent) { - /* - * Responses to requests with authorization may be cached - * only if a Cache-Control: public reply header is present. 
- * RFC 2068, sec 14.9.4 - */ - - if (!request->cache_control || !request->cache_control->Public()) { - if (!REFRESH_OVERRIDE(ignore_auth)) - return 0; - } - } - - /* Pragma: no-cache in _replies_ is not documented in HTTP, - * but servers like "Active Imaging Webcast/2.0" sure do use it */ - if (hdr->has(HDR_PRAGMA)) { - String s = hdr->getList(HDR_PRAGMA); - const int no_cache = strListIsMember(&s, "no-cache", ','); - s.clean(); - - if (no_cache) { - if (!REFRESH_OVERRIDE(ignore_no_cache)) - return 0; - } - } - - /* - * The "multipart/x-mixed-replace" content type is used for + // Check for Surrogate/1.0 protocol conditions + // NP: reverse-proxy traffic our parent server has instructed us never to cache + if (surrogateNoStore) { + debugs(22, 3, HERE << "NO because Surrogate-Control:no-store"); + return 0; + } + + // RFC 2616: HTTP/1.1 Cache-Control conditions + if (!ignoreCacheControl) { + // XXX: check to see if the request headers alone were enough to prevent caching earlier + // (ie no-store request header) no need to check those all again here if so. + // for now we are not reliably doing that so we waste CPU re-checking request CC + + // RFC 2616 section 14.9.2 - MUST NOT cache any response with request CC:no-store + if (request && request->cache_control && request->cache_control->noStore() && + !REFRESH_OVERRIDE(ignore_no_store)) { + debugs(22, 3, HERE << "NO because client request Cache-Control:no-store"); + return 0; + } + + // NP: request CC:no-cache only means cache READ is forbidden. STORE is permitted. + // NP: request CC:private is undefined. We ignore. + // NP: other request CC flags are limiters on HIT/MISS. We don't care about here. 
+ + // RFC 2616 section 14.9.2 - MUST NOT cache any response with CC:no-store + if (rep->cache_control && rep->cache_control->noStore() && + !REFRESH_OVERRIDE(ignore_no_store)) { + debugs(22, 3, HERE << "NO because server reply Cache-Control:no-store"); + return 0; + } + + // RFC 2616 section 14.9.1 - MUST NOT cache any response with CC:private in a shared cache like Squid. + // TODO: add a shared/private cache configuration possibility. + if (rep->cache_control && + rep->cache_control->Private() && + !REFRESH_OVERRIDE(ignore_private)) { + debugs(22, 3, HERE << "NO because server reply Cache-Control:private"); + return 0; + } + // NP: being conservative; CC:private overrides CC:public when both are present in a response. + + } + // RFC 2068, sec 14.9.4 - MUST NOT cache any response with Authentication UNLESS certain CC controls are present + // allow HTTP violations to IGNORE those controls (ie re-block caching Auth) + if (request && (request->flags.auth || request->flags.authSent) && !REFRESH_OVERRIDE(ignore_auth)) { + if (!rep->cache_control) { + debugs(22, 3, HERE << "NO because Authenticated and server reply missing Cache-Control"); + return 0; + } + + if (ignoreCacheControl) { + debugs(22, 3, HERE << "NO because Authenticated and ignoring Cache-Control"); + return 0; + } + + // HTTPbis pt7 section 4.1 clause 3: a response CC:public is present + bool mayStore = false; + if (rep->cache_control->Public()) { + debugs(22, 3, HERE << "Authenticated but server reply Cache-Control:public"); + mayStore = true; + + // HTTPbis pt7 section 4.1 clause 2: a response CC:must-revalidate is present + } else if (rep->cache_control->mustRevalidate() && !REFRESH_OVERRIDE(ignore_must_revalidate)) { + debugs(22, 3, HERE << "Authenticated but server reply Cache-Control:public"); + mayStore = true; + +#if 0 // waiting on HTTPbis WG agreement before we do this + // NP: given the must-revalidate exception we should also be able to exempt no-cache. 
+ } else if (rep->cache_control->noCache() && !REFRESH_OVERRIDE(ignore_no_cache)) { + debugs(22, 3, HERE << "Authenticated but server reply Cache-Control:no-cache"); + mayStore = true; +#endif + + // HTTPbis pt7 section 4.1 clause 1: a response CC:s-maxage is present + } else if (rep->cache_control->sMaxAge()) { + debugs(22, 3, HERE << " Authenticated but server reply Cache-Control:s-maxage"); + mayStore = true; + } + + if (!mayStore) { + debugs(22, 3, HERE << "NO because Authenticated transaction"); + return 0; + } + + // NP: response CC:no-cache is equivalent to CC:must-revalidate,max-age=0. We MAY cache, and do so. + // NP: other request CC flags are limiters on HIT/MISS/REFRESH. We don't care about here. + } + + /* HACK: The "multipart/x-mixed-replace" content type is used for * continuous push replies. These are generally dynamic and * probably should not be cachable */ if ((v = hdr->getStr(HDR_CONTENT_TYPE))) - if (!strncasecmp(v, "multipart/x-mixed-replace", 25)) + if (!strncasecmp(v, "multipart/x-mixed-replace", 25)) { + debugs(22, 3, HERE << "NO because Content-Type:multipart/x-mixed-replace"); return 0; + } switch (rep->sline.status) { /* Responses that are cacheable */ @@ -435,11 +471,12 @@ */ if (!refreshIsCachable(entry) && !REFRESH_OVERRIDE(store_stale)) { - debugs(22, 3, "refreshIsCachable() returned non-cacheable.."); + debugs(22, 3, "NO because refreshIsCachable() returned non-cacheable.."); return 0; - } else + } else { + debugs(22, 3, HERE << "YES because HTTP status " << rep->sline.status); return 1; - + } /* NOTREACHED */ break; @@ -447,11 +484,17 @@ case HTTP_MOVED_TEMPORARILY: case HTTP_TEMPORARY_REDIRECT: - if (rep->expires > rep->date && rep->date > 0) + if (rep->date <= 0) { + debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status << " and Date missing/invalid"); + return 0; + } + if (rep->expires > rep->date) { + debugs(22, 3, HERE << "YES because HTTP status " << rep->sline.status << " and Expires > Date"); return 1; - else 
+ } else { + debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status << " and Expires <= Date"); return 0; - + } /* NOTREACHED */ break; @@ -480,6 +523,7 @@ case HTTP_SERVICE_UNAVAILABLE: case HTTP_GATEWAY_TIMEOUT: + debugs(22, 3, HERE << "MAYBE because HTTP status " << rep->sline.status); return -1; /* NOTREACHED */ @@ -516,12 +560,12 @@ case HTTP_REQUESTED_RANGE_NOT_SATISFIABLE: case HTTP_EXPECTATION_FAILED: + debugs(22, 3, HERE << "NO because HTTP status " << rep->sline.status); return 0; default: /* RFC 2616 section 6.1.1: an unrecognized response MUST NOT be cached. */ - debugs (11, 3, HERE << "Unknown HTTP status code " << rep->sline.status << ". Not cacheable."); - + debugs (11, 3, HERE << "NO because unknown HTTP status code " << rep->sline.status); return 0; /* NOTREACHED */ @@ -927,12 +971,25 @@ no_cache: - if (!ignoreCacheControl && rep->cache_control) { - if (rep->cache_control->proxyRevalidate() || - rep->cache_control->mustRevalidate() || - rep->cache_control->hasSMaxAge() - ) - EBIT_SET(entry->flags, ENTRY_REVALIDATE); + if (!ignoreCacheControl) { + if (rep->cache_control) { + if (rep->cache_control->proxyRevalidate() || + rep->cache_control->mustRevalidate() || + rep->cache_control->noCache() || + rep->cache_control->hasSMaxAge()) + EBIT_SET(entry->flags, ENTRY_REVALIDATE); + } +#if USE_HTTP_VIOLATIONS // response header Pragma::no-cache is undefined in HTTP + else { + // Expensive calculation. So only do it IF the CC: header is not present. 
+ + /* HACK: Pragma: no-cache in _replies_ is not documented in HTTP, + * but servers like "Active Imaging Webcast/2.0" sure do use it */ + if (rep->header.has(HDR_PRAGMA) && + rep->header.hasListMember(HDR_PRAGMA,"no-cache",',')) + EBIT_SET(entry->flags, ENTRY_REVALIDATE); + } +#endif } #if HEADERS_LOG === modified file 'src/http.h' --- src/http.h 2012-09-22 20:07:31 +0000 +++ src/http.h 2012-10-20 06:29:43 +0000 @@ -60,6 +60,8 @@ void processReplyBody(); void readReply(const CommIoCbParams &io); virtual void maybeReadVirginBody(); // read response data from the network + + // Determine whether the response is a cacheable representation int cacheableReply(); CachePeer *_peer; /* CachePeer request made to */