------------------------------------------------------------ revno: 14082 revision-id: squid3@treenet.co.nz-20160916115328-0fpefy6pbtfq40cd parent: squid3@treenet.co.nz-20160908185514-e3zlew29xywz6c0o author: Eduard Bagdasaryan , Alex Rousskov committer: Amos Jeffries branch nick: 3.5 timestamp: Fri 2016-09-16 23:53:28 +1200 message: Fix logged request size (%http::>st) and other size-related %codes. The %[http::]>st logformat code should log the actual number of [dechunked] bytes received from the client. However, for requests with Content-Length, Squid was logging the sum of the request header size and the Content-Length header field value instead. The logged value was wrong when the client sent fewer than Content-Length body bytes. Also: * Fixed %http::>h and %http::h logformat code, but since ICAP services deal with (and log) both HTTP requests and responses, that focus on the HTTP message kind actually complicates ICAP logging configuration and will eventually require introduction of new %http logformat codes that would be meaningless in access.log context. - In ICAP context, %http::>h should log to-be-adapted HTTP message headers embedded in an ICAP request (HTTP request headers in REQMOD; HTTP response headers in RESPMOD). Before this change, %http::>h logged constant/"FYI" HTTP request headers in RESPMOD. - Similarly, %http::" and "<" characters should indicate ICAP message kind (where the being-logged HTTP message is embedded), not HTTP message kind, even for %http codes. TODO: %http::>h code logs "-" in RESPMOD because AccessLogEntry does not store virgin HTTP response headers. * Correctly initialize ICAP ALE HTTP fields related to HTTP message sizes for icap_log, including %http::>st, %http::sh, and %http::>sh format codes. * Initialize HttpMsg::hdr_sz in HTTP header parsing code. Among other uses, this field is needed to initialize HTTP fields inside ICAP ALE. * Synced default icap_log format documentation with the current code. ------------------------------------------------------------ # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: squid3@treenet.co.nz-20160916115328-0fpefy6pbtfq40cd # target_branch: http://bzr.squid-cache.org/bzr/squid3/3.5 # testament_sha1: 45a920a9aa3c53cab1b2bfb8af66bcd558fa96d5 # timestamp: 2016-09-16 12:51:02 +0000 # source_branch: http://bzr.squid-cache.org/bzr/squid3/3.5 # base_revision_id: squid3@treenet.co.nz-20160908185514-\ # e3zlew29xywz6c0o # # Begin patch === modified file 'src/adaptation/icap/ModXact.cc' --- src/adaptation/icap/ModXact.cc 2016-01-01 00:14:27 +0000 +++ src/adaptation/icap/ModXact.cc 2016-09-16 11:53:28 +0000 @@ -1259,31 +1259,32 @@ void Adaptation::Icap::ModXact::finalizeLogInfo() { - HttpRequest * request_ = NULL; - HttpRequest * adapted_request_ = NULL; - HttpReply * reply_ = NULL; - request_ = (virgin.cause? virgin.cause: dynamic_cast(virgin.header)); + HttpRequest *adapted_request_ = NULL; + HttpReply *adapted_reply_ = NULL; + HttpRequest *virgin_request_ = (virgin.cause ? virgin.cause : dynamic_cast(virgin.header)); if (!(adapted_request_ = dynamic_cast(adapted.header))) { - adapted_request_ = request_; - reply_ = dynamic_cast(adapted.header); + // if the request was not adapted, use virgin request to simplify + // the code further below + adapted_request_ = virgin_request_; + adapted_reply_ = dynamic_cast(adapted.header); } - Adaptation::Icap::History::Pointer h = (request_ ? request_->icapHistory() : NULL); + Adaptation::Icap::History::Pointer h = (virgin_request_ ? virgin_request_->icapHistory() : NULL); Must(h != NULL); // ICAPXaction::maybeLog calls only if there is a log al.icp.opcode = ICP_INVALID; al.url = h->log_uri.termedBuf(); const Adaptation::Icap::ServiceRep &s = service(); al.icap.reqMethod = s.cfg().method; - al.cache.caddr = request_->client_addr; + al.cache.caddr = virgin_request_->client_addr; - al.request = request_; + al.request = virgin_request_; HTTPMSGLOCK(al.request); al.adapted_request = adapted_request_; HTTPMSGLOCK(al.adapted_request); - if (reply_) { - al.reply = reply_; + if (adapted_reply_) { + al.reply = adapted_reply_; HTTPMSGLOCK(al.reply); } else al.reply = NULL; @@ -1296,25 +1297,29 @@ al.cache.ssluser = h->ssluser.termedBuf(); #endif al.cache.code = h->logType; - // XXX: should use icap-specific counters instead ? - al.http.clientRequestSz.payloadData = h->req_sz; + + const HttpMsg *virgin_msg = dynamic_cast(virgin.header); + if (!virgin_msg) + virgin_msg = virgin_request_; + assert(virgin_msg != virgin.cause); + al.http.clientRequestSz.header = virgin_msg->hdr_sz; + al.http.clientRequestSz.payloadData = virgin_msg->body_pipe->producedSize(); // leave al.icap.bodyBytesRead negative if no body if (replyHttpHeaderSize >= 0 || replyHttpBodySize >= 0) { const int64_t zero = 0; // to make max() argument types the same - al.icap.bodyBytesRead = - max(zero, replyHttpHeaderSize) + max(zero, replyHttpBodySize); + const uint64_t headerSize = max(zero, replyHttpHeaderSize); + const uint64_t bodySize = max(zero, replyHttpBodySize); + al.icap.bodyBytesRead = headerSize + bodySize; + al.http.clientReplySz.header = headerSize; + al.http.clientReplySz.payloadData = bodySize; } - if (reply_) { - al.http.code = reply_->sline.status(); - al.http.content_type = reply_->content_type.termedBuf(); - if (replyHttpBodySize >= 0) { - // XXX: should use icap-specific counters instead ? - al.http.clientReplySz.payloadData = replyHttpBodySize; - al.http.clientReplySz.header = reply_->hdr_sz; + if (adapted_reply_) { + al.http.code = adapted_reply_->sline.status(); + al.http.content_type = adapted_reply_->content_type.termedBuf(); + if (replyHttpBodySize >= 0) al.cache.highOffset = replyHttpBodySize; - } //don't set al.cache.objectSize because it hasn't exist yet Packer p; @@ -1323,7 +1328,7 @@ mb.init(); packerToMemInit(&p, &mb); - reply_->header.packInto(&p); + adapted_reply_->header.packInto(&p); al.headers.reply = xstrdup(mb.buf); packerClean(&p); === modified file 'src/cf.data.pre' --- src/cf.data.pre 2016-05-06 06:36:08 +0000 +++ src/cf.data.pre 2016-09-16 11:53:28 +0000 @@ -4427,13 +4427,35 @@ ICAP transaction log lines will correspond to a single access log line. - ICAP log uses logformat codes that make sense for an ICAP - transaction. Header-related codes are applied to the HTTP header - embedded in an ICAP server response, with the following caveats: - For REQMOD, there is no HTTP response header unless the ICAP - server performed request satisfaction. For RESPMOD, the HTTP - request header is the header sent to the ICAP server. For - OPTIONS, there are no HTTP headers. + ICAP log supports many access.log logformat %codes. In ICAP context, + HTTP message-related %codes are applied to the HTTP message embedded + in an ICAP message. Logformat "%http::>..." codes are used for HTTP + messages embedded in ICAP requests while "%http::<..." codes are used + for HTTP messages embedded in ICAP responses. For example: + + http::>h To-be-adapted HTTP message headers sent by Squid to + the ICAP service. For REQMOD transactions, these are + HTTP request headers. For RESPMOD, these are HTTP + response headers, but Squid currently cannot log them + (i.e., %http::>h will expand to "-" for RESPMOD). + + http::st Bytes sent to the ICAP server (TCP payload - only; i.e., what Squid writes to the socket). - - icap::st The total size of the ICAP request sent to the ICAP + server (ICAP headers + ICAP body), including chunking + metadata (if any). + + icap::a %icap::to/%03icap::Hs %icap::A %icap::to/%03icap::Hs %icap::http.method = request->method; aLogEntry->http.version = request->http_ver; aLogEntry->hier = request->hier; - if (request->content_length > 0) // negative when no body or unknown length - aLogEntry->http.clientRequestSz.payloadData += request->content_length; // XXX: actually adaptedRequest payload size ?? aLogEntry->cache.extuser = request->extacl_user.termedBuf(); // Adapted request, if any, inherits and then collects all the stats, but @@ -593,6 +591,9 @@ al->cache.objectSize = loggingEntry()->contentLen(); // payload duplicate ?? with or without TE ? al->http.clientRequestSz.header = req_sz; + // the virgin request is saved to al->request + if (al->request && al->request->body_pipe != NULL) + al->http.clientRequestSz.payloadData = al->request->body_pipe->producedSize(); al->http.clientReplySz.header = out.headers_sz; // XXX: calculate without payload encoding or headers !! al->http.clientReplySz.payloadData = out.size - out.headers_sz; // pretend its all un-encoded data for now. @@ -2744,6 +2745,7 @@ request->my_addr = conn->clientConnection->local; request->myportname = conn->port->name; request->http_ver = http_ver; + request->hdr_sz = http->req_sz; // Link this HttpRequest to ConnStateData relatively early so the following complex handling can use it // TODO: this effectively obsoletes a lot of conn->FOO copying. That needs cleaning up later. === modified file 'src/format/Format.cc' --- src/format/Format.cc 2016-04-07 04:43:45 +0000 +++ src/format/Format.cc 2016-09-16 11:53:28 +0000 @@ -309,6 +309,38 @@ *p = '\0'; } +/// XXX: Misnamed. TODO: Split reply; +#if USE_ADAPTATION + // al->icap.reqMethod is methodNone in access.log context + if (!msg && al->icap.reqMethod == Adaptation::methodReqmod) + msg = al->adapted_request; +#endif + return msg; +} + +/// XXX: Misnamed. See actualReplyHeader(). +/// \return HttpRequest or HttpReply for %http::>h. +static const HttpMsg * +actualRequestHeader(const AccessLogEntry::Pointer &al) +{ +#if USE_ADAPTATION + // al->icap.reqMethod is methodNone in access.log context + if (al->icap.reqMethod == Adaptation::methodRespmod) { + // XXX: for now AccessLogEntry lacks virgin response headers + return NULL; + } +#endif + return al->request; +} + void Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logSequenceNumber) const { @@ -547,8 +579,8 @@ case LFT_REQUEST_HEADER: - if (al->request) - sb = al->request->header.getByName(fmt->data.header.header); + if (const HttpMsg *msg = actualRequestHeader(al)) + sb = msg->header.getByName(fmt->data.header.header); out = sb.termedBuf(); @@ -567,15 +599,15 @@ break; - case LFT_REPLY_HEADER: - if (al->reply) - sb = al->reply->header.getByName(fmt->data.header.header); + case LFT_REPLY_HEADER: { + if (const HttpMsg *msg = actualReplyHeader(al)) + sb = msg->header.getByName(fmt->data.header.header); out = sb.termedBuf(); quote = 1; - - break; + } + break; #if USE_ADAPTATION case LFT_ADAPTATION_SUM_XACT_TIMES: @@ -757,8 +789,8 @@ break; #endif case LFT_REQUEST_HEADER_ELEM: - if (al->request) - sb = al->request->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); + if (const HttpMsg *msg = actualRequestHeader(al)) + sb = msg->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); out = sb.termedBuf(); @@ -776,18 +808,27 @@ break; - case LFT_REPLY_HEADER_ELEM: - if (al->reply) - sb = al->reply->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); + case LFT_REPLY_HEADER_ELEM: { + if (const HttpMsg *msg = actualReplyHeader(al)) + sb = msg->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); out = sb.termedBuf(); quote = 1; - - break; + } + break; case LFT_REQUEST_ALL_HEADERS: - out = al->headers.request; +#if USE_ADAPTATION + if (al->icap.reqMethod == Adaptation::methodRespmod) { + // XXX: since AccessLogEntry::Headers lacks virgin response + // headers, do nothing for now + out = NULL; + } else +#endif + { + out = al->headers.request; + } quote = 1; @@ -802,6 +843,10 @@ case LFT_REPLY_ALL_HEADERS: out = al->headers.reply; +#if USE_ADAPTATION + if (!out && al->icap.reqMethod == Adaptation::methodReqmod) + out = al->headers.adapted_request; +#endif quote = 1;