------------------------------------------------------------ revno: 14087 revision-id: squid3@treenet.co.nz-20160923124044-i1road76n1108syf parent: squid3@treenet.co.nz-20160923112248-1qvteivc0t3hgxnw fixes bug: http://bugs.squid-cache.org/show_bug.cgi?id=3819 author: Alex Rousskov committer: Amos Jeffries branch nick: 3.5 timestamp: Sat 2016-09-24 00:40:44 +1200 message: Bug 3819: "fd >= 0" assertion in file_write() during reconfiguration Other possible assertions include "NumberOfUFSDirs" in UFSSwapDir and "fd >= 0" in file_close(). And Fs::Ufs::UFSStoreState::drainWriteQueue() may segfault while dereferencing nil theFile, although I am not sure that bug is caused specifically by reconfiguration. Since trunk r9181.3.1, reconfiguration is done in at least two steps: First, mainReconfigureStart() closes/cleans/disables all modules supporting hot reconfiguration and then, at the end of the event loop iteration, mainReconfigureFinish() opens/configures/enables them again. UFS code hits assertions if it has to log entries or rebuild swap.state between those two steps. The tiny assertion opportunity window hides these bugs from most proxies that are not doing enough disk I/O or are not being reconfigured frequently enough. Asynchronous UFS cache_dirs such as diskd were the most exposed, but even blocking UFS caching code could probably hit [rebuild] assertions. The swap.state rebuilding (always initiated at startup) probably did not work as intended if reconfiguration happened during the rebuild time because reconfiguration closed the swap.state file being rebuilt. We now protect that swap.state file and delay rebuilding progress until reconfiguration is over. TODO: To squash more similar bugs, we need to move hot reconfiguration support into the modules so that each module can reconfigure instantly. ------------------------------------------------------------ # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: squid3@treenet.co.nz-20160923124044-i1road76n1108syf # target_branch: http://bzr.squid-cache.org/bzr/squid3/3.5 # testament_sha1: 78eb315dda916767cc6addf75f78289161a2226c # timestamp: 2016-09-23 12:51:02 +0000 # source_branch: http://bzr.squid-cache.org/bzr/squid3/3.5 # base_revision_id: squid3@treenet.co.nz-20160923112248-\ # 1qvteivc0t3hgxnw # # Begin patch === modified file 'src/fs/ufs/RebuildState.cc' --- src/fs/ufs/RebuildState.cc 2016-01-01 00:14:27 +0000 +++ src/fs/ufs/RebuildState.cc 2016-09-23 12:40:44 +0000 @@ -74,9 +74,11 @@ Fs::Ufs::RebuildState::RebuildStep(void *data) { RebuildState *rb = (RebuildState *)data; - rb->rebuildStep(); + if (!reconfiguring) + rb->rebuildStep(); - if (!rb->isDone()) + // delay storeRebuildComplete() when reconfiguring to protect storeCleanup() + if (!rb->isDone() || reconfiguring) eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1); else { -- StoreController::store_dirs_rebuilding; === modified file 'src/fs/ufs/UFSStoreState.cc' --- src/fs/ufs/UFSStoreState.cc 2016-01-01 00:14:27 +0000 +++ src/fs/ufs/UFSStoreState.cc 2016-09-23 12:40:44 +0000 @@ -421,7 +421,7 @@ if (flags.write_draining) return; - if (!theFile->canWrite()) + if (!theFile || !theFile->canWrite()) return; flags.write_draining = true; === modified file 'src/fs/ufs/UFSSwapDir.cc' --- src/fs/ufs/UFSSwapDir.cc 2016-01-01 00:14:27 +0000 +++ src/fs/ufs/UFSSwapDir.cc 2016-09-23 12:40:44 +0000 @@ -316,7 +316,8 @@ currentIOOptions(new ConfigOptionVector()), ioType(xstrdup(anIOType)), cur_size(0), - n_disk_objects(0) + n_disk_objects(0), + rebuilding_(false) { /* modulename is only set to disk modules that are built, by configure, * so the Find call should never return NULL here. @@ -723,6 +724,15 @@ void Fs::Ufs::UFSSwapDir::openLog() { + assert(NumberOfUFSDirs || !UFSDirToGlobalDirMapping); + ++NumberOfUFSDirs; + assert(NumberOfUFSDirs <= Config.cacheSwap.n_configured); + + if (rebuilding_) { // we did not close the temporary log used for rebuilding + assert(swaplog_fd >= 0); + return; + } + char *logPath; logPath = logFile(); swaplog_fd = file_open(logPath, O_WRONLY | O_CREAT | O_BINARY); @@ -733,13 +743,6 @@ } debugs(50, 3, HERE << "Cache Dir #" << index << " log opened on FD " << swaplog_fd); - - if (0 == NumberOfUFSDirs) - assert(NULL == UFSDirToGlobalDirMapping); - - ++NumberOfUFSDirs; - - assert(NumberOfUFSDirs <= Config.cacheSwap.n_configured); } void @@ -748,18 +751,19 @@ if (swaplog_fd < 0) /* not open */ return; - file_close(swaplog_fd); - - debugs(47, 3, "Cache Dir #" << index << " log closed on FD " << swaplog_fd); - - swaplog_fd = -1; - --NumberOfUFSDirs; - assert(NumberOfUFSDirs >= 0); - - if (0 == NumberOfUFSDirs) + if (!NumberOfUFSDirs) safe_free(UFSDirToGlobalDirMapping); + + if (rebuilding_) // we cannot close the temporary log used for rebuilding + return; + + file_close(swaplog_fd); + + debugs(47, 3, "Cache Dir #" << index << " log closed on FD " << swaplog_fd); + + swaplog_fd = -1; } bool @@ -839,6 +843,9 @@ void Fs::Ufs::UFSSwapDir::closeTmpSwapLog() { + assert(rebuilding_); + rebuilding_ = false; + char *swaplog_path = xstrdup(logFile(NULL)); // where the swaplog should be char *tmp_path = xstrdup(logFile(".new")); // the temporary file we have generated int fd; @@ -864,6 +871,8 @@ FILE * Fs::Ufs::UFSSwapDir::openTmpSwapLog(int *clean_flag, int *zero_flag) { + assert(!rebuilding_); + char *swaplog_path = xstrdup(logFile(NULL)); char *clean_path = xstrdup(logFile(".last-clean")); char *new_path = xstrdup(logFile(".new")); @@ -897,6 +906,7 @@ } swaplog_fd = fd; + rebuilding_ = true; { const StoreSwapLogHeader header; @@ -1053,18 +1063,17 @@ cleanLog = NULL; } -void -Fs::Ufs::UFSSwapDir::CleanEvent(void *unused) +/// safely cleans a few unused files if possible +int +Fs::Ufs::UFSSwapDir::HandleCleanEvent() { static int swap_index = 0; int i; int j = 0; int n = 0; - /* - * Assert that there are UFS cache_dirs configured, otherwise - * we should never be called. - */ - assert(NumberOfUFSDirs); + + if (!NumberOfUFSDirs) + return 0; // probably in the middle of reconfiguration if (NULL == UFSDirToGlobalDirMapping) { SwapDir *sd; @@ -1106,6 +1115,13 @@ ++swap_index; } + return n; +} + +void +Fs::Ufs::UFSSwapDir::CleanEvent(void *) +{ + const int n = HandleCleanEvent(); eventAdd("storeDirClean", CleanEvent, NULL, 15.0 * exp(-0.25 * n), 1); } @@ -1283,6 +1299,11 @@ void Fs::Ufs::UFSSwapDir::logEntry(const StoreEntry & e, int op) const { + if (swaplog_fd < 0) { + debugs(36, 5, "cannot log " << e << " in the middle of reconfiguration"); + return; + } + StoreSwapLogData *s = new StoreSwapLogData; s->op = (char) op; s->swap_filen = e.swap_filen; === modified file 'src/fs/ufs/UFSSwapDir.h' --- src/fs/ufs/UFSSwapDir.h 2016-01-01 00:14:27 +0000 +++ src/fs/ufs/UFSSwapDir.h 2016-09-23 12:40:44 +0000 @@ -145,6 +145,7 @@ bool pathIsDirectory(const char *path)const; int swaplog_fd; static EVH CleanEvent; + static int HandleCleanEvent(); /** Verify that the the CacheDir exists * * If this returns < 0, then Squid exits, complains about swap @@ -164,6 +165,7 @@ char const *ioType; uint64_t cur_size; ///< currently used space in the storage area uint64_t n_disk_objects; ///< total number of objects stored + bool rebuilding_; ///< whether RebuildState is writing the new swap.state }; } //namespace Ufs