------------------------------------------------------------ revno: 13189 [merge] revision-id: kinkie@squid-cache.org-20131218170315-ajv8rc9icsddy88x parent: squid3@treenet.co.nz-20131218030001-s4662gew223waehe parent: kinkie@squid-cache.org-20131218152206-u4qnyftoew03jei7 committer: Francesco Chemolli branch nick: trunk timestamp: Wed 2013-12-18 18:03:15 +0100 message: Merge CharacterSet ------------------------------------------------------------ Use --include-merges or -n0 to see merged revisions. ------------------------------------------------------------ # Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: kinkie@squid-cache.org-20131218170315-ajv8rc9icsddy88x # target_branch: http://bzr.squid-cache.org/bzr/squid3/trunk/ # testament_sha1: 7592cf9c293f5483b9683c76291307df29f2af50 # timestamp: 2013-12-18 17:54:21 +0000 # source_branch: http://bzr.squid-cache.org/bzr/squid3/trunk/ # base_revision_id: squid3@treenet.co.nz-20131218030001-\ # s4662gew223waehe # # Begin patch === modified file 'src/Makefile.am' --- src/Makefile.am 2013-12-12 09:41:39 +0000 +++ src/Makefile.am 2013-12-17 17:05:17 +0000 @@ -15,6 +15,7 @@ DnsLookupDetails.cc SBUF_SOURCE= \ + base/CharacterSet.h \ base/InstanceId.h \ MemBlob.h \ MemBlob.cc \ === modified file 'src/SBuf.cc' --- src/SBuf.cc 2013-11-27 15:52:03 +0000 +++ src/SBuf.cc 2013-12-17 10:19:44 +0000 @@ -27,6 +27,7 @@ */ #include "squid.h" +#include "base/CharacterSet.h" #include "base/RefCount.h" #include "Debug.h" #include "OutOfBoundsException.h" @@ -688,28 +689,45 @@ } SBuf::size_type -SBuf::find_first_of(const SBuf &set, size_type startPos) const -{ - // if set is 1 char big, use the char search. 
Stats updated there - if (set.length() == 1) - return find(set[0], startPos); - - ++stats.find; - - if (startPos == npos) - return npos; - - if (startPos >= length()) - return npos; - - if (set.length() == 0) - return npos; - - debugs(24, 7, "any of '" << set << "' " << " in id " << id); - char *cur = buf()+startPos, *end = bufEnd(); - while (cur < end) { - if (memchr(set.buf(), *cur, set.length())) - return (cur-buf()); +SBuf::find_first_of(const CharacterSet &set, size_type startPos) const +{ + ++stats.find; + + if (startPos == npos) + return npos; + + if (startPos >= length()) + return npos; + + debugs(24, 7, "first of characterset " << set.name << " in id " << id); + char *cur = buf()+startPos; + const char *end = bufEnd(); + while (cur < end) { + if (set[*cur]) + return cur-buf(); + ++cur; + } + debugs(24, 7, "not found"); + return npos; +} + +SBuf::size_type +SBuf::find_first_not_of(const CharacterSet &set, size_type startPos) const +{ + ++stats.find; + + if (startPos == npos) + return npos; + + if (startPos >= length()) + return npos; + + debugs(24, 7, "first not of characterset " << set.name << " in id " << id); + char *cur = buf()+startPos; + const char *end = bufEnd(); + while (cur < end) { + if (!set[*cur]) + return cur-buf(); ++cur; } debugs(24, 7, "not found"); === modified file 'src/SBuf.h' --- src/SBuf.h 2013-12-04 18:37:08 +0000 +++ src/SBuf.h 2013-12-17 10:19:44 +0000 @@ -101,6 +101,8 @@ SBufStats& operator +=(const SBufStats&); }; +class CharacterSet; + /** * A String or Buffer. 
* Features: refcounted backing store, cheap copy and sub-stringing @@ -512,8 +514,20 @@ * \return npos if no character in the set could be found * \param startPos if specified, ignore any occurrences before that position * if npos, then npos is always returned - */ - size_type find_first_of(const SBuf &set, size_type startPos = 0) const; + * + * TODO: rename to camelCase + */ + size_type find_first_of(const CharacterSet &set, size_type startPos = 0) const; + + /** Find first occurrence character NOT in character set + * + * \return npos if all characters in the SBuf are from set + * \param startPos if specified, ignore any occurrences before that position + * if npos, then npos is always returned + * + * TODO: rename to camelCase + */ + size_type find_first_not_of(const CharacterSet &set, size_type startPos = 0) const; /** sscanf-alike * === added file 'src/base/CharacterSet.cc' --- src/base/CharacterSet.cc 1970-01-01 00:00:00 +0000 +++ src/base/CharacterSet.cc 2013-12-18 15:22:06 +0000 @@ -0,0 +1,32 @@ +#include "squid.h" +#include "CharacterSet.h" + +const CharacterSet & +CharacterSet::operator +=(const CharacterSet &src) +{ + Storage::const_iterator s = src.chars_.begin(); + const Storage::const_iterator e = src.chars_.end(); + Storage::iterator d = chars_.begin(); + while (s != e) { + if (*s) + *d = 1; + ++s; + ++d; + } + return *this; +} + +CharacterSet & +CharacterSet::add(const unsigned char c) +{ + chars_[static_cast<uint8_t>(c)] = 1; + return *this; +} + +CharacterSet::CharacterSet(const char *label, const char * const c) +: name(label == NULL ?
"anonymous" : label), chars_(Storage(256,0)) +{ + const size_t clen = strlen(c); + for (size_t i = 0; i < clen; ++i) + add(c[i]); +} === added file 'src/base/CharacterSet.h' --- src/base/CharacterSet.h 1970-01-01 00:00:00 +0000 +++ src/base/CharacterSet.h 2013-12-18 15:22:06 +0000 @@ -0,0 +1,38 @@ +#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H +#define _SQUID_SRC_PARSER_CHARACTERSET_H + +#include <vector> + +/// optimized set of C chars, with quick membership test and merge support +class CharacterSet +{ +public: + typedef std::vector<uint8_t> Storage; + + /// define a character set with the given label ("anonymous" if NULL) + /// with specified initial contents + CharacterSet(const char *label, const char * const initial); + + /// whether a given character exists in the set + bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)] != 0;} + + /// add a given character to the character set + CharacterSet & add(const unsigned char c); + + /// add all characters from the given CharacterSet to this one + const CharacterSet &operator +=(const CharacterSet &src); + + /// optional set label for debugging (default: "anonymous") + const char * name; + +private: + /** index of characters in this set + * + * \note guaranteed to be always 256 slots big, as forced in the + * constructor.
This assumption is relied upon in operator[], add, + * operator+= + */ + Storage chars_; +}; + +#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */ === modified file 'src/base/Makefile.am' --- src/base/Makefile.am 2013-11-26 09:43:29 +0000 +++ src/base/Makefile.am 2013-12-17 10:19:44 +0000 @@ -12,6 +12,8 @@ AsyncJobCalls.h \ AsyncCallQueue.cc \ AsyncCallQueue.h \ + CharacterSet.h \ + CharacterSet.cc \ TidyPointer.h \ CbcPointer.h \ InstanceId.h \ === modified file 'src/icmp/Makefile.am' --- src/icmp/Makefile.am 2013-12-03 07:49:13 +0000 +++ src/icmp/Makefile.am 2013-12-15 11:47:07 +0000 @@ -23,6 +23,7 @@ noinst_LTLIBRARIES = libicmp-core.la libicmp.la SBUF_SOURCE= \ + $(top_srcdir)/src/base/CharacterSet.h \ $(top_srcdir)/src/SBuf.h \ $(top_srcdir)/src/SBuf.cc \ $(top_srcdir)/src/MemBlob.h \ === modified file 'src/tests/SBufFindTest.cc' --- src/tests/SBufFindTest.cc 2013-11-11 12:06:11 +0000 +++ src/tests/SBufFindTest.cc 2013-12-15 12:48:01 +0000 @@ -1,4 +1,5 @@ #include "squid.h" +#include "base/CharacterSet.h" #include "SBufFindTest.h" #include #include @@ -105,7 +106,7 @@ { theFindString = theStringHay.find_first_of(theStringNeedle, thePos); theBareNeedlePos = theStringHay.find_first_of(theStringNeedle); - theFindSBuf = theSBufHay.find_first_of(theSBufNeedle, thePos); + theFindSBuf = theSBufHay.find_first_of(CharacterSet("cs",theSBufNeedle.c_str()), thePos); checkResults("find_first_of"); } === modified file 'src/tests/testSBuf.cc' --- src/tests/testSBuf.cc 2013-11-27 15:52:03 +0000 +++ src/tests/testSBuf.cc 2013-12-17 10:19:44 +0000 @@ -1,4 +1,5 @@ #include "squid.h" +#include "base/CharacterSet.h" #include "Mem.h" #include "SBuf.h" #include "SBufFindTest.h" @@ -759,23 +760,47 @@ SBuf::size_type idx; // not found - idx=haystack.find_first_of(SBuf("ADHRWYP")); + idx=haystack.find_first_of(CharacterSet("t1","ADHRWYP")); CPPUNIT_ASSERT_EQUAL(SBuf::npos,idx); // found at beginning - idx=haystack.find_first_of(SBuf("THANDF")); + 
idx=haystack.find_first_of(CharacterSet("t2","THANDF")); CPPUNIT_ASSERT_EQUAL(0U,idx); //found at end of haystack - idx=haystack.find_first_of(SBuf("QWERYVg")); + idx=haystack.find_first_of(CharacterSet("t3","QWERYVg")); CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx); //found in the middle of haystack - idx=haystack.find_first_of(SBuf("QWERqYV")); + idx=haystack.find_first_of(CharacterSet("t4","QWERqYV")); CPPUNIT_ASSERT_EQUAL(4U,idx); } void +testSBuf::testFindFirstNotOf() +{ + SBuf haystack(literal); + SBuf::size_type idx; + + // all chars from the set + idx=haystack.find_first_not_of(CharacterSet("t1",literal.c_str())); + CPPUNIT_ASSERT_EQUAL(SBuf::npos,idx); + + // found at beginning + idx=haystack.find_first_not_of(CharacterSet("t2","a")); + CPPUNIT_ASSERT_EQUAL(0U,idx); + + //found at end of haystack + idx=haystack.find_first_not_of(CharacterSet("t3",literal.substr(0,literal.length()-1).c_str())); + CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx); + + //found in the middle of haystack + idx=haystack.find_first_not_of(CharacterSet("t4","The")); + CPPUNIT_ASSERT_EQUAL(3U,idx); +} + + +void testSBuf::testAutoFind() { SBufFindTest test; === modified file 'src/tests/testSBuf.h' --- src/tests/testSBuf.h 2013-07-26 09:20:09 +0000 +++ src/tests/testSBuf.h 2013-12-15 11:47:07 +0000 @@ -35,6 +35,7 @@ CPPUNIT_TEST( testRFindChar ); CPPUNIT_TEST( testRFindSBuf ); CPPUNIT_TEST( testFindFirstOf ); + CPPUNIT_TEST( testFindFirstNotOf ); CPPUNIT_TEST( testPrintf ); CPPUNIT_TEST( testScanf ); CPPUNIT_TEST( testCopy ); @@ -79,6 +80,7 @@ void testStartsWith(); void testSBufStream(); void testFindFirstOf(); + void testFindFirstNotOf(); void testAutoFind(); void testStdStringOps(); };