From ba7feaa6574c3f41a27866307fba3540df770a56 Mon Sep 17 00:00:00 2001
From: Daniel Lemire
Date: Fri, 17 Mar 2017 17:38:54 -0400
Subject: [PATCH] intersects and logicalandCount

---
 include/concise.h | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 tests/unit.cpp    | 19 ++++++++++-
 2 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/include/concise.h b/include/concise.h
index c0ec79e..b15c198 100644
--- a/include/concise.h
+++ b/include/concise.h
@@ -106,6 +106,102 @@ template class ConciseSet {
     return;
   }
 
+  /**
+   * Tells whether this set and `other` share at least one element.
+   * Both sets are scanned word by word in lockstep: the scan returns true at
+   * the first overlapping word, and false once prepareNext() reports false
+   * for either iterator (or immediately when either set is empty).
+   * The prepareNext() calls are joined with `|`, not `||`, so that both
+   * iterators always advance.
+   */
+  bool intersects(const ConciseSet &other) const {
+    if (isEmpty() || other.isEmpty()) {
+      return false;
+    }
+    // scan "this" and "other"
+    WordIterator thisItr(*this);
+    WordIterator otherItr(other);
+    while (true) {
+      if (!thisItr.IsLiteral) {
+        if (!otherItr.IsLiteral) {
+          int minCount = std::min(thisItr.count, otherItr.count);
+          if (concise_and(thisItr.word, otherItr.word) & SEQUENCE_BIT)
+            if (minCount > 0) return true;
+          if (!thisItr.prepareNext(minCount) |
+              !otherItr.prepareNext(minCount)) // NOT ||
+            break;
+        } else {
+          if (!isLiteralZero(thisItr.toLiteral() & otherItr.word)) return true;
+          thisItr.word--;
+          if (!thisItr.prepareNext(1) |
+              !otherItr.prepareNext()) // do NOT use "||"
+            break;
+        }
+      } else if (!otherItr.IsLiteral) {
+        if (!isLiteralZero(thisItr.word & otherItr.toLiteral())) return true;
+        otherItr.word--;
+        if (!thisItr.prepareNext() |
+            !otherItr.prepareNext(1)) // do NOT use "||"
+          break;
+      } else {
+        // Java code simply does thisItr.word & otherItr.word below
+        if (!isLiteralZero(concise_and(thisItr.word, otherItr.word))) return true;
+        if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||"
+          break;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Counts the elements that this set and `other` have in common without
+   * building the intersection; the unit tests check that the result equals
+   * logicaland(other).size(). Returns 0 when either set is empty.
+   * The prepareNext() calls are joined with `|`, not `||`, so that both
+   * iterators always advance.
+   */
+  size_t logicalandCount(const ConciseSet &other) const {
+    if (isEmpty() || other.isEmpty()) {
+      return 0;
+    }
+    size_t answer = 0;
+    // scan "this" and "other"
+    WordIterator thisItr(*this);
+    WordIterator otherItr(other);
+    while (true) {
+      if (!thisItr.IsLiteral) {
+        if (!otherItr.IsLiteral) {
+          int minCount = std::min(thisItr.count, otherItr.count);
+          // neither word is a literal: multiply in size_t, not int, so that a
+          // large minCount cannot overflow before being added to answer
+          if (concise_and(thisItr.word, otherItr.word) & SEQUENCE_BIT)
+            answer += static_cast<size_t>(31) * static_cast<size_t>(minCount);
+          if (!thisItr.prepareNext(minCount) |
+              !otherItr.prepareNext(minCount)) // NOT ||
+            break;
+        } else {
+          answer += getLiteralBitCount(thisItr.toLiteral() & otherItr.word);
+          thisItr.word--;
+          if (!thisItr.prepareNext(1) |
+              !otherItr.prepareNext()) // do NOT use "||"
+            break;
+        }
+      } else if (!otherItr.IsLiteral) {
+        answer += getLiteralBitCount(thisItr.word & otherItr.toLiteral());
+        otherItr.word--;
+        if (!thisItr.prepareNext() |
+            !otherItr.prepareNext(1)) // do NOT use "||"
+          break;
+      } else {
+        // Java code simply does thisItr.word & otherItr.word below
+        answer += getLiteralBitCount(concise_and(thisItr.word, otherItr.word));
+        if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||"
+          break;
+      }
+    }
+    return answer;
+  }
+
   ConciseSet logicalandnot(const ConciseSet &other) const {
     ConciseSet res;
     logicalandnotToContainer(other, res);
@@ -394,6 +490,8 @@ template class ConciseSet {
     return answer;
   }
 
+
+
   void clear() { reset(); }
 
   void add(uint32_t e) {
diff --git a/tests/unit.cpp b/tests/unit.cpp
index 1a69436..739fde5 100644
--- a/tests/unit.cpp
+++ b/tests/unit.cpp
@@ -113,11 +113,17 @@ template void heaportest() {
     longcounter++;
   assert(longcounter == 60);
   ConciseSet tmp;
+  size_t expectedandsize1 = answer.logicalandCount(test1);
   tmp = answer.logicaland(test1);
+  assert(expectedandsize1 == tmp.size());
   assert(tmp.size() == test1.size());
+  size_t expectedandsize2 = answer.logicalandCount(test2);
   tmp = answer.logicaland(test2);
+  assert(expectedandsize2 == tmp.size());
   assert(tmp.size() == test2.size());
+  size_t expectedandsize3 = answer.logicalandCount(test3);
   tmp = answer.logicaland(test3);
+  assert(expectedandsize3 == tmp.size());
   assert(tmp.size() == test3.size());
 }
 
@@ -150,6 +156,7 @@ template void basictest() {
   ConciseSet tmp;
   tmp = test1.logicalor(test2);
   assert(tmp.size() == 7);
+  assert(test1.logicalandCount(test2) == 3);
   tmp = test1.logicaland(test2);
   assert(tmp.size() == 3);
   tmp.add(100000);
@@ -180,6 +187,7 @@ template void longtest() {
   }
   assert(test1.size() == 1000);
   ConciseSet shouldbetest1;
+  assert(testc.logicalandCount(test1) == 1000);
   shouldbetest1 = testc.logicaland(test1);
   assert(shouldbetest1.size() == 1000);
   for (int k = 0; k < 1000; ++k) {
@@ -203,9 +211,12 @@ template void longtest() {
     assert(tmp.contains(k * 2));
     assert(tmp.contains(k * 2 + 1));
   }
+  assert(tmp.intersects(test2));
+  assert(tmp.logicalandCount(test2) == 1000);
   tmp = tmp.logicaland(test2);
   assert(tmp.size() == 1000);
-
+  assert(test1.intersects(test2) == false);
+  assert(test1.logicalandCount(test2) == 0);
   tmp = test1.logicaland(test2);
   assert(tmp.size() == 0);
 }
@@ -316,7 +327,9 @@ template void toytest() {
   assert(equals(trueunion, union2));
   ConciseSet intersect1;
   ConciseSet intersect2;
+  size_t expinter1 = test1.logicalandCount(test2);
   intersect1 = test1.logicaland(test2);
+  assert(expinter1 == intersect1.size());
   intersect2 = test1.logicaland(test2);
   assert(equals(trueinter, intersect1));
   assert(equals(trueinter, intersect2));
@@ -410,7 +423,9 @@ template void variedtest() {
   assert(equals(trueunion, union2));
   ConciseSet intersect1;
   ConciseSet intersect2;
+  size_t expinter1 = test1.logicalandCount(test2);
   intersect1 = test1.logicaland(test2);
+  assert(expinter1 == intersect1.size());
   intersect2 = test1.logicaland(test2);
   assert(equals(trueinter, intersect1));
   assert(equals(trueinter, intersect2));
@@ -566,7 +581,9 @@ template void realtest() {
   assert(equals(trueunion, union2));
   ConciseSet intersect1;
   ConciseSet intersect2;
+  size_t expinter1 = test1.logicalandCount(test2);
   intersect1 = test1.logicaland(test2);
+  assert(expinter1 == intersect1.size());
   intersect2 = test1.logicaland(test2);
   assert(equals(trueinter, intersect1));
   assert(equals(trueinter, intersect2));