How about looking at this problem from another view point?.. Consider your set of integers as a collection of one-dimension pictures. One of the way to organize them is to split each picture into two parts A
and B
and sort all pictures by categories:
A
contains only zeroes and B
contains some bits is set (at least one)
A
contains some bits set and B
contains only zeroes
A
and B
contains some bits set (superset of 1 and 2)
A
and B
contains only zeroes
Now you do the same split of your target/mask into the same parts and categorize in the same way. After that you can deduce next (by target/mask category):
- You can skip pictures from categories 2 and 4
- You can skip pictures from categories 1 and 4
- You can skip pictures from category 4
- All pictures match this target/mask
On the next level parts A
and B
is splitted again (so you have 4 parts) and so on.
Of course I don't expect it to give some speed-up. But for some sets of data where there is not so much bits is set (as opposite to variants with bit-based-tree) it might work better.
Update: I've got speedup for 34% in Haskell variant:
benchmarking burte-force list search
mean: 14.67350 ms, lb 14.65103 ms, ub 14.71614 ms, ci 0.950
std dev: 153.6920 us, lb 95.70642 us, ub 246.6497 us, ci 0.950
benchmarking tree-lookup search
mean: 9.592271 ms, lb 9.564509 ms, ub 9.667668 ms, ci 0.950
std dev: 216.6084 us, lb 100.3315 us, ub 455.2730 us, ci 0.950
Source code:
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
import Control.Arrow (first)
import Control.DeepSeq
import Data.Word
import Data.Bits
import Data.List
import Criterion.Main
import Criterion.Config
import System.Random
class BitmapsCollection a where
type BitmapOf a
bitmapsCollection :: [BitmapOf a] -> a
findMaskedPattern :: a -> BitmapOf a -> [BitmapOf a]
-- Plain lookup through array
newtype BitmapsList p = BitmapsList [p]
deriving (Show, NFData)
instance Bits p => BitmapsCollection (BitmapsList p) where
type BitmapOf (BitmapsList p) = p
bitmapsCollection = BitmapsList
findMaskedPattern (BitmapsList xs) m = filter (\e -> e .&. m == m) xs
-- Tree of bitmap partitions
data Bits p => BitmapsCoverTree p = EmptyBitmapsCoverNode
| BitmapsCoverNode (p,p) (BitmapsCoverTree p) (BitmapsCoverTree p) [p] [p]
| LeafBitmapsCoverNode [p]
deriving Show
instance (Bits p, NFData p) => NFData (BitmapsCoverTree p) where
rnf EmptyBitmapsCoverNode = ()
rnf (LeafBitmapsCoverNode xs) = rnf xs
rnf (BitmapsCoverNode mask node1 node2 category3 category4) = mask `deepseq` node1 `deepseq` node2 `deepseq` category3 `deepseq` rnf category4
data BitmapCoverCategory = CoverA | CoverB | CoverAB | CoverZero
coverCategory :: Bits a => (a, a) -> a -> BitmapCoverCategory
coverCategory (maskA, maskB) x = case (x .&. maskA, x .&. maskB) of
(0, 0) -> CoverZero
(0, _) -> CoverB
(_, 0) -> CoverA
_ -> CoverAB
coverCategorize :: Bits a => (a, a) -> [a] -> ([a], [a], [a], [a])
coverCategorize mask = walk (id, id, id, id) where
category = coverCategory mask
walk (a, b, ab, z) [] = (a [], b [], ab [], z [])
walk (a, b, ab, z) (x:xs) = case (category x) of
CoverA -> walk (a . (x:), b, ab, z) xs
CoverB -> walk (a, b . (x:), ab, z) xs
CoverAB -> walk (a, b, ab . (x:), z) xs
CoverZero -> walk (a, b, ab, z . (x:)) xs
suffixMask, prefixMask :: Bits a => Int -> a
suffixMask n = complement 0 `shiftL` n
prefixMask n = complement (suffixMask n)
rangeMask :: Bits a => (Int, Int) -> a
rangeMask (n, m) = suffixMask n .&. prefixMask m
instance Bits p => BitmapsCollection (BitmapsCoverTree p) where
type BitmapOf (BitmapsCoverTree p) = p
bitmapsCollection bms = buildCoverNode (0, bitSize (head bms)) bms where
splitBoundary = 4
buildCoverNode :: Bits a => (Int, Int) -> [a] -> BitmapsCoverTree a
buildCoverNode _ [] = EmptyBitmapsCoverNode
buildCoverNode (n, m) xs | (m - n) < splitBoundary = LeafBitmapsCoverNode xs -- too small
buildCoverNode (n, m) xs = BitmapsCoverNode mask node1 node2 category3 category4 where
mm = (n+m) `div` 2
mask = (rangeMask (n, mm), rangeMask (mm, m))
(category1, category2, category3, category4) = coverCategorize mask xs
node1 = buildCoverNode (n, mm) category1
node2 = buildCoverNode (mm, m) category2
findMaskedPattern EmptyBitmapsCoverNode _ = []
findMaskedPattern (LeafBitmapsCoverNode ps) m = filter (\e -> e .&. m == m) ps
findMaskedPattern (BitmapsCoverNode _ node1 node2 category3 category4) 0 = flatten where
flatten = findMaskedPattern node1 0 ++ findMaskedPattern node2 0 ++ category3 ++ category4
findMaskedPattern (BitmapsCoverNode mask node1 node2 category3 category4) m = result where
targetCategory = coverCategory mask m
filterTarget = filter (\p -> p .&. m == m)
result = case targetCategory of
CoverA -> findMaskedPattern node1 m ++ filterTarget category3
CoverB -> findMaskedPattern node2 m ++ filterTarget category3
CoverAB -> filterTarget category3
CoverZero -> category1 ++ category2 ++ category3 ++ category4
category1 = findMaskedPattern node1 0
category2 = findMaskedPattern node2 0
main = do
gen <- getStdGen
let size = 1000000
bitmaps :: [Word32]
(bitmap, genm) = first fromIntegral (random gen :: (Int, StdGen))
bitmaps = map fromIntegral (take size (randoms genm) :: [Int])
bitmapsList = bitmapsCollection bitmaps :: BitmapsList Word32
bitmapsTree = bitmapsCollection bitmaps :: BitmapsCoverTree Word32
bitmapsList `deepseq` bitmapsTree `deepseq` return ()
defaultMainWith defaultConfig (return ()) [
bench "burte-force list search" $ nf (findMaskedPattern bitmapsList) bitmap,
bench "tree-lookup search" $ nf (findMaskedPattern bitmapsTree) bitmap
]
Update: Kind of C++11 code. It gives 10.9444 ms for brute-force and 8.69286 ms for this algorithm. I've cheated by making output of distribution of turned on bits more sparse.
#include <iostream>
#include <vector>
#include <list>
#include <random>
#include <functional>
#include <cassert>
#include <memory>
#include <sys/time.h>
#include <sys/resource.h>
// benchmark boiler plate code
double cputime()
{
struct rusage usage;
int check = getrusage( RUSAGE_SELF, &usage );
assert(check == 0);
return (usage.ru_utime.tv_sec + usage.ru_utime.tv_usec*1.0e-6);
//return (((double)clock())/((double)CLOCKS_PER_SEC));
}
double measure(std::function<void()> func, size_t iterations)
{
double t1, t2;
size_t i;
t1 = cputime();
for(i = 0; i < iterations; ++i) func();
t2 = cputime();
return (t2 - t1);
}
std::pair<std::string, double> human(double value)
{
static const std::vector<std::pair<std::string, double>> prefixes = {
{ "pico", 1e-12 },
{ "nano", 1e-9 },
{ "micro", 1e-6 },
{ "milli", 1e-3 },
{ "", 1 },
{ "kilo", 1e3 },
{ "mega", 1e6 },
{ "giga", 1e9 },
{ "tera", 1e12 }
};
for(auto it = prefixes.begin(); it != prefixes.end(); ++it)
{
if (it->second > value)
{
auto prev = *(--it);
return std::pair<std::string, double>(prev.first, value/prev.second);
}
}
auto last = *prefixes.rbegin();
return std::pair<std::string, double>(last.first, value/last.second);
}
void bench(std::string name, std::function<void()> func, double bench_seconds = 10)
{
const double accurate_seconds = 0.1;
std::cout << "benchmarking " << name << std::endl
<< "estimating iterations" << std::endl;
size_t base_iterations = 1;
double base_seconds = measure(func, base_iterations);
while(base_seconds < accurate_seconds)
{
base_iterations *= 2;
base_seconds = measure(func, base_iterations);
}
const size_t iterations = bench_seconds * base_iterations / base_seconds;
const double estimated_seconds = iterations * base_seconds / base_iterations;
std::cout << "estimated time " << estimated_seconds << " seconds (" << iterations << " iterations)" << std::endl;
const double seconds = measure(func, iterations);
const auto ips = human(iterations / seconds);
const auto spi = human(seconds / iterations);
std::cout << "benchmark took " << seconds << " seconds" << std::endl
<< "average speed " << ips.second << ' ' << ips.first << " iterations per second" << std::endl
<< "average time " << spi.second << ' ' << spi.first << " seconds per iteration" << std::endl;
}
// plain brute-force lookup
template<class iterator>
std::list<typename iterator::value_type> brute_lookup(const typename iterator::value_type pattern, iterator begin, const iterator &end)
{
typedef typename iterator::value_type value_type;
std::list<value_type> result;
for(;begin != end; ++begin)
{
if ((*begin & pattern) == pattern) result.push_back(*begin);
}
return result;
}
// tree-traversing lookup
template<class _value_type>
struct cover_node
{
typedef _value_type value_type;
value_type mask_a, mask_b;
std::auto_ptr<cover_node<value_type>> node_a, node_b;
std::vector<value_type> category_ab, category_zero;
};
template<class _value_type>
std::ostream &pprint(std::ostream &s, const std::auto_ptr<cover_node<_value_type>> &node, const std::string indent = "")
{
if (!node.get())
{
s << indent << "cover_node: (null)" << std::endl;
return s;
}
s << indent
<< "cover_node: mask = " << std::hex << node->mask_a << "/" << node->mask_b
<< ", leafs = " << std::dec << node->category_ab.size() << "/" << node->category_zero.size() << std::endl;
const std::string sub = indent + " ";
pprint(s, node->node_a, sub);
return pprint(s, node->node_b, sub);
}
enum class cover_category { a, b, ab, zero };
template<class vt>
cover_category
identify_cover(const vt mask_a, const vt mask_b, const vt x)
{
const auto a = (x & mask_a) != 0;
const auto b = (x & mask_b) != 0;
if (!a)
{
if (!b) return cover_category::zero;
else return cover_category::b;
}
else
{
if (!b) return cover_category::a;
else return cover_category::ab;
}
}
template<class vt>
vt bitmask(const size_t n, const size_t m)
{
return (~0 << n) & ~(~0 << m);
}
template<class iterator>
std::auto_ptr<cover_node<typename iterator::value_type>>
build_cover_node(size_t n, size_t m, iterator begin, const iterator &end)
{
const size_t split_boundary = 4;
typedef typename iterator::value_type value_type;
std::auto_ptr<cover_node<value_type>> node(new cover_node<value_type>);
if ((m - n) < split_boundary) // too small group
{
// overlapped mask for simplification of sub-tree into list
node->mask_a = ~0;
node->mask_b = ~0;
node->category_ab.insert(node->category_ab.end(), begin, end);
return node;
}
std::list<value_type> category_a, category_b;
const size_t h = (n + m) / 2;
node->mask_a = bitmask<value_type>(n, h);
node->mask_b = bitmask<value_type>(h, m);
auto &category_ab = node->category_ab;
auto &category_zero = node->category_zero;
// categorize
for(;begin != end; ++begin)
{
switch(identify_cover(node->mask_a, node->mask_b, *begin))
{
case cover_category::a:
category_a.push_back(*begin);
break;
case cover_category::b:
category_b.push_back(*begin);
break;
case cover_category::ab:
category_ab.push_back(*begin);
break;
case cover_category::zero:
category_zero.push_back(*begin);
break;
}
}
// build sub-nodes
if (!category_a.empty()) node->node_a = build_cover_node(n, h, category_a.begin(), category_a.end());
if (!category_b.empty()) node->node_b = build_cover_node(h, m, category_b.begin(), category_b.end());
return node;
}
template<class _value_type>
struct cover_walker
{
typedef _value_type value_type;
typedef cover_node<value_type> node_type;
cover_walker(value_type target_pattern, const node_type &root_node) :
target(target_pattern)
{
walk(root_node);
}
const std::list<value_type> &get_result() const
{
return result;
}
private:
value_type target;
std::list<value_type> result;
template<class Container>
void filtered_add(const Container &xs)
{
for(auto it = xs.begin(); it != xs.end(); ++it)
{
const auto &x = *it;
if ((x & target) == target) result.push_back(x);
}
}
template<class Container>
void add(const Container &xs)
{
result.insert(result.end(), xs.begin(), xs.end());
}
void flatout(const node_type &node)
{
if (node.node_a.get()) flatout(*node.node_a);
if (node.node_b.get()) flatout(*node.node_b);
add(node.category_ab);
add(node.category_zero);
}
void walk(const node_type &node)
{
const auto &mask_a = node.mask_a;
const auto &mask_b = node.mask_b;
if (mask_a == mask_b)
{
filtered_add(node.category_ab);
return;
}
switch(identify_cover(mask_a, mask_b, target))
{
case cover_category::a:
if (node.node_a.get()) walk(*node.node_a);
filtered_add(node.category_ab);
break;
case cover_category::b:
if (node.node_b.get()) walk(*node.node_b);
filtered_add(node.category_ab);
break;
case cover_category::ab:
filtered_add(node.category_ab);
break;
case cover_category::zero:
flatout(node);
break;
}
}
};
int main()
{
std::mt19937 rng;
std::uniform_int_distribution<uint32_t> uint_dist;
const auto bitmap = uint_dist(rng);
//const uint32_t bitmap = 0;
std::vector<uint32_t> bitmaps;
bitmaps.resize(10000000);
//for(auto it = bitmaps.begin(); it < bitmaps.end(); ++it) *it = uint_dist(rng);
for(auto it = bitmaps.begin(); it < bitmaps.end(); ++it) *it = uint_dist(rng) & uint_dist(rng) & uint_dist(rng); // sparse
const auto brute = [&bitmaps, bitmap](){
brute_lookup(bitmap, bitmaps.begin(), bitmaps.end());
};
std::auto_ptr<cover_node<uint32_t>> cover_tree = build_cover_node<std::vector<uint32_t>::const_iterator>(0, 32, bitmaps.begin(), bitmaps.end());
pprint(std::cout, cover_tree);
const auto traversal = [&cover_tree, bitmap]() {
cover_walker<uint32_t>(bitmap, *cover_tree).get_result();
};
bench("brute-force array search", brute);
bench("tree-traversal search", traversal);
return 0;
}