We create a sequence using a prime number and one of its primitive roots modulo n that visits each number in an interval exactly once.
More specifically we are looking for a generator of the multiplicative group of integers modulo n.
We have to pick our prime number a little larger than the product prod([len(i) for i in iterables)]
, so we have to account for the cases in which we'd get index errors.
import random
from math import gcd
import math
from math import prod
from typing import Iterable
def next_prime(number):
if number < 0:
raise ValueError('Negative numbers can not be primes')
if number <= 1:
return 2
if number % 2 == 0:
number -= 1
while True:
number += 2
max_check = int(math.sqrt(number)) + 2
for divider in range(3, max_check, 2):
if number % divider == 0:
break
else:
return number
def is_primitive_root(a, n):
phi = n - 1
factors = set()
for i in range(2, int(phi ** 0.5) + 1):
if phi % i == 0:
factors.add(i)
factors.add(phi // i)
for factor in factors:
if pow(a, factor, n) == 1:
return False
return True
def find_random_primitive_root(n):
while True:
a = random.randint(2, n - 1)
if gcd(a, n) == 1 and is_primitive_root(a, n):
return a
class CoordinateMapper:
"""
A class that maps linear indices to multi-dimensional coordinates within a specified shape.
Args:
dims (Iterable[int]): An iterable representing the dimensions of the desired shape.
Example Usage:
shape = (2, 3, 5, 4)
mapper = CoordinateMapper(shape)
coordinates = mapper.map(10)
print(coordinates) # Output: [0, 1, 2, 2]
"""
def __init__(self, dims: Iterable[int]):
self.moduli = [prod(dims[i:]) for i in range(len(dims))]
self.divisors = [prod(dims[i + 1:]) for i in range(len(dims))]
def map(self, n: int):
return [(n % self.moduli[i]) // self.divisors[i] for i in range(len(self.moduli))]
def sampler(l):
close_prime = next_prime(l)
state = root = find_random_primitive_root(close_prime)
while state > l:
state = (state * root) % close_prime # Inlining the computation leads to a 20% speed up
yield state - 1
for i in range(l - 1):
state = (state * root) % close_prime
while state > l:
state = (state * root) % close_prime
yield state - 1
def _unique_combinations(*iterables):
cartesian_product_cardinality = prod([len(i) for i in iterables])
coordinate_mapper = CoordinateMapper([len(i) for i in iterables])
sequence = sampler(cartesian_product_cardinality)
for state in sequence:
yield tuple(iterable[coord] for iterable, coord in zip(iterables, coordinate_mapper.map(state)))
from itertools import product
a = [1, 2, 3, 5]
b = ["a", "b", "c", "d"]
u = _unique_combinations(a, b)
assert sorted(u) == sorted(product(a, b))
I started benchmarking the various approaches. I couldn't get any solution other than @matmarbon's to run without assertion error:
from itertools import product
import time
approaches= {
'prime_roots':_unique_combinations,
'matmarbon':random_order_cartesian_product,
'itertools.product':itertools.product,
}
a = list(range(10))
b = list(range(10))
for name, approach in approaches.items():
assert sorted(u)==sorted(product(a,b))
For the 2 algorithms I benchmarked the following, with itertools as baseline
import pandas as pd
import timeit
import matplotlib.pyplot as plt
def benchmark(approaches, list_lengths, num_repetitions):
results = []
for approach, function in approaches.items():
for length in list_lengths:
a = list(range(length))
b = list(range(length))
def f():
for item in function(a,b):
pass
execution_time = timeit.timeit(f, number=num_repetitions)
entry = {
'Approach': approach,
'List Length': length,
'Execution Time': execution_time
}
print(entry)
results.append(entry)
results_df = pd.DataFrame(results)
# Plot the benchmark results
plt.figure(figsize=(10, 6))
for approach in approaches.keys():
approach_results = results_df[results_df['Approach'] == approach]
plt.plot(approach_results['List Length'], approach_results['Execution Time'], marker='o', label=approach)
plt.xlabel('List Length')
plt.ylabel('Execution Time (seconds)')
plt.title('Benchmark Results')
plt.grid(True)
plt.legend()
plt.show()
list_lengths = [10,20,30,40,50,60,70,80,90,100]
num_repetitions = 3
benchmark(approaches, list_lengths, num_repetitions)
It appears that @matmarbon's algorithm, while correct is in O(k^n)
.
The prime roots performs in O(n^k)
for k~len(iterables)
(assuming somewhat evenly sized iterables)
Memory-wise the prime roots approach wins just because only O(1)
memory is required and nothing is stored.
Distribution wise the prime roots approach is not actually random but rather a difficult-to-predict-deterministic sequence. In practice the sequences should be "random" enough.
Credit to this stack overflow answer which inspired the solution.
{1, 2, 3}
and{2}
. You randomly pick2
from{1, 2, 3}
. What do you randomly pick from{2}
? – Pericarplist(map(random.choice, map(list, list_of_sets)))
will generate such a sample, doing it repeatedly will not avoid repetitions, though. – Amberjack