I'm learning to use Python's Multiprocessing package for embarrassingly parallel problems, so I wrote serial and parallel versions for determining the number of primes less than or equal to a natural number n. Based on what I read from a blog post and a Stack Overflow question, I came up with the following code:
Serial
import math
import time
def is_prime(start, end):
"""determine how many primes within given range"""
numPrime = 0
for n in range(start, end+1):
isPrime = True
for i in range(2, math.floor(math.sqrt(n))+1):
if n % i == 0:
isPrime = False
break
if isPrime:
numPrime += 1
if start == 1:
numPrime -= 1 # since 1 is not prime
return numPrime
if __name__ == "__main__":
natNum = 0
while natNum < 2:
natNum = int(input('Enter a natural number greater than 1: '))
startTime = time.time()
finalResult = is_prime(1, natNum)
print('Elapsed time:', time.time()-startTime, 'seconds')
print('The number of primes <=', natNum, 'is', finalResult)
Parallel
import math
import multiprocessing as mp
import numpy
import time
def is_prime(vec, output):
"""determine how many primes in vector"""
numPrime = 0
for n in vec:
isPrime = True
for i in range(2, math.floor(math.sqrt(n))+1):
if n % i == 0:
isPrime = False
break
if isPrime:
numPrime += 1
if vec[0] == 1:
numPrime -= 1 # since 1 is not prime
output.put(numPrime)
def chunks(vec, n):
"""evenly divide list into n chunks"""
for i in range(0, len(vec), n):
yield vec[i:i+n]
if __name__ == "__main__":
natNum = 0
while natNum < 2:
natNum = int(input('Enter a natural number greater than 1: '))
numProc = 0
while numProc < 1:
numProc = int(input('Enter the number of desired parallel processes: '))
startTime = time.time()
numSplits = math.ceil(natNum/numProc)
splitList = list(chunks(tuple(range(1, natNum+1)), numSplits))
output = mp.Queue()
processes = [mp.Process(target=is_prime, args=(splitList[jobID], output))
for jobID in range(numProc)]
for p in processes:
p.start()
for p in processes:
p.join()
print('Elapsed time:', time.time()-startTime, 'seconds')
procResults = [output.get() for p in processes]
finalResult = numpy.sum(numpy.array(procResults))
print('Results from each process:\n', procResults)
print('The number of primes <=', natNum, 'is', finalResult)
Here is what I get for n=10000000 (for parallel I request 8 processes):
$ python serial_prime_test.py
Enter a natural number greater than 1: 10000000
Elapsed time: 162.1960825920105 seconds
The number of primes <= 10000000 is 664579
$ python parallel_prime_test.py
Enter a natural number greater than 1: 10000000
Enter the number of desired parallel processes: 8
Elapsed time: 49.41204643249512 seconds
Results from each process:
[96469, 86603, 83645, 80303, 81796, 79445, 78589, 77729]
The number of primes <= 10000000 is 664579
So it looks like I can get a little over 3x speedup. Here are my questions:
- Clearly this is not linear speedup, so how much better can I do (or what speedup should I realistically expect)?
- It looks like Amdahl's Law answers this, but I don't know how to determine what fraction of my program is strictly serial.
Any help is appreciated.
Edit: There are 4 physical cores, capable of hyperthreading.