Out of curiosity and to learn a little bit about the numpy package i've
tryed to implement
a vectorised version of the 'Sieve of Zakiya'.
While the code itself works fine it is astounding for me that the numpy
Version is almost 7 times slower than
the pure python version. I tryed to find out if i am doing something wrong
but wasn't able to find any answer.
Some experimentation showed that indexing into an numpy array is the
bottleneck that slows down my code.
However i don't understand why.
So i am looking for any explaination why the numpy version is that slow
(i expected it to be at least as fast as the pure python version).
Thank you in advance
>pythonw -u "PrimeSieve.py"5 Erat: 0.000000 ZakijaP5: 0.000000 VZakijaP5: 0.000000
1000005 Erat: 0.219000 ZakijaP5: 0.219000 VZakijaP5: 1.578000
2000005 Erat: 0.468000 ZakijaP5: 0.469000 VZakijaP5: 3.297000
3000005 Erat: 0.719000 ZakijaP5: 0.703000 VZakijaP5: 5.000000
4000005 Erat: 0.937000 ZakijaP5: 0.985000 VZakijaP5: 6.734000
5000005 Erat: 1.219000 ZakijaP5: 1.218000 VZakijaP5: 8.172000
6000005 Erat: 1.500000 ZakijaP5: 1.531000 VZakijaP5: 9.829000
7000005 Erat: 1.781000 ZakijaP5: 1.813000 VZakijaP5: 11.500000
8000005 Erat: 2.047000 ZakijaP5: 2.094000 VZakijaP5: 13.187000
9000005 Erat: 2.312000 ZakijaP5: 2.391000 VZakijaP5: 14.891000
10000005 Erat: 2.578000 ZakijaP5: 2.672000 VZakijaP5: 16.641000
11000005 Erat: 2.891000 ZakijaP5: 2.953000 VZakijaP5: 18.203000
12000005 Erat: 3.110000 ZakijaP5: 3.297000 VZakijaP5: 19.937000
#------------------------------
Prime_Sieves.py--------------------------------------
from math import sqrt
def sieveOfErat(end):
if end < 2: return []
lng = (end-1)>>1
sieve = [True]*(lng+1)
for i in xrange(int(sqrt(end)) >1):
if not sieve[i]: continue
for j in xrange( (i*(i + 3) << 1) + 3, lng, (i << 1) + 3):
sieve[j] = False
primes = [2]
primes.extend([(i << 1) + 3 for i in xrange(lng) if sieve[i]])
return primes
def SoZP5(val):
if val < 3 : return [2]
if val < 5 : return [2,3]
lndx = (val-1)/2
sieve = [0]*(lndx+15)
sieve[0:14] = 0 , 1 , 2, 0, 4, 5, 0, 7, 8, 0, 10, 0, 0, 13
for _i in xrange(14, lndx , 15):
sieve[_i:_i+15] = _i -1, 0, 0, _i + 2 , 0, _i + 4, _i + 5, 0, _i +7,
_i + 8, 0, _i + 10, 0, 0, _i + 13
for i in xrange(2, (int(sqrt(val))-3/2)+1):
if not sieve[i]: continue
k = 30*i+45
p1 = 7*i+9 # 7 (2i+3)
p2 = 11*i+15 # 11 (2i+3)
p3 = 13*i+18 # 13 (2i+3)
p4 = 17*i+24 # 17 (2i+3)
p5 = 19*i+27 # 19 (2i+3)
p6 = 23*i+33 # 23 (2i+3)
p7 = 29*i+42 # 29 (2i+3)
p8 = 31*i+45 # 31 (2i+3)
for _i in xrange(0, lndx, k):
try:
sieve[_i+p1] = 0
sieve[_i+p2] = 0
sieve[_i+p3] = 0
sieve[_i+p4] = 0
sieve[_i+p5] = 0
sieve[_i+p6] = 0
sieve[_i+p7] = 0
sieve[_i+p8] = 0
except (IndexError, ):
break
primes = [(i*2)+3 for i in xrange(2, lndx) if sieve[i]]
primes[0:0] = [2,3,5]
return primes
from numpy import array, zeros
def VSoZP5(val):
""" Vectorised Version of Sieve of Zakia"""
if val < 3 : return [2]
if val < 5 : return [2,3]
lndx = (val-1)/2
sieve = zeros(lndx+15,dtype="int32") #<-- Offending line: Indexing a
numpy array is slow
sieve[0:14] = 0 , 1 , 2, 0, 4, 5, 0, 7, 8, 0, 10, 0, 0, 13
for _i in xrange(14, lndx , 15):
sieve[_i:_i+15] = _i -1, 0, 0, _i + 2 , 0, _i + 4, _i + 5, 0, _i +7,
_i + 8, 0, _i + 10, 0, 0, _i + 13
om = array([9, 15, 18, 24, 27, 33, 42, 45], dtype="int32")
bm = array([7, 11, 13, 17, 19, 23, 29, 31], dtype="int32")
for i in xrange(2, (int(sqrt(val))-3/2)+1):
if not sieve[i]: continue
k = 30*i+45
rm = (bm * i) + om
for _i in xrange(0, lndx/k + 1):
try:
sieve[rm] = 0 #<-- Offending line: Indexing a numpy array
is slow
rm += k
except (IndexError,):
break
#
primes = [(i*2)+3 for i in xrange(2, lndx) if sieve[i]]
primes[0:0] = [2,3,5]
return primes
if __name__ == '__main__':
import time
for j in range(5, 20000007, 1000000):
print j,
a = time.time()
soe = sieveOfErat(j)
print "Erat: %2f" % (time.time() -a,),
a = time.time()
soz5 = SoZP5(j)
print "ZakijaP5: %2f" % (time.time() -a),
a = time.time()
sovz5 = VSoZP5(j)
print "VZakijaP5: %2f" % (time.time() -a)
assert soe == soz5 == sovz5
#------------------------------
Prime_Sieves.py--------------------------------------