Public channel for discussing Numba usage. Don't post confidential info here! Consider posting questions to: https://numba.discourse.group/ !
def f1(x):
    """Return *x* increased by a fixed offset of 2."""
    offset = 2
    return x + offset
@njit
def tester():
    """Add f1(x) to a local value, routing the call through objmode when needed.

    NOTE(review): relies on `needs_objmode`, `objmode`, `njit`, and `f1`
    from the enclosing scope; indentation reconstructed from the chat paste.
    """
    x = 2
    if needs_objmode(f1):
        with objmode:
            x += f1(x)
    else:
        x += f1(x)
I've noticed that when implementing a pattern like
def single_item(row):
# Pseudocode from the discussion: the real per-row computation is elided.
# As literally written, np.array(...) wraps the Ellipsis object.
return np.array(...)
def multiple_items(rows):
    """Stack single_item(row) for every row into one (len(rows), N) array.

    Bug fixed: the original called ``np.array(len(rows), N)``, which treats
    ``N`` as the ``dtype`` argument and does not allocate a 2-D result; an
    uninitialised (len(rows), N) buffer is what the loop below requires.

    NOTE(review): ``N`` is a free variable expected from the enclosing
    scope — confirm it matches the length returned by single_item.
    """
    result = np.empty((len(rows), N))
    for i, row in enumerate(rows):
        result[i] = single_item(row)
    return result
Numba is usually slower than if I pass in a results array to single_item, e.g. out = result[i]
. Is this a sign that I should lean towards result arrays for my "private" numba routines?
Hi again! I'm trying this:
@numba.njit(["float64[:](float64[:,:],float64,float64,float64,float64,float64)"])
def sig_nb_parabol_eqn_2d(data, a, b, c, d, e):
    """Evaluate an inverted 2-D paraboloid over the coordinates packed in *data*.

    data[0] supplies the x values and data[1] the y values; the surface
    -(((x-b)/a)^2 + ((y-d)/c)^2) + e is returned flattened via ravel().
    """
    xs = data[0]
    ys = data[1]
    surface = -(((xs - b) / a) ** 2 + ((ys - d) / c) ** 2) + e
    return surface.ravel()
But it gives me this error...
TypeError Traceback (most recent call last)
<ipython-input-3-3107ce5d498b> in <module>
28
29 nb_parabol_eqn_2d(data, a, b, c, d, e)
---> 30 sig_nb_parabol_eqn_2d(data, a, b, c, d, e)
31
32 print(timeit.timeit('parabol_eqn_2d(data, a, b, c, d, e)',globals=globals(),number=100000))
/usr/local/lib/python3.5/site-packages/numba/dispatcher.py in _explain_matching_error(self, *args, **kws)
572 msg = ("No matching definition for argument type(s) %s"
573 % ', '.join(map(str, args)))
--> 574 raise TypeError(msg)
575
576 def _search_new_conversions(self, *args, **kws):
TypeError: No matching definition for argument type(s) array(float64, 3d, C), float64, float64, float64, float64, float64
I don't get where it's getting that 3d float 64 array...
import numpy as np
import numba
from numba import prange
# Grid size for the demo below.
n=100
# Output buffer filled in place by compilable().
floats = np.zeros((n, n))
@numba.njit(parallel=True)
def compilable(dims, x):
    """Fill x[i][j] with expensive_fn(i + j + 50) over a dims[0] x dims[1] grid."""
    def expensive_fn(v):
        # Deliberately slow: repeatedly damp v until it decays to ~1.0.
        while v > 1.00001:
            v **= 0.9999
        return v

    for row in prange(dims[0]):
        for col in prange(dims[1]):
            x[row][col] = expensive_fn(row + col + 50)
# First call triggers JIT compilation and fills `floats` in place.
compilable((n, n), floats)
print(floats)
'''
Voronoi problem solved with numba vectorize & njit(paralell)
'''
import timeit
import numpy as np
from PIL import Image
from numba import vectorize, njit, prange, int32
def voronoi(size, points, colors):
# Render a Voronoi diagram of `points` (one color per point) as a flat
# int32 ARGB pixel array of length h*w, via a parallel numba vectorize kernel.
# NOTE(review): indentation was lost in this chat paste; structure inferred.
h, w = size
n: int = w * h
n_points: int = len(points)
# Opaque-alpha mask OR-ed into every output pixel.
amask: int = np.int32(0xff00_0000)
max_int: int = np.iinfo(np.int32).max
@vectorize('int32(int32)', target='parallel', nopython=True, fastmath=True)
def calc_color(ix): # current index 0..n -> color
def distance_squared(p0, p1):
d0, d1 = p0[0] - p1[0], p0[1] - p1[1]
return d0 * d0 + d1 * d1
min_dist = max_int
circ_diam = 1 # as distance is squared
ind = -1
# Flat pixel index back to (x, y) coordinates.
current_point = ix % w, ix // w
for i in range(n_points):
d = distance_squared(points[i], current_point)
# Pixel sits (effectively) on a point: stop the nearest-point search early.
if d < circ_diam: break
if d < min_dist:
min_dist = d
ind = i
# ind == -1 means no candidate was recorded; otherwise nearest point's color.
return amask if ind == -1 else colors[ind] | amask
return calc_color(np.arange(n).astype('i4'))
@njit(parallel=True, fastmath=True)
def voronoi_jit(size, points, colors):
# njit variant of voronoi(): same nearest-point color computation, but driven
# by an explicit prange loop instead of a vectorize ufunc.
# NOTE(review): indentation was lost in this chat paste; structure inferred.
h, w = size
n: int = w * h
n_points: int = len(points)
# Opaque-alpha mask OR-ed into every output pixel.
amask: int = np.int32(0xff00_0000)
max_int: int = np.iinfo(np.int32).max
def calc_color(ix): # current index 0..n -> color
def distance_squared(p0, p1):
d0, d1 = p0[0] - p1[0], p0[1] - p1[1]
return d0 * d0 + d1 * d1
min_dist = max_int
circ_diam = 1 # as distance is squared
ind = -1
# Flat pixel index back to (x, y) coordinates.
current_point = ix % w, ix // w
for i in range(n_points):
d = distance_squared(points[i], current_point)
# Pixel sits (effectively) on a point: stop the nearest-point search early.
if d < circ_diam: break
if d < min_dist:
min_dist = d
ind = i
# ind == -1 means no candidate was recorded; otherwise nearest point's color.
return amask if ind == -1 else colors[ind] | amask
img = np.empty(n, dtype=int32)
for i in prange(n):
img[i] = calc_color(i)
return img
def test_voronoi():
    """Generate random points and colors, render the diagram, show and time it."""
    sz = 1024 * 2
    size = (sz, sz)
    n = sz
    n_points = n * 3
    # Random integer (x, y) coordinates inside the image.
    points = np.random.uniform(0, min(size), size=n_points * 2).reshape(n_points, 2).astype('i4')  # x,y
    # One random 24-bit RGB color per point.
    colors = np.random.uniform(0x0000_0000, 0x00ff_ffff, size=n_points).astype('i4')
    t0 = timeit.default_timer()
    image = voronoi(size, points, colors)
    # image = voronoi_jit(size, points, colors)
    t0 = timeit.default_timer() - t0
    img = Image.frombytes(mode='RGBA', size=size, data=image).show()  # .save('voronoi.png', format='png')
    print(f'generated voronoi, {n_points} points, of {size} in {t0:.3} secs')


if __name__ == '__main__':
    test_voronoi()
Another odd question. This crashes, but I don't think it should, since the length of a CUDA local array is necessarily a constant.
import numpy
from numba import cuda
# Host-side output buffer (a single float) written by the kernel below.
ret = numpy.ndarray(1)
@cuda.jit
def a(r):
# Two-element per-thread scratch array.
la = cuda.local.array(2, dtype=numpy.float64)
la[0] = 2.1
la[1] = 3.2
# Repro from the chat: len(la) is a compile-time constant here, yet
# cuda.local.array rejects it as a non-constant size (the reported bug).
arr = cuda.local.array(len(la), dtype=numpy.int32)
r[0] = la[0]+la[1]
# Launch one block of one thread, then print the value the kernel stored.
a[1,1](ret)
print(ret[0])
same if I do
arr = cuda.local.array(la.shape, dtype=numpy.int32)
hi @rforcen , thanks a lot for sharing. Gitter does not have good search capability, so no one would find your code in a few days. Why don't you post it to discourse? https://numba.discourse.group/
OK, thanks for the advice.
@njit(parallel=True)  # -> np.arange(n * m, dtype='i4').reshape(n, m)
def grid(n, m):
    """Return an (n, m) int32 array with v[i, j] = i * m + j, filled in parallel.

    Equivalent to ``np.arange(n * m, dtype='i4').reshape(n, m)`` (the
    original annotation comment said ``n * n``/``reshape(n, n)``, which is
    only right for square inputs).
    """
    v = np.empty((n, m), dtype=np.int32)
    for i in prange(n):
        for j in prange(m):
            # Tuple indexing v[i, j] instead of chained v[i][j]: one store,
            # no intermediate row view per element.
            v[i, j] = i * m + j
    return v
@njit(parallel=True)  # same result as np.arange(n, dtype='i4')
def xrange(n):
    """Parallel equivalent of np.arange(n, dtype=np.int32)."""
    out = np.empty(n, dtype=np.int32)
    for idx in prange(n):
        out[idx] = idx
    return out
And furthermore, what can I do to prevent this? It looks like each multiprocessing pool is re-compiling what I hope would be the same likelihood function -- even though I've cached it? Should I do ahead-of-time compilation?
I've also noticed a delay on the first run, so I use a 'warm-up' first call with a small number of iterations; see "# warm up" in https://github.com/rforcen/numba/blob/main/DomainColoring.py
I am trying to set up a jitclass with an empty list as below and am getting a typing error that I don't quite understand.
import numba as nb
from collections import OrderedDict
from numba.core.types import Tuple, int64, float64
from numba.core.types.containers import ListType
from numba.typed.typedlist import List
# Build the element type ONCE at module level. Referencing a ready-made type
# *instance* as a global is typable by numba, whereas calling the ``Tuple``
# metaclass inside jitted code fails with the reported error:
# "Untyped global name 'Tuple': cannot determine Numba type of
#  <class 'numba.core.types.abstract._TypeMetaclass'>".
capacity_item_type = Tuple((float64, float64))
spec_dynamic_events = [('capacity', ListType(capacity_item_type))]
spec_dynamic_events=OrderedDict(spec_dynamic_events)
@nb.experimental.jitclass(spec_dynamic_events)
class DynamicEvent(object):
    """jitclass holding a typed list of (float64, float64) capacity pairs."""
    def __init__(self):
        # Use the precomputed global type instance, not Tuple(...), here.
        self.capacity = List.empty_list(capacity_item_type)
my_event=DynamicEvent()
This yields 'Failed in nopython mode pipeline (step: nopython frontend)
Untyped global name 'Tuple': cannot determine Numba type of <class 'numba.core.types.abstract._TypeMetaclass'> '
Could somebody give me a hint where to look for a solution or is this not supported?
Is there any documentation on the @guvectorize
layout declarations? I'm having problems passing in an array. For example:
@nb.guvectorize(["f8[:], f8[:], f8[:], f8[:]", ],
                "(len_a),(len_b)->(len_a),(len_a)", nopython=True)
def foo(a, b, c, d):
    """gufunc kernel: write a * max(b) into c and a + min(b) into d."""
    mx = np.max(b)
    mn = np.min(b)
    for i in range(len(a)):
        c[i] = a[i] * mx
        d[i] = a[i] + mn
# Repro data: a has core dim len_a=4, b has len_b=4 (two *distinct* named dims).
a = np.array([[ 0., 0., 0., 0.],
[ 1., 1., 1., 1.],
[ 2., 2., 2., 2.]])
b = np.array([[ 5., -1., 1., -1.],
[ 5., 2., 2., 1.]])
# Raises the TypeError quoted below: `axis=` is only accepted when the
# gufunc signature has a single shared core dimension.
c, b = foo(a, b, axis=0)
This code gives the error:
TypeError: foo: axis can only be used with a single shared core dimension, not with the 2 distinct ones implied by signature (len_a),(len_b)->(len_a),(len_a).
There is only 1 core dimension in this function (the dimension of the 1st array that is used to determine the size of the output arrays), how do I declare the second array in the layout to be an array but a non-core dimension?
I can workaround this error being raised using this wrapper, but it is a hack:
def foo_with_axis(a, b, axis=-1):
    """Apply foo along `axis` by rotating that axis to the end and back.

    Workaround for guvectorize's restriction that ``axis=`` requires a
    single shared core dimension.
    """
    a_rolled = np.moveaxis(a, axis, -1)
    b_rolled = np.moveaxis(b, axis, -1)
    c, d = foo(a_rolled, b_rolled)
    return np.moveaxis(c, axis, -1), np.moveaxis(d, axis, -1)