返回介绍

Recreational

发布于 2025-02-25 23:44:05 字数 5796 浏览 0 评论 0 收藏 0

We will plot the famous Madnelbrot fractal and compare the code for and run times of a pure Pythoo with a GPU version.

Pure Python

# color function for point at (x, y)
def mandel(x, y, max_iters):
    c = complex(x, y)
    z = 0.0j
    for i in range(max_iters):
        z = z*z + c
        if z.real*z.real + z.imag*z.imag >= 4:
            return i
    return max_iters
def create_fractal(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    for x in range(width):
        real = xmin + x*pixel_size_x
        for y in range(height):
            imag = ymin + y*pixel_size_y
            color = mandel(real, imag, iters)
            image[y, x]  = color
gimage = np.zeros((1024, 1536), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50

start = timer()
create_fractal(xmin, xmax, ymin, ymax, gimage, iters)
dt = timer() - start

print "Mandelbrot created on CPU in %f s" % dt
plt.imshow(gimage);
Mandelbrot created on CPU in 34.773193 s

Numba

# Reuse regular function on GUO by using jit decorator
# This is using the jit decorator as a function (to avoid copying and pasting code)
import numba
mandel_numba = numba.jit(restype=uint32, argtypes=[float32, float32, uint32])(mandel)
@numba.jit
def create_fractal_numba(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    for x in range(width):
        real = xmin + x*pixel_size_x
        for y in range(height):
            imag = ymin + y*pixel_size_y
            color = mandel_numba(real, imag, iters)
            image[y, x]  = color
gimage = np.zeros((1024, 1536), dtype=np.uint8)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50

start = timer()
create_fractal_numba(xmin, xmax, ymin, ymax, gimage, iters)
dt = timer() - start

print "Mandelbrot created on CPU in %f s" % dt
plt.imshow(gimage);
Mandelbrot created on CPU in 0.339179 s

CUDA

# Reuse regular function on GUO by using jit decorator
mandel_gpu = cuda.jit(restype=uint32, argtypes=[float32, float32, uint32], device=True)(mandel)
@cuda.jit(argtypes=[float32, float32, float32, float32, uint8[:,:], uint32])
def create_fractal_kernel(xmin, xmax, ymin, ymax, image, iters):
    height, width = image.shape

    pixel_size_x = (xmax - xmin)/width
    pixel_size_y = (ymax - ymin)/height

    startX, startY = cuda.grid(2)
    gridX = cuda.gridDim.x * cuda.blockDim.x # stride in x
    gridY = cuda.gridDim.y * cuda.blockDim.y # stride in y

    for x in range(startX, width, gridX):
        real = xmin + x*pixel_size_x
        for y in range(startY, height, gridY):
            imag = ymin + y*pixel_size_y
            color = mandel_gpu(real, imag, iters)
            image[y, x]  = color
gimage = np.zeros((1024, 1536), dtype=np.uint8)
blockdim = (32, 8)
griddim = (32, 16)
xmin, xmax, ymin, ymax = np.array([-2.0, 1.0, -1.0, 1.0]).astype('float32')
iters = 50

start = timer()
d_image = cuda.to_device(gimage)
create_fractal_kernel[griddim, blockdim](xmin, xmax, ymin, ymax, d_image, iters)
d_image.to_host()
dt = timer() - start

print "Mandelbrot created on GPU in %f s" % dt
plt.imshow(gimage);
Mandelbrot created on GPU in 0.010257 s

Using CUDA liraries

See documentation at http://docs.continuum.io/numbapro/cudalib.html

Matrix multiplication wiht cublas

import numbapro.cudalib.cublas as cublas
blas = cublas.Blas()

n =100
A = np.random.random((n, n)).astype(np.float32)
B = np.random.random((n, n)).astype(np.float32)
C = np.zeros_like(A, order='F')

blas.gemm('T', 'T', n, n, n, 1.0, A, B, 1.0, C)

assert(np.allclose(np.dot(A, B), C))

Random numbers with curand

from numbapro.cudalib import curand
prng = curand.PRNG()
prng.seed = 123

@vectorize('float32(float32)', target='gpu')
def shift(x):
    return x*2 - 1

n = 1e7
x = np.empty(n*2).astype(np.float32)
prng.uniform(x)
r = shift(x).reshape((n, 2))
pi_hat =4*(r[:,0]**2 + r[:,1]**2 < 1).sum()/n
pi_hat
3.1409

FFT and IFFT

import numbapro.cudalib.cufft as cufft
num = 4
v = np.random.normal(0, 1, (num, 2))
z = v[:,0] + 1j*v[:,1]
print "{:<20}".format('Original'), z

x_gpu = np.zeros(num, dtype='complex')
cufft.fft(z, x_gpu)
print "{:<20}".format('CUDA FFT'), x_gpu

x_cpu = np.fft.fft(z)
print "{:<20}".format('CPU  FFT'), x_cpu

# NVidia IFFT returns unnormalzied results
cufft.ifft(x_gpu, z)
print "{:<20}".format('CUDA IFFT'), z/num

x_cpu = np.fft.ifft(x_cpu)
print "{:<20}".format('CPU  IFFT'), x_cpu
Original             [ 0.8236-0.564j   0.0743-1.0426j  0.3215+1.0885j -0.7250-1.7846j]
CUDA FFT             [ 0.4944-2.3028j  1.2440-2.4518j  1.7958+3.3518j -0.2400-0.853j ]
CPU  FFT             [ 0.4944-2.3028j  1.2440-2.4518j  1.7958+3.3518j -0.2400-0.853j ]
CUDA IFFT            [ 0.8236-0.564j   0.0743-1.0426j  0.3215+1.0885j -0.7250-1.7846j]
CPU  IFFT            [ 0.8236-0.564j   0.0743-1.0426j  0.3215+1.0885j -0.7250-1.7846j]

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
    我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
    原文