Package and download a project
/*
 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual property and
 * proprietary rights in and to this software and related documentation.
 * Any use, reproduction, disclosure, or distribution of this software
 * and related documentation without an express license agreement from
 * NVIDIA Corporation is strictly prohibited.
 *
 * Please refer to the applicable NVIDIA End User License Agreement (EULA)
 * associated with this source code for terms and conditions that govern
 * your use of this NVIDIA software.
 *
 */

#include <GL/glut.h>
#include "cuda.h"
#include "../common/book.h"
#include "../common/cpu_bitmap.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

// Width and height of the rendered image, in pixels.
#define DIM 1024

// Pseudo-random float in [0, x]. The argument is parenthesized so that
// compound expressions expand correctly.
#define rnd(x) ((x) * rand() / RAND_MAX)

// Sentinel depth returned by Sphere::hit() when the ray misses.
#define INF 2e10f

// A colored sphere. A ray travelling along the z axis through the image
// plane point (ox, oy) is tested against it with hit().
struct Sphere {
    float r, b, g;      // color components
    float radius;
    float x, y, z;      // center

    // Tests the ray through (ox, oy) against this sphere.
    //
    // On a hit, stores dz / radius in *n (1 at the center of the projected
    // disc, falling to 0 at its rim) and returns the z coordinate of the
    // intersection (dz + z). On a miss, returns -INF and leaves *n untouched.
    __device__ float hit(float ox, float oy, float *n) {
        float dx = ox - x;
        float dy = oy - y;
        if (dx * dx + dy * dy < radius * radius) {
            float dz = sqrtf(radius * radius - dx * dx - dy * dy);
            *n = dz / sqrtf(radius * radius);
            return dz + z;
        }
        return -INF;
    }
};

// Number of spheres in the scene.
#define SPHERES 20

// Scene description, stored in constant memory. It is filled from the host
// with cudaMemcpyToSymbol() in main().
__constant__ Sphere s[SPHERES];

// Renders the scene into an RGBA byte buffer, one thread per pixel.
//
// Launch layout: a 2D grid of 2D blocks. main() launches (DIM/16 x DIM/16)
// blocks of (16 x 16) threads, i.e. exactly DIM x DIM threads, and the row
// stride used below (blockDim.x * gridDim.x) relies on that. ptr must point
// to at least 4 bytes per launched thread of device memory.
__global__ void kernel(unsigned char *ptr) {
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // shift the origin to the center of the image
    float ox = (x - DIM / 2);
    float oy = (y - DIM / 2);

    float r = 0, g = 0, b = 0;
    float maxz = -INF;
    for (int i = 0; i < SPHERES; i++) {
        float n;
        float t = s[i].hit(ox, oy, &n);
        // Keep the hit with the largest z. A miss returns -INF, which never
        // compares greater than maxz, so n is only read after hit() wrote it.
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    ptr[offset * 4 + 0] = (int)(r * 255);
    ptr[offset * 4 + 1] = (int)(g * 255);
    ptr[offset * 4 + 2] = (int)(b * 255);
    ptr[offset * 4 + 3] = 255;
}

// globals needed by the update routine
struct DataBlock {
    unsigned char *dev_bitmap;
};

// Builds a random scene, renders it on the GPU, reports the elapsed time
// measured with CUDA events, and displays the resulting bitmap.
int main(void) {
    // Initialized so the block handed to CPUBitmap never holds garbage.
    DataBlock data = { NULL };

    // capture the start time
    cudaEvent_t start, stop;
    HANDLE_ERROR(cudaEventCreate(&start));
    HANDLE_ERROR(cudaEventCreate(&stop));
    HANDLE_ERROR(cudaEventRecord(start, 0));

    CPUBitmap bitmap(DIM, DIM, &data);
    unsigned char *dev_bitmap;

    // allocate memory on the GPU for the output bitmap
    HANDLE_ERROR(cudaMalloc((void **)&dev_bitmap,
                            bitmap.image_size()));

    // allocate temp memory, initialize it, copy to constant
    // memory on the GPU, then free our temp memory
    Sphere *temp_s = (Sphere *)malloc(sizeof(Sphere) * SPHERES);
    if (temp_s == NULL) {
        fprintf(stderr, "Host allocation of %d spheres failed\n", SPHERES);
        cudaFree(dev_bitmap);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < SPHERES; i++) {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }

    // store the SPHERES sphere objects in constant memory
    HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s,
                                    sizeof(Sphere) * SPHERES));
    free(temp_s);

    // generate a bitmap from our sphere data
    dim3 grids(DIM / 16, DIM / 16);
    dim3 threads(16, 16);
    kernel<<<grids, threads>>>(dev_bitmap);
    // A kernel launch returns no status; pick up launch-configuration errors.
    HANDLE_ERROR(cudaGetLastError());

    // copy our bitmap back from the GPU for display
    HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
                            bitmap.image_size(),
                            cudaMemcpyDeviceToHost));

    // get stop time, and display the timing results
    HANDLE_ERROR(cudaEventRecord(stop, 0));
    HANDLE_ERROR(cudaEventSynchronize(stop));
    float elapsedTime;
    HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime,
                                      start, stop));
    printf("Time to generate:  %3.1f ms\n", elapsedTime);

    HANDLE_ERROR(cudaEventDestroy(start));
    HANDLE_ERROR(cudaEventDestroy(stop));

    HANDLE_ERROR(cudaFree(dev_bitmap));

    // display
    bitmap.display_and_exit();
}
The result is as follows:
Using constant memory for ray tracing