Vector-matrix multiplication (vector_matrix_multiplication)

Source: Internet
Author: User

/*************************************** *****************************
Created: 2009/09/19
Created: PM
Filename: vector_matrix_multiplication.cu
File base: vector_matrix_multiplication
File Ext: Cuda
Author: Zhao. kaiyong (AT) gmail.com
Purpose: Vector Matrix Multiplication
Copyright: Everyone may use this code; please credit the source.
For any use, please indicate the source:
http://www.hpctech.com
http://openhero.net
**************************************** *****************************/
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil.h>

/*************************************** *********************************/
/* Init Cuda */
/*************************************** *********************************/
#if __DEVICE_EMULATION__

/* Device-emulation build: no device is selected, initialization always succeeds. */
bool initcuda(void) { return true; }

#else
/*
 * Select the first CUDA device that reports a compute-capability major
 * version of at least 1 and make it the current device.
 *
 * Returns true when a device was selected. Returns false, after printing a
 * message to stderr, when the device count cannot be queried, when no device
 * is present, when none qualifies, or when selecting the device fails.
 */
bool initcuda(void)
{
    int count = 0;

    /* A failed query is treated the same as "no device": nothing usable. */
    if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
        fprintf(stderr, "there is no device.\n");
        return false;
    }

    /* Stop at the first device whose properties can be read and qualify. */
    int i = 0;
    for (i = 0; i < count; i++) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            if (prop.major >= 1) {
                break;
            }
        }
    }

    /* The loop ran to completion, so no device qualified. */
    if (i == count) {
        fprintf(stderr, "there is no device supporting Cuda.\n");
        return false;
    }

    if (cudaSetDevice(i) != cudaSuccess) {
        fprintf(stderr, "failed to select Cuda device %d.\n", i);
        return false;
    }

    printf("Cuda initialized.\n");
    return true;
}

#endif
/*************************************** *********************************/
/* Example */
/*************************************** *********************************/
/*
 * Row-vector times matrix: C[col] = sum over k of A[k] * B[k * wB + col].
 *
 * A  - input vector of wA elements.
 * wA - length of A, which is also the number of rows of B.
 * B  - input matrix, wA rows by wB columns, indexed as B[row * wB + col].
 * wB - number of columns of B, which is also the length of C.
 * C  - output vector of wB elements.
 *
 * Launch layout: one-dimensional blocks of exactly 64 threads, one thread per
 * output column, with enough blocks to cover wB columns. The tile size 64 is
 * hard-coded in the shared array and in the column computation, so any other
 * block size gives wrong results.
 */
__global__ static void vector_matrix_mult_kernel(float *A, long wA, float *B, long wB, float *C)
{
    /* One 64-element tile of A, shared by all threads of the block. */
    __shared__ float subA[64];

    /* Output column handled by this thread. */
    const long col = (long)blockIdx.x * 64 + threadIdx.x;

    float subC = 0.0f;

    /* The loop bound is uniform across the block, so every thread reaches
     * both barriers the same number of times. */
    for (long i = 0; i < wA; i += 64)
    {
        /* Each thread stages one element of the current tile; positions past
         * the end of A are filled with zero. */
        const long k = i + threadIdx.x;
        subA[threadIdx.x] = (k < wA) ? A[k] : 0.0f;
        __syncthreads();

        /* Number of valid rows in this tile (less than 64 only on the last
         * tile when wA is not a multiple of 64). */
        const int tile = (wA - i < 64) ? (int)(wA - i) : 64;

        if (col < wB)
        {
#pragma unroll
            for (int j = 0; j < tile; j++)
            {
                /* Row (i + j) of B, this thread's column. */
                subC += subA[j] * B[(i + j) * wB + col];
            }
        }

        /* Do not overwrite the tile until every thread has finished with it. */
        __syncthreads();
    }

    if (col < wB)
    {
        C[col] = subC;
    }
}

// __global__ static void vector_matrix_mult_kernel_32t(float *A, long wA, float *B, long wB, float *C)
// {

// }

/* When defined, the GPU result is compared against the CPU reference. */
#define RUN_TEST

/* Problem size in units of 64: the vector has 64 * WA elements and the
 * matrix has 64 * WA rows and 64 * WB columns. */
#define WA 32
#define WB 64
/************************************************************************/
/* Host driver                                                          */
/************************************************************************/
/*
 * Fills a vector and a matrix with pseudo-random values, multiplies them on
 * the GPU with vector_matrix_mult_kernel, recomputes the product on the CPU,
 * prints both timings and (with RUN_TEST) whether the two results agree.
 *
 * Uses the cutil helper macros and timers (CUDA_SAFE_CALL, CUT_SAFE_CALL,
 * cutCreateTimer, cutCompareL2fe, CUT_EXIT, ...).
 *
 * Returns 0 normally and when no CUDA device could be initialized; returns 1
 * when a host allocation fails.
 */
int main(int argc, char *argv[])
{
    if (!initcuda()) {
        return 0;
    }

    /* Fixed seed so the generated inputs are the same on every run. */
    srand(2009);

    const long wA = 64 * WA;
    const long wB = 64 * WB;

    const long size_A = wA;
    const long size_B = wA * wB;
    const long size_C = wB;

    float *hA = (float *)malloc(sizeof(float) * size_A);
    float *hB = (float *)malloc(sizeof(float) * size_B);
    float *hC = (float *)malloc(sizeof(float) * size_C);
    float *testhC = (float *)malloc(sizeof(float) * size_C);
    if (hA == NULL || hB == NULL || hC == NULL || testhC == NULL) {
        fprintf(stderr, "host memory allocation failed\n");
        free(hA);
        free(hB);
        free(hC);
        free(testhC);
        return 1;
    }

    for (long i = 0; i < size_A; i++) {
        hA[i] = (float)rand() / (float)RAND_MAX;
    }

    for (long i = 0; i < size_B; i++) {
        hB[i] = (float)rand() / (float)RAND_MAX;
    }

    float *dA = 0;
    float *dB = 0;
    float *dC = 0;

    CUDA_SAFE_CALL(cudaMalloc((void **)&dA, sizeof(float) * size_A));
    CUDA_SAFE_CALL(cudaMalloc((void **)&dB, sizeof(float) * size_B));
    CUDA_SAFE_CALL(cudaMalloc((void **)&dC, sizeof(float) * size_C));

    CUDA_SAFE_CALL(cudaMemcpy(dA, hA, sizeof(float) * size_A, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaMemcpy(dB, hB, sizeof(float) * size_B, cudaMemcpyHostToDevice));

    unsigned int timer = 0;
    CUT_SAFE_CALL(cutCreateTimer(&timer));
    CUT_SAFE_CALL(cutStartTimer(timer));

    /* One thread per output column, 64 threads per block; wB is a multiple
     * of 64 by construction, so the division is exact. */
    dim3 threads(64);
    dim3 blocks((unsigned int)(wB / 64));
    vector_matrix_mult_kernel<<<blocks, threads>>>(dA, wA, dB, wB, dC);

    CUT_CHECK_ERROR("kernel execution failed\n");

    /* The first timing interval ends after the result has been copied back,
     * so it covers the kernel launch and the device-to-host copy. */
    CUDA_SAFE_CALL(cudaMemcpy(hC, dC, sizeof(float) * size_C, cudaMemcpyDeviceToHost));
    CUT_SAFE_CALL(cutStopTimer(timer));
    printf("Processing Time: %f (MS)\n", cutGetTimerValue(timer));

    /* Reset and restart the timer so the second interval measures only the
     * CPU reference computation. */
    CUT_SAFE_CALL(cutResetTimer(timer));
    CUT_SAFE_CALL(cutStartTimer(timer));

    /* CPU reference: testhC[i] = sum over j of hA[j] * hB[j * wB + i]. */
    for (long i = 0; i < wB; i++) {
        float subC = 0.0f;
        for (long j = 0; j < wA; j++) {
            subC += hA[j] * hB[j * wB + i];
        }
        testhC[i] = subC;
    }

    CUT_SAFE_CALL(cutStopTimer(timer));
    printf("Processing Time: %f (MS)\n", cutGetTimerValue(timer));

    CUT_SAFE_CALL(cutDeleteTimer(timer));

#ifdef RUN_TEST

    CUTBoolean res = cutCompareL2fe(testhC, hC, size_C, 1e-6f);
    printf("test %s\n", (1 == res) ? "Passed" : "failed");
#endif

    CUDA_SAFE_CALL(cudaFree(dA));
    CUDA_SAFE_CALL(cudaFree(dB));
    CUDA_SAFE_CALL(cudaFree(dC));
    free(hA);
    free(hB);
    free(hC);
    free(testhC);
    CUT_EXIT(argc, argv);

    return 0;
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.