diff options
Diffstat (limited to 'main.c')
| -rw-r--r-- | main.c | 55 |
1 files changed, 55 insertions, 0 deletions
/*
 * Micro-benchmark: element-wise addition of two int arrays.
 *
 * Built without USE_VECTOR it times a plain scalar loop; built with
 * -DUSE_VECTOR it times the same addition done with 256-bit integer
 * intrinsics (needs a CPU and compiler flags that enable AVX2, e.g. -mavx2).
 */
#ifdef USE_VECTOR
#include <immintrin.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define VECTOR_ALIGN 32          // Byte alignment required by the aligned 256-bit load/store intrinsics
#define STRIDE (256/32)          // How many 32-bit integers fit in a 256-bit vector register
#define VSIZE (STRIDE*200000)    // Element count; a multiple of STRIDE so the vector loop needs no tail

/* aligned_alloc() requires the size to be a multiple of the alignment. */
_Static_assert((VSIZE * sizeof(int)) % VECTOR_ALIGN == 0,
               "buffer size must be a multiple of VECTOR_ALIGN");

/*
 * Allocates three 32-byte aligned buffers, fills the two inputs, times the
 * addition loop selected at compile time, prints the elapsed CPU time and
 * verifies the output.
 *
 * Returns 0 on success, EXIT_FAILURE if allocation fails or the computed
 * sums are wrong.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    const size_t bytes = VSIZE * sizeof(int);

    /*
     * All three buffers live on the heap: VSIZE ints is several megabytes,
     * too large to place safely on the stack, and the aligned store below
     * needs the destination to be 32-byte aligned just like the sources.
     */
    int *v1 = aligned_alloc(VECTOR_ALIGN, bytes);
    int *v2 = aligned_alloc(VECTOR_ALIGN, bytes);
    int *result = aligned_alloc(VECTOR_ALIGN, bytes);
    if (v1 == NULL || v2 == NULL || result == NULL) {
        fprintf(stderr, "error: failed to allocate %zu bytes per buffer\n", bytes);
        free(v1);
        free(v2);
        free(result);
        return EXIT_FAILURE;
    }

    /*
     * Give the inputs defined values. Each pair sums to VSIZE, so the
     * addition can never overflow a signed int.
     */
    for (int i = 0; i < VSIZE; i++) {
        v1[i] = i;
        v2[i] = VSIZE - i;
    }

#ifndef USE_VECTOR
    printf("Sequential..");
    const clock_t start = clock();
    for (int i = 0; i < VSIZE; i++) {
        result[i] = v1[i] + v2[i];
    }
    const clock_t end = clock();
#else
    printf("Vectorize...");
    const clock_t start = clock();
    for (int i = 0; i < VSIZE; i += STRIDE) {
        const __m256i a = _mm256_load_si256((const __m256i *)&v1[i]);
        const __m256i b = _mm256_load_si256((const __m256i *)&v2[i]);
        // Reading the "epi32" suffix:
        // e  == extended (kept for historical reasons; effectively just a brand name)
        // p  == packed (multiple packed elements)
        // i  == integer
        // 32 == each integer is 32 bits
        const __m256i c = _mm256_add_epi32(a, b);

        _mm256_store_si256((__m256i *)&result[i], c);
    }
    const clock_t end = clock();
#endif

    /* Subtract in clock_t first so no precision is lost before the division. */
    const double duration = (double)(end - start) / CLOCKS_PER_SEC;
    printf(" done! (duration=%.6fs)\n", duration);

    /*
     * Check the output. Besides catching mistakes, reading `result` keeps the
     * optimizer from discarding the timed loop as dead stores.
     */
    int status = 0;
    for (int i = 0; i < VSIZE; i++) {
        if (result[i] != v1[i] + v2[i]) {
            fprintf(stderr, "error: wrong sum at index %d\n", i);
            status = EXIT_FAILURE;
            break;
        }
    }

    free(v1);
    free(v2);
    free(result);

    return status;
}
