#include <sys/time.h>

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * OpenACC build only: include the OpenACC runtime header and declare the
 * matrix-multiply routine used by main(). The routine's definition is not
 * visible in this file.
 *
 * NOTE(review): main() calls this as matmul_acc(C, A, B, N, N, N) with A and
 * B filled and C left unwritten, so C is presumably the output of a product
 * of A and B whose sizes are described by m, n, p -- confirm the exact
 * layout and dimension meaning against the definition.
 * The restrict qualifiers state that C, A and B are not expected to alias.
 */
#ifdef _OPENACC
#include <openacc.h>
extern void  matmul_acc(float * restrict C, float * restrict A, 
			float * restrict B, int m, int n, int p);
#endif

/*
 * OpenMP build only: include the OpenMP runtime header and declare the
 * matrix-multiply routine used by main(). The routine's definition is not
 * visible in this file.
 *
 * NOTE(review): main() calls this as matmul_mp(C, A, B, N, N, N) with A and
 * B filled and C left unwritten, so C is presumably the output of a product
 * of A and B whose sizes are described by m, n, p -- confirm the exact
 * layout and dimension meaning against the definition.
 */
#ifdef _OPENMP
#include <omp.h>
extern void  matmul_mp(float * C, float * A, float * B,
		       int m, int n, int p);
#endif 

/*
 * Benchmark driver for the matrix-multiply kernels.
 *
 * Usage: <prog> N
 *
 * Allocates three N x N float matrices, fills A and B with A[i] = B[i] = i,
 * runs whichever kernel(s) were compiled in (OpenMP and/or OpenACC) and
 * prints "<num_threads>  <elapsed seconds>" on stdout.
 *
 * Returns 0 on success and 1 on a usage error, an invalid N, or an
 * allocation failure.
 */
int main(int argc, char *argv[])
{
  /* argv[0] may be NULL when argc == 0; never hand NULL to %s. */
  const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "matmul";

  if (argc != 2) {
    printf("Usage %s N \n", prog);
    return 1;
  }

  /* strtol lets us reject non-numeric input, trailing garbage and
   * out-of-range values instead of continuing with an indeterminate N. */
  char *tail = NULL;
  errno = 0;
  long parsed = strtol(argv[1], &tail, 10);
  if (tail == argv[1] || *tail != '\0' || errno == ERANGE ||
      parsed <= 0 || parsed > INT_MAX) {
    fprintf(stderr, "%s: N must be a positive integer, got \"%s\"\n",
            prog, argv[1]);
    return 1;
  }
  int N = (int)parsed;

  /* Do the size arithmetic in size_t: N*N overflows int once N > 46340.
   * Also make sure count * sizeof(float) itself cannot wrap. */
  size_t dim = (size_t)N;
  if (dim > ((size_t)-1) / sizeof(float) / dim) {
    fprintf(stderr, "%s: N = %d is too large\n", prog, N);
    return 1;
  }
  size_t count = dim * dim;

  float *A = malloc(count * sizeof *A);
  float *B = malloc(count * sizeof *B);
  float *C = malloc(count * sizeof *C);
  if (A == NULL || B == NULL || C == NULL) {
    fprintf(stderr, "%s: cannot allocate three %d x %d float matrices\n",
            prog, N, N);
    free(C);  /* free(NULL) is a no-op, so no per-pointer guards needed */
    free(B);
    free(A);
    return 1;
  }

  for (size_t i = 0; i < count; i++) {
    A[i] = (float)i;
    B[i] = (float)i;
  }

  int num_threads = 1;
  /* Stays 0.0 when the program is built without OpenMP and OpenACC, so the
   * report below never prints an indeterminate value. */
  double elapsed = 0.0;

#ifdef _OPENMP
  /* Only one thread of the team records the team size; letting every
   * thread store to the shared variable would be a data race. */
#pragma omp parallel
  {
#pragma omp single
    num_threads = omp_get_num_threads();
  }
  double omp_start = omp_get_wtime();
  matmul_mp(C, A, B, N, N, N);
  elapsed = omp_get_wtime() - omp_start;
#endif

#ifdef _OPENACC
  /* If both back ends are enabled, the OpenACC timing is the one reported
   * (it overwrites the OpenMP measurement, as before). */
  struct timeval acc_start, acc_end;
  gettimeofday(&acc_start, NULL);
  matmul_acc(C, A, B, N, N, N);
  gettimeofday(&acc_end, NULL);
  elapsed = (double)(acc_end.tv_sec - acc_start.tv_sec)
          + (double)(acc_end.tv_usec - acc_start.tv_usec) * 1.e-6;
#endif

  printf("%d  %.1e\n", num_threads, elapsed);

  free(C);
  free(B);
  free(A);
  return 0;
}

