#include <stddef.h>

#ifdef _OPENMP
#include <omp.h>
#endif

/*
 * matmul_mp - dense single-precision matrix product C = A * B.
 *
 * All three matrices are addressed as contiguous row-major arrays:
 *   A is m x n, B is n x p, C is m x p.
 *
 *   C        output; each of its m*p elements is overwritten
 *   A, B     operands; only read
 *   m, n, p  dimensions
 *
 * If m or p is not positive, nothing is written.  If n is not positive,
 * every element of C is set to 0 (empty sum).
 *
 * C is written while A and B are still being read, so the result is only
 * meaningful when C does not overlap A or B.
 *
 * When built with OpenMP, the rows of C are split across threads with a
 * static schedule; each thread writes a disjoint set of rows.  Without
 * OpenMP the pragma is ignored and the loop runs serially.
 */
void matmul_mp(float *  C, float * A, float * B,
               int m, int n, int p)
{
  int i;

#pragma omp parallel for schedule(static) shared(A,B,C)
  for (i = 0; i < m; i++)
    {
      /* Flattened offsets are computed in size_t: with plain int
         arithmetic, i*n+k and i*p+j overflow (undefined behaviour) as
         soon as a matrix holds more than INT_MAX elements. */
      const size_t a_off = (size_t)i * (size_t)n;
      const size_t c_off = (size_t)i * (size_t)p;
      int j;

      for (j = 0; j < p; j++)
        {
          float sum = 0;
          int k;

          /* Accumulate in increasing k so rounding matches a plain
             left-to-right dot product of row i of A and column j of B. */
          for (k = 0; k < n; k++)
            sum += A[a_off + (size_t)k] * B[(size_t)k * (size_t)p + (size_t)j];

          C[c_off + (size_t)j] = sum;
        }
    }
}

