#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>

#include <cmath>
#include <iostream>
#include <new>
#include <string>
#include <vector>

#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <tbb/spin_mutex.h>

using namespace tbb;
using namespace std;

// File-scope TBB spin mutex. Any code in this file that locks it serializes
// against every other holder, so critical sections guarded by it should be
// kept as short as possible.
spin_mutex mutex;

/**
 * Body object for tbb::parallel_for that computes rows of
 * matrix3 += matrix1 * matrix2 for square m_size x m_size int matrices.
 *
 * Each matrix is an array of m_size row pointers, each row holding m_size
 * ints. The object stores the pointers only; it neither allocates nor frees
 * the matrices, so they must outlive the parallel_for call.
 *
 * Synchronization: no lock is taken. parallel_for passes every invocation a
 * disjoint subrange of row indices, and matrix3[i][j] is written only by the
 * invocation that owns row i, while matrix1 and matrix2 are only read.
 * Taking a global lock per multiply-add (as an earlier version did) merely
 * serialized all worker threads.
 *
 * Precondition: matrix3 must not share storage with matrix1 or matrix2.
 */
class ProduitMatrice {

  int m_size;      // dimension of the square matrices

  int **matrix1;   // left operand (read only)
  int **matrix2;   // right operand (read only)
  int **matrix3;   // accumulator / result (rows written by their owner task)

public:
  /**
   * Processes the rows [r.begin(), r.end()) of the result matrix.
   *
   * @param r  subrange of row indices assigned to this invocation
   */
  void operator() ( const blocked_range<size_t>& r ) const {
    for (size_t i = r.begin(); i < r.end(); ++i) {
      const int *lhs_row = matrix1[i];
      int *out_row = matrix3[i];
      for (int j = 0; j < m_size; ++j) {
        // Accumulate the dot product locally so the result cell is touched
        // once instead of m_size times.
        int sum = 0;
        for (int k = 0; k < m_size; ++k) {
          sum += lhs_row[k] * matrix2[k][j];
        }
        // "+=" keeps the original contract: the product is added to whatever
        // matrix3 already contains.
        out_row[j] += sum;
      }
    }
  }

  /**
   * @param m   dimension of the square matrices
   * @param m1  left operand, m row pointers of m ints each
   * @param m2  right operand, same layout
   * @param m3  result matrix, same layout; the product is added into it
   */
  ProduitMatrice(int m, int **m1, int **m2, int **m3) :
    m_size(m), matrix1(m1), matrix2(m2), matrix3(m3) { }
};

int main(int argc, char **argv) {
  
  struct timeval tv_start, tv_finish;
  double realtime;
  
  int m_size;
  int **matrix1;
  int **matrix2;
  int **matrix3;
  
  int nb_threads;

  int i, j;

  task_scheduler_init init;
  int range;
  
  if(argc < 3){
    cout << "USAGE : " << argv[0] << " m_size nb_threads [-v]" << endl;
    cout << "        m_size : size of the matrice" << endl;
    cout << "        nb_threads : number of threads" << endl;
    cout << "        -v : verbose" << endl;
    return -1;

  }
  
  /* Depart du chrono */
  gettimeofday(&tv_start, 0);

  m_size=atoi(argv[1]);
  nb_threads=atoi(argv[2]);

  /* Creation des matrices */
  matrix1 = (int**)malloc(m_size*sizeof(int*));
  matrix2 = (int**)malloc(m_size*sizeof(int*));
  matrix3 = (int**)malloc(m_size*sizeof(int*));
  for(i=0;i<m_size;i++){
    matrix1[i] = (int*)malloc(m_size*sizeof(int));
    matrix2[i] = (int*)malloc(m_size*sizeof(int));
    matrix3[i] = (int*)malloc(m_size*sizeof(int));
    for(j=0;j<m_size;j++){
      matrix1[i][j] = i+j+1;
      matrix2[i][j] = i+j+1;
      matrix3[i][j] = 0;
    }
  }

  /* Creation des threads */
  range = ceil(m_size / (nb_threads + 0.0));
  parallel_for(blocked_range<size_t>(0, m_size, range),
	       ProduitMatrice(m_size, matrix1, matrix2, matrix3));
  
  /* Affichage du resultat */
  if(argc == 4){
      for(i=0;i<m_size;i++){
	printf("[");
	for(j=0;j<m_size;j++){
	  printf("[%d]", matrix3[i][j]);
	}
	printf("]\n");
      }
  }

  /* Liberation des matrices */
  for(i=0;i<m_size;i++){
    free(matrix1[i]);
    free(matrix2[i]);
    free(matrix3[i]);
  } 
  free(matrix1);
  free(matrix2);
  free(matrix3);

  /* Fin du chrono */
  gettimeofday(&tv_finish, 0);
  
  /* Affichage du resultat */
  realtime = (double)(tv_finish.tv_sec-tv_start.tv_sec) +
    ((double)(tv_finish.tv_usec-tv_start.tv_usec))*1e-6;
  cout << m_size << ";" << nb_threads << ";cpp_tbb;" << realtime << endl;
  
  return 0;
}
