#include<blp.h>
#include<cuda_generator.h>
#include<simulation.h>
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<pthread.h>


/*
 * Argument bundle passed (as void *) to simulationThread. Each field except
 * `number` is forwarded unchanged to the matching parameter of simulation().
 */
typedef struct {
  F_TYPE *output;       /* forwarded as simulation()'s `output` argument */
  F_TYPE *arg;          /* forwarded as simulation()'s `arg` argument */
  BLPproblem *problem;  /* forwarded as simulation()'s `problem` argument */
  BLPdata *data;        /* forwarded as simulation()'s `data` argument */
  seedStruct *seed;     /* forwarded as simulation()'s `seed` argument */
  int cut;              /* forwarded as simulation()'s `cut` argument */
  short number;         /* first work-item index handled by this worker; it then advances in steps of SMP */
} ThreadStructureSimulation;

#ifdef SMP

/*
 * pthread start routine for one simulation worker.
 *
 * inputInit points to a ThreadStructureSimulation. The worker begins at work
 * item `number` and processes every SMP-th item below threadsN by calling
 * simulation() with the bundled arguments. It terminates via pthread_exit(NULL).
 */
void *simulationThread(void *inputInit) {
  ThreadStructureSimulation *job = (ThreadStructureSimulation *) inputInit;
  int item = job->number;

  while(item<threadsN) {
    simulation(job->output, job->arg,
      job->problem, job->data, job->seed, job->cut, item);
    item+=SMP;
  }

  pthread_exit(NULL);
}

#endif

/*
 * Computes the simulated shares for a single work item and stores them in
 * `output`.
 *
 * Parameters:
 *   output  - result array; only the slice belonging to work item `idx` is written.
 *   arg     - parameter vector. Read at indices 0, 1, 2+format, problem->power,
 *             problem->year_dummies+year, problem->parameters.. (covariate
 *             coefficients), offsetSigma.. (random-effect scales) and
 *             problem->allParams+j (per-row term).
 *   problem - dimensions and offsets of the model.
 *   data    - data->index (row ranges), data->data (rows), data->demographics
 *             and data->demoGroups (simulated individuals).
 *   seed    - per-work-item RNG state; seed[idx] is passed to vectorNormal.
 *   cut     - number of work items of the first kind (see below).
 *   idx     - work item to process.
 *
 * idx < cut ("market shares"): for the rows data->index[idx]..data->index[idx+1]
 *   the function averages exp(v_j)/denominator over problem->N draws and writes
 *   one value per row to output[j].
 * idx >= cut: for item = idx-cut the function accumulates exp(v_j)/denominator
 *   per `format` (data->data[j][2]) over problem->P individuals times
 *   problem->Pn draws, and writes problem->product averaged values starting at
 *   output[data->index[cut] + item*problem->product].
 *
 * In both cases the denominator starts at 1, or at arg[problem->year_dummies+year]
 * when the date maps to a year index >= 0.
 */
void simulation(F_TYPE *output, F_TYPE *arg, BLPproblem *problem, BLPdata *data, seedStruct *seed, int cut, int idx) {
  F_TYPE denominator;
  /* Per-row scratch space. NOTE(review): capacity is fixed at 100 rows per
     market and is not checked against the actual row count. */
  F_TYPE nominator[100], delta[100];
  int format;
  int i,j,k,n,z,s;
  int covarMatrixOffset;
  int offsetSigma = problem->parameters+problem->product*problem->demoCharacteristics;
  int offsetXi = problem->allParams;

  /* The code below reads vDraws[problem->product], so the buffer needs at least
     product+1 slots; it must also hold the problem->random_effects values
     requested from vectorNormal. (It used to be declared with only
     problem->product elements, which made that read out of bounds.) */
  int drawsLen = (problem->random_effects > problem->product+1)
    ? problem->random_effects : problem->product+1;
  F_TYPE vDraws[drawsLen];

  double re;

  if(idx<cut) { // Market shares
    int drawsIndex = problem->N*idx;
    int first = data->index[idx];
    int last = data->index[idx+1];
    int date = idx/problem->market;
    int year = (date<3) ? -1 : (date-1)/2-1; // 96-97 are one year (-1)

    /* Draw-independent part of each row's index, computed once. */
    for(j=first,n=0;j<last;++j,++n) {
      output[j]=0;
      delta[n]=data->data[j][0]*arg[0]+data->data[j][1]*arg[1]+
        arg[2+(int) data->data[j][2]]+arg[offsetXi+j]+data->data[j][7]*arg[problem->power];
    }

    for(i=0;i<problem->N;++i) {
      vectorNormal(seed[idx], problem->random_effects, vDraws);
      if(year>-1) {
        denominator=arg[problem->year_dummies+year];
      } else {
        denominator=1;
      }

      for(j=first,n=0;j<last;++j,++n) {
        /* Random effect scaled by column 0 of the row; uses the last draw. */
        re = arg[offsetSigma+problem->product]*vDraws[problem->product]*data->data[j][0];

        format=data->data[j][2];

        covarMatrixOffset=problem->parameters+format*problem->demoCharacteristics;

        nominator[n]=re+delta[n]+arg[offsetSigma+format]*vDraws[format];

        for(k=0;k<problem->demoCharacteristics;++k) {
          nominator[n]+=arg[covarMatrixOffset+k]*data->demographics[i+drawsIndex][k];
        }
        nominator[n]=exp(nominator[n]);
        denominator+=nominator[n];
      }
      for(j=first,n=0;j<last;++j,++n) {
        output[j]+=nominator[n]/denominator;
      }
    }

    /* Turn the accumulated sum into an average over the N draws. */
    for(j=first;j<last;++j) {
      output[j]/=problem->N;
    }
  } else {
    int item = idx-cut;
    int date = item/problem->demoGroups;
    int offsetDate = date*problem->market;
    int year = (date<3) ? -1 : (date-1)/2-1; // 96-97 are one year (-1)
    int drawsIndex = item*problem->P;
    int offsetOutput = data->index[cut]+item*problem->product;
    int market;
    int first, last;

    for(j=offsetOutput;j<offsetOutput+problem->product;++j) {
      output[j]=0;
    }

    for(z=drawsIndex,i=0;i<problem->P;++z,++i) {
      for(s=0;s<problem->Pn;++s) {
        if(year>-1) {
          denominator=arg[problem->year_dummies+year];
        } else {
          denominator=1;
        }
        /* Column 0 of the individual's record selects the market within the date. */
        market = data->demoGroups[z][0];
        first = data->index[offsetDate+market];
        last = data->index[offsetDate+market+1];

        vectorNormal(seed[idx], problem->random_effects, vDraws);
        for(j=first,n=0;j<last;++j,++n) {
          re = arg[offsetSigma+problem->product]*vDraws[problem->product]*data->data[j][0];
          format=data->data[j][2];

          /* Same coefficient block as in the market-share branch; this used to
             be a hard-coded 10 instead of problem->parameters. */
          covarMatrixOffset=problem->parameters+format*problem->demoCharacteristics;

          nominator[n]=re+data->data[j][0]*arg[0]+
            data->data[j][1]*arg[1]+arg[2+format]+arg[offsetXi+j]+arg[offsetSigma+format]*vDraws[format]+
            data->data[j][7]*arg[problem->power];

          /* Individual covariates start at column 1 of the demoGroups record. */
          for(k=0;k<problem->demoCharacteristics;++k) {
            nominator[n]+=arg[covarMatrixOffset+k]*data->demoGroups[z][k+1];
          }

          nominator[n]=exp(nominator[n]);
          denominator+=nominator[n];
        }

        /* Accumulate per format rather than per row. */
        for(j=first,n=0;j<last;++j,++n) {
          format=data->data[j][2];
          output[offsetOutput+format]+=nominator[n]/denominator;
        }
      }
    }

    /* Average over all P individuals and Pn draws. */
    for(j=offsetOutput;j<offsetOutput+problem->product;++j) {
      output[j]/=problem->P*problem->Pn;
    }
  }
}

/*
 * Runs simulation() for every work item in [0, threadsN) and reduces the
 * results into `output`.
 *
 * Parameters:
 *   output - receives data->index[date*market] + product*demoGroups values:
 *            the first data->index[date*market] entries are copied from the
 *            simulation results, the remaining product*demoGroups entries hold
 *            the reduced per-date slabs (see below).
 *   arg    - parameter vector forwarded to simulation().
 *
 * Uses the file-scope `problem`, `data`, `seed` and `threadsN`. With SMP
 * defined the work items are distributed round-robin over SMP pthreads;
 * otherwise they run sequentially. If a thread cannot be created, its share of
 * the work items is executed in the calling thread instead.
 *
 * Terminates the process with an error message if the scratch buffers cannot
 * be allocated.
 */
void augmentedSimulation(F_TYPE *output, F_TYPE *arg) {
  int i,j;
  int date_market = problem->date*problem->market;
  int startSum = data->index[date_market];
  int step = problem->product*problem->demoGroups;
  int start = startSum+step;
  int end = startSum+problem->date*step;
  F_TYPE *output2;

  #ifdef SMP
    ThreadStructureSimulation *input;
  #endif

  // Allocate memory: one slot per market-share row plus one slab of `step`
  // values for every date.
  output2 = (F_TYPE *) malloc((size_t) end*sizeof(F_TYPE));
  if(output2==NULL && end>0) {
    fprintf(stderr, "augmentedSimulation: cannot allocate simulation buffer\n");
    exit(EXIT_FAILURE);
  }

  #ifndef SMP
    for(i=0;i<threadsN;++i) {
      simulation(output2, arg, problem, data, seed, date_market, i);
    }
  #else
    pthread_t thread_id[SMP];
    int started[SMP];
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    input = (ThreadStructureSimulation *) malloc(SMP*sizeof(ThreadStructureSimulation));
    if(input==NULL) {
      fprintf(stderr, "augmentedSimulation: cannot allocate thread arguments\n");
      free(output2);
      exit(EXIT_FAILURE);
    }
    for(j=0;j<SMP;j++) {
      input[j].output=output2;
      input[j].arg=arg;
      input[j].problem=problem;
      input[j].data=data;
      input[j].seed=seed;
      input[j].cut=date_market;
      input[j].number=j;

      // Remember which workers really exist so that only those are joined.
      started[j] = (pthread_create(&thread_id[j], &attr, simulationThread, (void *) &input[j] )==0);
    }
    pthread_attr_destroy(&attr);

    // Workers that could not be started: process their work items here, using
    // the same stride as simulationThread.
    for(j=0;j<SMP;j++) {
      if(!started[j]) {
        for(i=j;i<threadsN;i+=SMP) {
          simulation(output2, arg, problem, data, seed, date_market, i);
        }
      }
    }

    for(j=0;j<SMP;j++) {
      if(started[j]) {
        pthread_join(thread_id[j], NULL);
      }
    }

    free(input);
  #endif

  for(i=0;i<start;i++) {
    output[i]=output2[i];
  }

  // NOTE(review): the slab of the first date (copied just above) is reset here,
  // and the summation below starts at the second date, yet the result is
  // divided by problem->date. Behaviour kept as-is; confirm whether the first
  // date is meant to be excluded.
  for(j=0;j<step;++j) {
    output[startSum+j]=0;
  }

  for(i=start;i<end;i+=step) {
    for(j=0;j<step;++j) {
      output[startSum+j]+=output2[i+j];
    }
  }

  for(j=0;j<step;++j) {
    output[startSum+j]/=problem->date;
  }

  free(output2);
}
