/* Copyright (c) 2003-2004 Ecole centrale de Lyon
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "fdtd3d.h"
#include <zlib.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "utils.h"
#ifdef HAVE_MPI
#include <mpi.h>
#endif


/*
 * Initialise block number `num_block` of `blocklist`.
 *
 * The block covers nx*ny*nz cells whose first cell sits at
 * (off_x,off_y,off_z) in the coordinates of the original index array
 * (dimensions orig_nx*orig_ny*orig_nz).  An offset that is negative or
 * >= the original dimension along an axis marks the block as a PML
 * block for that axis; cells lying outside the original array take the
 * index/absorption of the nearest cell inside it (coordinates are
 * clamped).
 *
 * What is set up:
 *  - the six neighbour pointers (blocklist->empty_block on the outer
 *    faces, otherwise computed by pointer arithmetic, which relies on
 *    the blocks being stored x-major with z varying fastest);
 *  - block->cn, filled with SQUARE(cdtdx/index) and shared with an
 *    earlier block holding identical data when there is one;
 *  - for non-PML blocks containing a non-zero absorption, an entry of
 *    blocklist->AbsBlocks (the decay array is shared the same way);
 *  - the six field arrays Ex..HHz;
 *  - for PML blocks, an entry of blocklist->PMLblocks with its
 *    coefficients and auxiliary arrays.
 *
 * With HAVE_MPI, a block owned by another node only gets the field
 * arrays that a neighbouring local block may need, and nothing else.
 *
 * NOTE(review): this code relies on myalloc() returning zero-filled
 * memory (e.g. the NULL tests on freshly allocated structures and the
 * sparse filling of tmp_dec) -- confirm against its definition.
 *
 * Returns the number of cells of the block (nx*ny*nz).
 */
static int fill_block(varBlockList *blocklist, int num_block,
                      int nx, int ny, int nz,
                      int off_x, int off_y, int off_z,
                      double *orig_index, double *orig_absor,
                      int orig_nx, int orig_ny, int orig_nz,
                      double cdtdx, double delta_t, double pml_decay)
{
  varBlock *blocks=blocklist->blocks;
  varBlock *block=blocks+num_block;
  int i,j,k,orig_x,orig_y,orig_z,block_type;
  int has_absor=0;
  double dt_decay=delta_t*pml_decay;
  double *tmp_dec=NULL;
  // Size in bytes of one per-cell array of doubles for this block
  const size_t nbytes=nx*ny*nz*sizeof(double);

  block->nx=nx;
  block->ny=ny;
  block->nz=nz;
  block->off_x=off_x;
  block->off_y=off_y;
  block->off_z=off_z;

  assert(nx && ny && nz); // We don't want an empty block

  block_type=BLOCK_PML_NONE;

  // pointers to neighbours
  if(off_x<0)
    {
      block_type|=BLOCK_PML_FACE_X;
      block->block_xm=blocklist->empty_block;
    }
  else
    block->block_xm = block - blocklist->ny*blocklist->nz;
  if(off_x>=orig_nx)
    {
      block_type|=BLOCK_PML_FACE_X;
      block->block_xp=blocklist->empty_block;
    }
  else
    block->block_xp = block + blocklist->ny*blocklist->nz;
  if(off_y<0)
    {
      block_type|=BLOCK_PML_FACE_Y;
      block->block_ym=blocklist->empty_block;
    }
  else
    block->block_ym = block - blocklist->nz;
  if(off_y>=orig_ny)
    {
      block_type|=BLOCK_PML_FACE_Y;
      block->block_yp=blocklist->empty_block;
    }
  else
    block->block_yp = block + blocklist->nz;
  if(off_z<0)
    {
      block_type|=BLOCK_PML_FACE_Z;
      block->block_zm=blocklist->empty_block;
    }
  else
    block->block_zm = block - 1;
  if(off_z>=orig_nz)
    {
      block_type|=BLOCK_PML_FACE_Z;
      block->block_zp=blocklist->empty_block;
    }
  else
    block->block_zp = block + 1;

#ifdef HAVE_MPI
  if(block_type != BLOCK_PML_NONE)
    // Gruik gruik, but we need the pointer to be non-NULL, and
    // it should segfault anyway if we try to access it
    block->PML=(varPMLblock *) 1;

  if(blocklist->mpi_node!=block->mpi_node)
    { // Allocate fields only for blocks near the calculation
      if(block->block_ym->mpi_node==blocklist->mpi_node ||
         block->block_zm->mpi_node==blocklist->mpi_node)
        block->Ex=myalloc(nbytes);
      if(block->block_xm->mpi_node==blocklist->mpi_node ||
         block->block_zm->mpi_node==blocklist->mpi_node)
        block->Ey=myalloc(nbytes);
      if(block->block_xm->mpi_node==blocklist->mpi_node ||
         block->block_ym->mpi_node==blocklist->mpi_node)
        block->Ez=myalloc(nbytes);
      if(block->block_yp->mpi_node==blocklist->mpi_node ||
         block->block_zp->mpi_node==blocklist->mpi_node)
        block->HHx=myalloc(nbytes);
      if(block->block_xp->mpi_node==blocklist->mpi_node ||
         block->block_zp->mpi_node==blocklist->mpi_node)
        block->HHy=myalloc(nbytes);
      if(block->block_xp->mpi_node==blocklist->mpi_node ||
         block->block_yp->mpi_node==blocklist->mpi_node)
        block->HHz=myalloc(nbytes);
      return nx*ny*nz;
    }
#endif

  block->cn=myalloc(nbytes);
  if(orig_absor)
    tmp_dec=myalloc(nbytes);

  // Fill the index array (and the temporary decay array, only where
  // the absorption is non-zero)
#define posit(x,y,z) (((x)*ny+(y))*nz+(z))
#define orig_posit(x,y,z) (((x)*orig_ny+(y))*orig_nz+(z))
  for(i=0;i<nx;i++)
    {
      // Clamp to the original array: PML cells copy the nearest edge cell
      orig_x=off_x+i;
      if(orig_x<0) orig_x=0;
      if(orig_x>=orig_nx) orig_x=orig_nx-1;
      for(j=0;j<ny;j++)
        {
          orig_y=off_y+j;
          if(orig_y<0) orig_y=0;
          if(orig_y>=orig_ny) orig_y=orig_ny-1;
          for(k=0;k<nz;k++)
            {
              orig_z=off_z+k;
              if(orig_z<0) orig_z=0;
              if(orig_z>=orig_nz) orig_z=orig_nz-1;
              block->cn[posit(i,j,k)]=SQUARE(cdtdx/orig_index[orig_posit(orig_x,orig_y,orig_z)]);
              if(orig_absor && orig_absor[orig_posit(orig_x,orig_y,orig_z)])
                {
                  tmp_dec[posit(i,j,k)]=CELERITY*delta_t*orig_absor[orig_posit(orig_x,orig_y,orig_z)]/
                    orig_index[orig_posit(orig_x,orig_y,orig_z)];
                  has_absor=1;
                }
            }
        }
    }
#undef posit
#undef orig_posit

  // Check whether we already have such an index block.  The CRC is
  // only a quick filter: a match is confirmed with memcmp so that a
  // checksum collision can never make two different blocks share data.
  blocklist->index_crc[num_block]=crc32(0,(void *)block->cn,nbytes);
  for(i=0;i<num_block;i++)
    {
      if(blocks[i].cn != NULL &&
         blocks[i].nx*blocks[i].ny*blocks[i].nz == nx*ny*nz &&
         blocklist->index_crc[i] == blocklist->index_crc[num_block] &&
         memcmp(blocks[i].cn,block->cn,nbytes) == 0)
        {
          // Share the earlier array and recycle ours as the Ex field
          block->Ex=block->cn;
          memset(block->Ex,0,nbytes);
          block->cn=blocks[i].cn;
          break;
        }
    }

  if(block_type==BLOCK_PML_NONE && has_absor)
    {
      // Use special absorbing blocks only outside of the PML's
      block->absor=blocklist->AbsBlocks + blocklist->num_absblocks++;
      block->absor->dec=tmp_dec;
      blocklist->decay_crc[num_block]=crc32(0,(void *)tmp_dec,nbytes);
      for(i=0;i<num_block;i++)
        {
          if(blocks[i].absor &&
             blocks[i].nx*blocks[i].ny*blocks[i].nz == nx*ny*nz &&
             blocklist->decay_crc[i] == blocklist->decay_crc[num_block] &&
             memcmp(blocks[i].absor->dec,tmp_dec,nbytes) == 0)
            {
              // Share the earlier decay array and recycle ours as sumEx
              block->absor->sumEx=tmp_dec;
              memset(block->absor->sumEx,0,nbytes);
              block->absor->dec=blocks[i].absor->dec;
              break;
            }
        }
      if(block->absor->sumEx==NULL)
        block->absor->sumEx=myalloc(nbytes);
      block->absor->sumEy=myalloc(nbytes);
      block->absor->sumEz=myalloc(nbytes);
    }
  else if(orig_absor)
    {
      // tmp_dec is allocated but unused here: recycle it as the Ey
      // field.  A PML block whose clamped edge cells are absorbing has
      // written decay values into it, so it must be cleared first.
      if(has_absor)
        memset(tmp_dec,0,nbytes);
      block->Ey=tmp_dec;
    }

  if(block->Ex==NULL)
    block->Ex=myalloc(nbytes);
  if(block->Ey==NULL)
    block->Ey=myalloc(nbytes);
  block->Ez=myalloc(nbytes);
  block->HHx=myalloc(nbytes);
  block->HHy=myalloc(nbytes);
  block->HHz=myalloc(nbytes);

  if(block_type==BLOCK_PML_NONE)
    return nx*ny*nz;

  block->PML=blocklist->PMLblocks + blocklist->num_pmlblocks;

  // Default coefficients; the edge/corner cases below override some
  block->PML->x_decay=(1-dt_decay/2)/(1+dt_decay/2);
  block->PML->y_decay=(1-dt_decay/2)/(1+dt_decay/2);
  block->PML->z_decay=(1-dt_decay/2)/(1+dt_decay/2);
  block->PML->x_rotcoef=1;
  block->PML->y_rotcoef=1;
  block->PML->z_rotcoef=1;

  if(block_type&BLOCK_PML_FACE_X)
    {
      block->PML->sumrotEx=myalloc(nbytes);
      block->PML->sumrotHHx=myalloc(nbytes);
    }
  if(block_type&BLOCK_PML_FACE_Y)
    {
      block->PML->sumrotEy=myalloc(nbytes);
      block->PML->sumrotHHy=myalloc(nbytes);
    }
  if(block_type&BLOCK_PML_FACE_Z)
    {
      block->PML->sumrotEz=myalloc(nbytes);
      block->PML->sumrotHHz=myalloc(nbytes);
    }

  switch(block_type)
    {
    case BLOCK_PML_FACE_X:
    case BLOCK_PML_FACE_Y:
    case BLOCK_PML_FACE_Z:
      // For faces, we use only the necessary fields
      break;
    default:
      // Here some fields are computed but unused, let's compute them
      // all in the same memory area
      if(!(block_type&BLOCK_PML_FACE_X))
        {
          block->PML->sumrotEx=blocklist->garbage;
          block->PML->sumrotHHx=blocklist->garbage;
        }
      if(!(block_type&BLOCK_PML_FACE_Y))
        {
          block->PML->sumrotEy=blocklist->garbage;
          block->PML->sumrotHHy=blocklist->garbage;
        }
      if(!(block_type&BLOCK_PML_FACE_Z))
        {
          block->PML->sumrotEz=blocklist->garbage;
          block->PML->sumrotHHz=blocklist->garbage;
        }
      block->PML->sumEx=blocklist->garbage;
      block->PML->sumEy=blocklist->garbage;
      block->PML->sumEz=blocklist->garbage;
      block->PML->sumHHx=blocklist->garbage;
      block->PML->sumHHy=blocklist->garbage;
      block->PML->sumHHz=blocklist->garbage;
    }
  // Real storage replaces the garbage area for the components that
  // lie in two PML directions at once
  if((block_type&BLOCK_PML_FACE_Y)&&(block_type&BLOCK_PML_FACE_Z))
    {
      block->PML->sumEx=myalloc(nbytes);
      block->PML->sumHHx=myalloc(nbytes);
      block->PML->x_decay=(1-dt_decay)/(1+dt_decay);
      block->PML->x_sumcoef=SQUARE(dt_decay)/(1+dt_decay);
    }
  if((block_type&BLOCK_PML_FACE_Z)&&(block_type&BLOCK_PML_FACE_X))
    {
      block->PML->sumEy=myalloc(nbytes);
      block->PML->sumHHy=myalloc(nbytes);
      block->PML->y_decay=(1-dt_decay)/(1+dt_decay);
      block->PML->y_sumcoef=SQUARE(dt_decay)/(1+dt_decay);
    }
  if((block_type&BLOCK_PML_FACE_X)&&(block_type&BLOCK_PML_FACE_Y))
    {
      block->PML->sumEz=myalloc(nbytes);
      block->PML->sumHHz=myalloc(nbytes);
      block->PML->z_decay=(1-dt_decay)/(1+dt_decay);
      block->PML->z_sumcoef=SQUARE(dt_decay)/(1+dt_decay);
    }

  switch(block_type)
    {
    case BLOCK_PML_FACE_X:
      block->PML->x_rotcoef=1+dt_decay/2;
      block->PML->x_sumrotcoef=dt_decay;
      break;
    case BLOCK_PML_FACE_Y:
      block->PML->y_rotcoef=1+dt_decay/2;
      block->PML->y_sumrotcoef=dt_decay;
      break;
    case BLOCK_PML_FACE_Z:
      block->PML->z_rotcoef=1+dt_decay/2;
      block->PML->z_sumrotcoef=dt_decay;
      break;
    case BLOCK_PML_EDGE_YZ:
      block->PML->x_rotcoef=1/(1+dt_decay);
      block->PML->y_sumrotcoef=dt_decay/(1+dt_decay/2);
      block->PML->z_sumrotcoef=dt_decay/(1+dt_decay/2);
      break;
    case BLOCK_PML_EDGE_ZX:
      block->PML->y_rotcoef=1/(1+dt_decay);
      block->PML->z_sumrotcoef=dt_decay/(1+dt_decay/2);
      block->PML->x_sumrotcoef=dt_decay/(1+dt_decay/2);
      break;
    case BLOCK_PML_EDGE_XY:
      block->PML->z_rotcoef=1/(1+dt_decay);
      block->PML->x_sumrotcoef=dt_decay/(1+dt_decay/2);
      block->PML->y_sumrotcoef=dt_decay/(1+dt_decay/2);
      break;
    case BLOCK_PML_CORNER:
      block->PML->x_rotcoef=(1+dt_decay/2)/(1+dt_decay);
      block->PML->y_rotcoef=(1+dt_decay/2)/(1+dt_decay);
      block->PML->z_rotcoef=(1+dt_decay/2)/(1+dt_decay);
      block->PML->x_sumrotcoef=dt_decay/(1+dt_decay);
      block->PML->y_sumrotcoef=dt_decay/(1+dt_decay);
      block->PML->z_sumrotcoef=dt_decay/(1+dt_decay);
      break;
    default:
      // No other block type gets extra coefficients
      break;
    }

  blocklist->num_pmlblocks++;
  block->PML->block_type=block_type;
  return nx*ny*nz;
}

/*
 * Build the block list of a simulation from its index array.
 *
 * The simulated volume (space->nx * ny * nz cells) is cut into blocks
 * of at most space->block_size cells per side (the last block along
 * an axis may be smaller), and one extra layer of blocks, num_pml
 * cells thick, is added on each side of every axis for the PML.
 * Blocks are created x-major with z varying fastest; fill_block()
 * depends on that order to locate neighbours.
 *
 * On success space->index and space->absor are freed and set to NULL:
 * their contents now live in the blocks.
 *
 * Returns the new list (to be released with destroy_blocklist), or
 * NULL when the simulation, its parameters, its space or its index
 * array is missing.
 */
varBlockList *index_array_to_blocks_with_pml(SimulDesc *simul)
{
  int block_nx,block_ny,block_nz,total_cells,cur_block;
  int npml,bs,i,j,k,n;
  varBlockList *a;
  varSpace *space;
  double cdtdx;

  // Validate everything before the first dereference
  if(simul==NULL || simul->simul==NULL)
    return NULL;
  space=simul->space;
  if(space==NULL || space->index==NULL)
    return NULL;

  cdtdx=CELERITY*simul->simul->delta_t/space->delta_x;

  // Number of inner blocks along each axis, rounded up
  block_nx=space->nx/space->block_size;
  if(space->nx%space->block_size)
    block_nx++;
  block_ny=space->ny/space->block_size;
  if(space->ny%space->block_size)
    block_ny++;
  block_nz=space->nz/space->block_size;
  if(space->nz%space->block_size)
    block_nz++;

  a=myalloc(sizeof(varBlockList));
  // Two more blocks per axis: one PML layer on each side
  a->nx=block_nx+2;
  a->ny=block_ny+2;
  a->nz=block_nz+2;
  a->n=a->nx*a->ny*a->nz;
  a->blocks=myalloc(sizeof(varBlock)*a->n);
  a->index_crc=myalloc(sizeof(unsigned long)*a->n);
  a->PMLblocks=myalloc(sizeof(varPMLblock)*(a->n - block_nx*block_ny*block_nz));
  if(space->absor)
    {
      a->AbsBlocks=myalloc(sizeof(varAbsBlock)*a->n);
      a->decay_crc=myalloc(sizeof(unsigned long)*a->n);
    }

  total_cells=(space->nx+2*space->num_pml)*(space->ny+2*space->num_pml)*(space->nz+2*space->num_pml);

  // Dummy neighbour used beyond the outer faces: all of its fields
  // point to the same buffer
  a->empty_block=myalloc(sizeof(varBlock));
  a->empty_block->nx=MAX(space->num_pml,space->block_size);
  a->empty_block->ny=a->empty_block->nx;
  a->empty_block->nz=a->empty_block->nx;
  a->empty=myalloc(CUBE(a->empty_block->nx)*sizeof(double));
  a->empty_block->Ex=a->empty;
  a->empty_block->Ey=a->empty;
  a->empty_block->Ez=a->empty;
  a->empty_block->HHx=a->empty;
  a->empty_block->HHy=a->empty;
  a->empty_block->HHz=a->empty;
  // Scratch area for PML quantities that are computed but never read
  a->garbage=myalloc(CUBE(a->empty_block->nx)*sizeof(double));

  npml=space->num_pml;
  bs=space->block_size;

#ifdef HAVE_MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &(a->mpi_node));
  MPI_Comm_size(MPI_COMM_WORLD, &(a->mpi_nnodes));
  // Contiguous ranges of blocks are assigned to the nodes
  a->mpi_nblocks=(a->n-1)/a->mpi_nnodes+1;
  a->mpi_startblock=a->mpi_nblocks*a->mpi_node; // Could be improved
  a->mpi_endblock=MIN(a->n,a->mpi_startblock+a->mpi_nblocks);
  for(i = 0; i < a->n; i++)
    a->blocks[i].mpi_node=i/a->mpi_nblocks;
  a->empty_block->mpi_node=a->mpi_node;
#endif

  // Fill the next block; evaluates to its number of cells
#define inc_block(NX,NY,NZ,off_x,off_y,off_z) \
  fill_block(a,cur_block++,NX,NY,NZ,off_x,off_y,off_z, \
             space->index,space->absor,space->nx,space->ny,space->nz, \
             cdtdx,simul->simul->delta_t,space->pml_decay)

  cur_block=0;
  n=0;

  // Lower x PML layer
  n+=inc_block(npml,npml,npml,-npml,-npml,-npml);
  for(k=0;k<block_nz;k++)
    n+=inc_block(npml,npml,MIN(bs,space->nz-k*bs),-npml,-npml,k*bs);
  n+=inc_block(npml,npml,npml,-npml,-npml,space->nz);
  for(j=0;j<block_ny;j++)
    {
      n+=inc_block(npml,MIN(bs,space->ny-j*bs),npml,-npml,j*bs,-npml);
      for(k=0;k<block_nz;k++)
        n+=inc_block(npml,MIN(bs,space->ny-j*bs),MIN(bs,space->nz-k*bs),-npml,j*bs,k*bs);
      n+=inc_block(npml,MIN(bs,space->ny-j*bs),npml,-npml,j*bs,space->nz);
    }
  n+=inc_block(npml,npml,npml,-npml,space->ny,-npml);
  for(k=0;k<block_nz;k++)
    n+=inc_block(npml,npml,MIN(bs,space->nz-k*bs),-npml,space->ny,k*bs);
  n+=inc_block(npml,npml,npml,-npml,space->ny,space->nz);

  // Inner x slabs, each surrounded by its y and z PML blocks
  for(i=0;i<block_nx;i++)
    {
      n+=inc_block(MIN(bs,space->nx-i*bs),npml,npml,i*bs,-npml,-npml);
      for(k=0;k<block_nz;k++)
        n+=inc_block(MIN(bs,space->nx-i*bs),npml,MIN(bs,space->nz-k*bs),i*bs,-npml,k*bs);
      n+=inc_block(MIN(bs,space->nx-i*bs),npml,npml,i*bs,-npml,space->nz);
      for(j=0;j<block_ny;j++)
        {
          n+=inc_block(MIN(bs,space->nx-i*bs),MIN(bs,space->ny-j*bs),npml,i*bs,j*bs,-npml);
          for(k=0;k<block_nz;k++)
            n+=inc_block(MIN(bs,space->nx-i*bs),MIN(bs,space->ny-j*bs),MIN(bs,space->nz-k*bs),i*bs,j*bs,k*bs);
          n+=inc_block(MIN(bs,space->nx-i*bs),MIN(bs,space->ny-j*bs),npml,i*bs,j*bs,space->nz);
        }
      n+=inc_block(MIN(bs,space->nx-i*bs),npml,npml,i*bs,space->ny,-npml);
      for(k=0;k<block_nz;k++)
        n+=inc_block(MIN(bs,space->nx-i*bs),npml,MIN(bs,space->nz-k*bs),i*bs,space->ny,k*bs);
      n+=inc_block(MIN(bs,space->nx-i*bs),npml,npml,i*bs,space->ny,space->nz);
    }

  // Upper x PML layer
  n+=inc_block(npml,npml,npml,space->nx,-npml,-npml);
  for(k=0;k<block_nz;k++)
    n+=inc_block(npml,npml,MIN(bs,space->nz-k*bs),space->nx,-npml,k*bs);
  n+=inc_block(npml,npml,npml,space->nx,-npml,space->nz);
  for(j=0;j<block_ny;j++)
    {
      n+=inc_block(npml,MIN(bs,space->ny-j*bs),npml,space->nx,j*bs,-npml);
      for(k=0;k<block_nz;k++)
        n+=inc_block(npml,MIN(bs,space->ny-j*bs),MIN(bs,space->nz-k*bs),space->nx,j*bs,k*bs);
      n+=inc_block(npml,MIN(bs,space->ny-j*bs),npml,space->nx,j*bs,space->nz);
    }
  n+=inc_block(npml,npml,npml,space->nx,space->ny,-npml);
  for(k=0;k<block_nz;k++)
    n+=inc_block(npml,npml,MIN(bs,space->nz-k*bs),space->nx,space->ny,k*bs);
  n+=inc_block(npml,npml,npml,space->nx,space->ny,space->nz);

#undef inc_block

  assert(n == total_cells);
  assert(cur_block == a->n);
#ifndef HAVE_MPI
  assert(a->num_pmlblocks == a->n - block_nx*block_ny*block_nz);
#endif
  assert(a->num_absblocks <= a->n);

  // The checksums and the original arrays are no longer needed
  free(a->index_crc);
  a->index_crc=NULL;
  free(space->index);
  space->index=NULL;
  if(space->absor)
    {
      free(a->decay_crc);
      a->decay_crc=NULL;
      free(space->absor);
      space->absor=NULL;
    }

#ifdef HAVE_MPI
  // Count, over the local blocks, the neighbours owned by another node
  for(i=a->mpi_startblock;i<a->mpi_endblock;i++)
    {
      if(a->blocks[i].block_xm->mpi_node != a->mpi_node)
        a->mpi_blocks_before++;
      if(a->blocks[i].block_ym->mpi_node != a->mpi_node)
        a->mpi_blocks_before++;
      if(a->blocks[i].block_zm->mpi_node != a->mpi_node)
        a->mpi_blocks_before++;
      if(a->blocks[i].block_xp->mpi_node != a->mpi_node)
        a->mpi_blocks_after++;
      if(a->blocks[i].block_yp->mpi_node != a->mpi_node)
        a->mpi_blocks_after++;
      if(a->blocks[i].block_zp->mpi_node != a->mpi_node)
        a->mpi_blocks_after++;
    }
#endif // HAVE_MPI

  return a;
}

varBlock *search_block(const varBlockList *l,int x,int y,int z)
{
  int i;
  for(i=0;i<l->n;i++)
    if(l->blocks[i].PML==NULL &&
       x>=l->blocks[i].off_x && x<l->blocks[i].off_x+l->blocks[i].nx &&
       y>=l->blocks[i].off_y && y<l->blocks[i].off_y+l->blocks[i].ny &&
       z>=l->blocks[i].off_z && z<l->blocks[i].off_z+l->blocks[i].nz)
      return l->blocks+i;
  return NULL;
}

/*
 * Linear index, inside the per-cell arrays of block `b`, of the cell
 * (x,y,z) given in the same coordinates as the block's offsets
 * (z varies fastest).  No bounds check is done.
 */
int offset_in_block(const varBlock *b,int x, int y, int z)
{
  // Coordinates relative to the first cell of the block
  const int rel_x=x-b->off_x;
  const int rel_y=y-b->off_y;
  const int rel_z=z-b->off_z;

  return (rel_x*b->ny+rel_y)*b->nz+rel_z;
}

/*
 * Release a block list and everything it owns.
 *
 * The cn arrays and the absorbing decay arrays may be shared between
 * several blocks: each one is freed only by the first block that
 * refers to it.  PML arrays that alias the shared `empty` or `garbage`
 * buffers are skipped, those buffers being freed once at the end.
 *
 * Passing NULL is allowed and does nothing, like free(NULL).
 */
void destroy_blocklist(varBlockList *b)
{
  int i,j;

  if(b==NULL)
    return;

  for(i=0;i<b->n;i++)
    {
      // Free cn only if no earlier block uses the same array
      for(j=0;j<i;j++)
        if(b->blocks[i].cn==b->blocks[j].cn)
          break;
      if(i==j)
        free(b->blocks[i].cn);
      if(b->blocks[i].absor)
        {
          // Same rule for the decay array
          for(j=0;j<i;j++)
            if(b->blocks[j].absor &&
               b->blocks[i].absor->dec==b->blocks[j].absor->dec)
              break;
          if(i==j)
            free(b->blocks[i].absor->dec);
          free(b->blocks[i].absor->sumEx);
          free(b->blocks[i].absor->sumEy);
          free(b->blocks[i].absor->sumEz);
        }
      free(b->blocks[i].Ex);
      free(b->blocks[i].Ey);
      free(b->blocks[i].Ez);
      free(b->blocks[i].HHx);
      free(b->blocks[i].HHy);
      free(b->blocks[i].HHz);
    }
  free(b->blocks);
  free(b->index_crc);
  free(b->decay_crc);
  for(i=0;i<b->num_pmlblocks;i++)
    {
      // Free a PML array unless it is one of the shared buffers
#define free_ifok(pointer) \
  do { if((pointer)!=b->empty && (pointer)!=b->garbage) free(pointer); } while(0)
      free_ifok(b->PMLblocks[i].sumrotEx);
      free_ifok(b->PMLblocks[i].sumrotEy);
      free_ifok(b->PMLblocks[i].sumrotEz);
      free_ifok(b->PMLblocks[i].sumrotHHx);
      free_ifok(b->PMLblocks[i].sumrotHHy);
      free_ifok(b->PMLblocks[i].sumrotHHz);
      free_ifok(b->PMLblocks[i].sumEx);
      free_ifok(b->PMLblocks[i].sumEy);
      free_ifok(b->PMLblocks[i].sumEz);
      free_ifok(b->PMLblocks[i].sumHHx);
      free_ifok(b->PMLblocks[i].sumHHy);
      free_ifok(b->PMLblocks[i].sumHHz);
#undef free_ifok
    }
  free(b->PMLblocks);
  free(b->AbsBlocks);
  free(b->empty_block);
  free(b->empty);
  free(b->garbage);
  free(b);
}

/*
 * Copy one component of the E or H field from the non-PML blocks of
 * `b` into `dest`, multiplying every value by `constant`.
 *
 * `field` is FIELD_E or FIELD_H and `dir` is DIR_X, DIR_Y or DIR_Z;
 * any other value is reported through panic().  `dest` is indexed as
 * an array of space->ny * space->nz cells per x plane, z varying
 * fastest.
 *
 * With HAVE_MPI, a node other than node 0 sends each of its own blocks
 * to node 0 and writes nothing to `dest`; node 0 receives the blocks
 * it does not own into `tmpbuf` before copying them.
 * NOTE(review): `tmpbuf` therefore presumably has to hold the largest
 * block -- confirm with the callers.
 */
void block_extract_field(const varBlockList *b,const varSpace *space,int field,int dir,double constant,double *dest,double *tmpbuf)
{
  const int dest_ny=space->ny;
  const int dest_nz=space->nz;
  double *src=NULL;
  int idx;

  for(idx=0;idx<b->n;idx++)
    {
      varBlock *cur=b->blocks+idx;
      int bx,by,bz;
      int ix,iy,iz;

      if(cur->PML)
        continue; // Only the non-PML blocks are extracted

      bx=cur->nx;
      by=cur->ny;
      bz=cur->nz;
#ifdef HAVE_MPI
      // MPI_Barrier(MPI_COMM_WORLD);
      if(b->mpi_node==cur->mpi_node)
        {
#endif // HAVE_MPI
          // Select the requested component of this block
          if(field==FIELD_E)
            {
              if(dir==DIR_X)
                src=cur->Ex;
              else if(dir==DIR_Y)
                src=cur->Ey;
              else if(dir==DIR_Z)
                src=cur->Ez;
              else
                panic("unknown direction.");
            }
          else if(field==FIELD_H)
            {
              if(dir==DIR_X)
                src=cur->HHx;
              else if(dir==DIR_Y)
                src=cur->HHy;
              else if(dir==DIR_Z)
                src=cur->HHz;
              else
                panic("unknown direction.");
            }
          else
            panic("unknown field.");
#ifdef HAVE_MPI
          if(b->mpi_node) // We need to send it to the master
            {
              MPI_Send(src, cur->nx*cur->ny*cur->nz, MPI_DOUBLE, 0,
                       TAG_CARTOOUT, MPI_COMM_WORLD);
              continue;
            }
        }
      else if(b->mpi_node==0)
        {
          // The master fetches the block from the node that owns it
          MPI_Recv(tmpbuf, cur->nx*cur->ny*cur->nz, MPI_DOUBLE, cur->mpi_node,
                   TAG_CARTOOUT, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
          src=tmpbuf;
        }
      else
        continue;
#endif // HAVE_MPI

      // Scatter the block, one z row at a time, to its place in dest
      for(ix=0;ix<bx;ix++)
        for(iy=0;iy<by;iy++)
          {
            const int in_row=(ix*by+iy)*bz;
            const int out_row=((ix+cur->off_x)*dest_ny+(iy+cur->off_y))*dest_nz+cur->off_z;

            for(iz=0;iz<bz;iz++)
              dest[out_row+iz]=src[in_row+iz]*constant;
          }
    }
}
