// $Id: base_alg.c,v 1.1 2001/04/04 22:23:06 tbraun Exp $
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <getopt.h>
#include <sys/time.h>

#define MAXLEN  25

//needleman wunsch alignment parameters
static const int GAPOCOST=2;
static const int GAPECOST=1;
static const int MISMCOST=1;
static const int MATCHRWD=2;


typedef enum {RANDOM = 0, 
	      NECKLACE,
	      STDINPUT,
	      MODNECK,
	      HEUNECK} algorithms;

static const char* algnames[5] ={"RANDOM","NECKLACE","STDINPUT","MODNECK",
				 "HEUNECK"};

typedef enum {A = 0,
	   T = 1,
	   C = 2,
	   G = 3} bases;

static const char basenames[4] = {'A','T','C','G'};

//general parameters

unsigned long long TRIES=5000;
int CRITICAL_TEMP=10;
algorithms ALGORITHM=RANDOM;
int DEBUG=0;
int LENGTH=10;


static char* taglist[500000];
int taglistlen;
char oldtag[MAXLEN+1];

float Tmelt(char [MAXLEN+1], char [MAXLEN+1]);
char* mutate (char* tag, int offset, int framesize);
char* generate_random_tag(int length);
char* tag_complement(int length, char* tag);
char* getFirstNecklace();
char* getNextNecklace();
char* toBase(int* a, int length);

static const float G_NN[4][4] = {-1.02f,-1.43f,-1.16f,-0.73f,
		 -1.38f,-1.77f,-2.09f,-1.16f,
		 -1.46f,-2.28f,-1.77f,-1.43f,
		 -0.60f,-1.46f,-1.38f,-1.02f};
static const float S_NN[4][4] = {-23.6f,-23.0f,-16.1f,-18.8f,
		 -19.3f,-15.6f,-25.5f,-16.1f,
		 -20.3f,-28.4f,-15.6f,-23.0f,
		 -18.5f,-20.3f,-19.3f,-23.6f};
static const float H_NN[4][4] = {-8400.0f,-8600.0f,-6100.0f,-6500.0f,
		 -7400.0f,-6700.0f,-10100.0f,-6100.0f,
		 -7700.0f,-11100.0f,-6700.0f,-8600.0f,
		 -6300.0f,-7700.0f,-7400.0f,-8400.0f};
// penalties halved because they will be applied twice...
// we want penalties to be applied in a strand-orientn-indep manner

static const float gcinitG =1.82f;
static const float gcinitS =-5.9f;
static const float gcinitH = 0.0f;
static const float atinitG = 2.8f;
static const float atinitS = -9.0f;
static const float atinitH = 0.0f;
static const float attermG = 0.4f;
static const float attermS = 0.0f;
static const float attermH = 400.0f;

const float RlnCt = -18.3f; //gas constant * nat log of strand concentration

// "necklace" stuff
const int k = 4;
int necklace[MAXLEN+2];
int first_run = 1;
int n_i=1;

unsigned long long spacing;
static const unsigned long long maxNecklaces[] = {
   1,
   4, 
   10, 
   24,      // maxNecklaces[t] contains # of necklaces of 
   70,      // length t
   208, 
   700, 
   2344, 
   8230, 
   29144, 
   104968, 
   381304, 
   1398500, 
   5162224, 
   19175140, 
   71582944, 
   268439590, 
   1010580544, 
   3817763740U, 
   14467258264U, 
   54975633976U
};


// HEUNECK parameters
static int MAX_MUTATION_TRIES = 5;
static int MUTATION_FRAMESIZE = 3;

void print_usage(char* name) {
  printf ("Usage: %s [OPTIONS]\n\n",name);
  printf("generate tags using nearest neighbour melting ");
  printf("temperature calculation. \n\n OPTIONS\n");
  printf("  -a     specify the algorithm to use. (default:0)\n");
  printf("           0 : RANDOM    (randomly generate new tags)\n");
  printf("           1 : NECKLACE  (use necklaces for generation)\n");
  printf("           2 : STDINPUT  (read tags from STDIN (spaced with \\n))\n");
  printf("           3 : MODNECK   (use necklaces, but spread them evenly\n");
  printf("                          across whole search space.)\n");
  printf("           4 : HEUNECK   (necklaces plus cool heuristic approach!)\n");
  printf("  -d     specify the debug value (default:0)\n");
  printf("           0 : no debug output. \n");
  printf("           1 : alignment and stacking information.\n");
  printf("           2 : same as 1, plus enthalpy and entropy");
  printf("           4 : HEUNECK infos");
  printf("values. \n");
  printf("  -i     specify maximum number of iterations (default:5000)\n");
  printf("           some algorithms might finish before this number\n");
  printf("           is reached.\n");
  printf("  -t     specify the highest temp. where non-\n");
  printf("           -complementary tags are still allowed to");
  printf(" hybridize (\"c\")\n");
  printf("  -l     specify the length of tags (default: 10) \n");
  printf("  --help print this message and exit\n\n");
}

int parse_args (int argc, char *argv[])
{
  int oid;
  int temp;

  while (1)
    {
      static struct option long_options[] = {
	{"help", 0, 0, 0},
	{0, 0, 0, 0}
      };
      /* `getopt_long' stores the option index here. */
      int option_index = 0;

      oid = getopt_long (argc, argv, "a:d:i:t:l:", long_options, &option_index);

      /* Detect the end of the options. */
      if (oid == -1)
	break;

      switch (oid)
	{
	case 0:
	  if (!strcmp (long_options[option_index].name, "help"))
	    {
	      print_usage(argv[0]);
	      return 1;
	    }
	  
	case 'a':
	  sscanf (optarg, "%d", &temp);
	  
	  switch (temp) {
	  case 0 : 
	    ALGORITHM = RANDOM;
	    break;
	  case 1 :
	    ALGORITHM = NECKLACE;
	    break;
	  case 2 :
	    ALGORITHM = STDINPUT;
	    break; 
	  case 3 :
	    ALGORITHM = MODNECK;
	    break;
	  case 4 :
	    ALGORITHM = HEUNECK;
	  }
	  break;
	case 'd':
	  sscanf (optarg, "%d", &DEBUG);
	  break;
	case 'i':
	  sscanf (optarg, "%d", &TRIES);
	  break;
	case 't':
	  sscanf (optarg, "%d", &CRITICAL_TEMP);
	  break;
	case 'l':
	  sscanf (optarg, "%d", &LENGTH);
	  break;
	case '?':
	  print_usage(argv[0]);
	  /* `getopt_long' already printed an error message. */
	  return 1;
	default:
	  abort();
	}
    }
  // -1 means: end reached --> continue
  return 0;
}

int main(int argc, char* argv[])
{
  char tag1[MAXLEN+1]; char tag2[MAXLEN+1];
  float Temp = 0.0f;
  unsigned long long i;
  int j;
  int finish = 0;
  char* current_tag;
  char* currtagcomp;

  // HEUNECK variables
  int mutation_tries_remaining;
  int mutation_offset;

  float curr_temp;
  float curr_comp_temp;
  float curr_max_temp;

  float new_tag_temp;
  float new_tag_comp_temp;
  float new_max_temp;
  
  char* new_tag;
  char* new_tag_comp;

  int currentTagBindsTooLow;

  srand(31337);
  i=rand();
  if (parse_args (argc, argv)) {
    //    printf("Problem in get_opt... BAEH!\n");
    return 1;
  }
  // 'bad' cases
  //printf("Melting temp %f\n",Tmelt("AGGAT","AGGT")-273);
  /* printf("Melting temp %f\n",Tmelt("AGG","AG")-273);
     printf("Melting temp %f\n",Tmelt("AAGG","AG")-273);
     printf("Melting temp %f\n",Tmelt("AG","AAGG")-273);
     printf("Melting temp %f\n",Tmelt("AGAAAAA","G")-273);
     printf("Melting temp %f\n",Tmelt("AAAAAAGAAAAAA","G")-273);
     printf("Melting temp %f\n",Tmelt("TTTTAGTTTTTTG","AG")-273);
     return(0); */
  taglistlen = 1;
  printf("Running %s with critical Temp=%i and %Lu tries. Taglength=%i/%i\n", 
	 algnames[ALGORITHM],CRITICAL_TEMP,TRIES,LENGTH,MAXLEN);
  fprintf(stderr, "Running %s with critical Temp=%i and %Lu tries. Taglength=%i/%i\n", algnames[ALGORITHM],CRITICAL_TEMP,TRIES,LENGTH,MAXLEN);

  switch (ALGORITHM) {
  case RANDOM: 
    taglist[0] = generate_random_tag(LENGTH); 
    break;
  case NECKLACE:
    taglist[0] = getFirstNecklace(LENGTH,0);
    break;
  case STDINPUT:
    taglist[0]=(char*) malloc(MAXLEN+1);
    fgets(taglist[0],MAXLEN+1,stdin);
    taglist[0][strlen(taglist[0])-1]='\0';
    break;
  case MODNECK:
    taglist[0] = getFirstNecklace(LENGTH,1);
    break;
  case HEUNECK:
    taglist[0] = getFirstNecklace(LENGTH,1);
    break;
  }
  
  printf("1 0\n");

  for (i=1;i<TRIES;i++){
      switch (ALGORITHM) {
      case RANDOM:
	current_tag = generate_random_tag(LENGTH); 
	break;
      case NECKLACE:
        if (!(current_tag = getNextNecklace(LENGTH))) finish=1;
	break;
      case STDINPUT:
	current_tag=(char*) malloc(MAXLEN+1);
	if (fgets(current_tag,MAXLEN+1,stdin))
	    current_tag[strlen(current_tag)-1]='\0';
	else finish = 1;
	break; 
      case MODNECK:
	  if (!(current_tag = getNextNecklace(LENGTH))) finish=1;
	  break;
      case HEUNECK:
        if (!(current_tag = getNextNecklace(LENGTH))) finish=1;
	break;
      }
      if (finish) break;

      // see if tag itself bind too weakly
      currentTagBindsTooLow = 0;
      if ((Tmelt(current_tag, current_tag)<273 + CRITICAL_TEMP * 1.1666 + 3.4)) { 
	currentTagBindsTooLow = 1;
	if (!(ALGORITHM == HEUNECK)) continue;
      }
	
      currtagcomp = tag_complement(strlen(current_tag),current_tag);
      
      //printf("\nnew current tag: %s and complement: %s\n",current_tag,currtagcomp);

      switch(ALGORITHM){
      case HEUNECK:
	
	mutation_tries_remaining = MAX_MUTATION_TRIES;

	for (j=0;j<taglistlen;j++){

	    curr_temp      = Tmelt(current_tag,taglist[j]);
	    curr_comp_temp = Tmelt(currtagcomp,taglist[j]); 
	    
	    curr_max_temp  = ( curr_temp > curr_comp_temp ) ? curr_temp : curr_comp_temp; 

	    if  ( curr_max_temp > 273+CRITICAL_TEMP || currentTagBindsTooLow) // current tag is bad
	      {

		if (DEBUG ==4)
		  printf ( " current tag rejected at temp %6.2f, j=%d/%d \n", curr_max_temp-273, j, taglistlen );

		// something goes wrong here at some point...
		if ( mutation_tries_remaining == 0){
		  // enough tries with the current tag - reject it.
		  //free(current_tag);
		  //free(currtagcomp);
		  break;
		}
		
		// try to find a tag that can coexist with taglist[j], 
		// by mutating up to MUTATION_FRAMESIZE times

		mutation_offset = 0;

		while (	( ( curr_max_temp > 273 + CRITICAL_TEMP )  ||  // ( still too high or ...
			  (Tmelt(current_tag, current_tag) < 273 + CRITICAL_TEMP *1.1666 + 3.4)) && // ... still too low) and
			( mutation_offset < MUTATION_FRAMESIZE ))    // ... more mutations allowed
		  {
		  
		  // mutate
		  
		  if (DEBUG ==4)
		    printf ( " current tag = %s \n", current_tag );

		  new_tag           = mutate (current_tag, mutation_offset, MUTATION_FRAMESIZE);
		  new_tag_comp      = tag_complement(strlen (new_tag), new_tag);

		  if (DEBUG ==4)
		    printf ( " mutated tag = %s \n", new_tag );
		  
		  mutation_offset++;

		  new_tag_temp      = Tmelt(new_tag, taglist[j]);
		  new_tag_comp_temp = Tmelt(new_tag_comp, taglist[j]);
		  
		  new_max_temp = ( new_tag_temp > new_tag_comp_temp ) ? new_tag_temp : new_tag_comp_temp;
		
		  if (DEBUG ==4)
		    printf ( " mutation yields new temp %6.2f \n", new_max_temp-273 );
		  
		  if (new_max_temp < curr_max_temp)
		    // the new tag is better than the original, so we replace the current tag 
		    {
		      strcpy(current_tag, new_tag);
		      currtagcomp    = new_tag_comp;
		      curr_max_temp  = new_max_temp;

		      free( new_tag );
		      free( new_tag_comp );
		    }
		  }
		

		  if ( curr_max_temp < 273 + CRITICAL_TEMP &&  
		       (Tmelt(current_tag, current_tag)>=273 + CRITICAL_TEMP * 1.6666 + 3.4) )
 
		  // mutation helped - the current tag can coexist with taglist[j]
		    {
		      mutation_tries_remaining--;
		    if (DEBUG ==4)
		      printf( " mutation successful!, remaining tries = %d\n", mutation_tries_remaining);
		    // increase total number of tries
		    i++;
		    if (i >= TRIES-1) break;
		    // start the for(j...) loop again to check current tag against all other tags
		    j = -1;
		    continue;
		  }
		else
		  // mutation didn't help - give up for this tag
		  {  
		    //free(current_tag);
		    //free(currtagcomp);
		    break;
		  }
	      }
	}
      	
	if (j==taglistlen) // current tag accepted 
	  {
	    taglist[taglistlen++]=current_tag;
	    //    printf("Current tag accepted\n");
	    printf("%i %i\n",j+1,i);
	  }
	break;



      default:  // used by all algorithms except for HEUNECK
	
	for (j=0;j<taglistlen;j++)
	  {
	    if ( (Tmelt(current_tag,taglist[j])>273+CRITICAL_TEMP) 
		 || (Tmelt(currtagcomp,taglist[j])>273+CRITICAL_TEMP) )
	      {
		free(current_tag);
		free(currtagcomp);
		break;
	      } 
	  }
	
	if (j==taglistlen) 
	  {
	    taglist[taglistlen++]=current_tag;
	    //    printf("Current tag accepted\n");
	    printf("%i %i\n",j+1,i);
	  }
      }
      
  }
  
  for (j=0;j<taglistlen;j++) {
    printf("Tag %i :%s  Tmelt with comp:%5.2f\n",j,taglist[j],Tmelt(taglist[j],taglist[j])-273);}
  
  printf("\nReal number of tags considered: %Lu\nRemember: %s --help for options.\n\n",i,argv[0]);
  if (ALGORITHM==MODNECK) printf("MODNECK: Spacing between necklaces was %d\n",spacing);
  return(0);
}


char* mutate ( char* tag, int offset, int framesize )
{
  char* result = (char*) malloc ( LENGTH+1 );
  int i;
  int c;
  strcpy ( result, tag );
  for ( i = offset; i < LENGTH; i+= framesize )
    {
      result[i] = basenames[ rand()%4 ];
    }
  
  return result;
}

char* generate_random_tag(int length)
{
  int i=0;
  char* tag=(char*) malloc(length+1);
  for (i=0; i<length;i++)
    {
      tag[i] = basenames[rand()%4];
    }
  tag[length]= 0;
  return tag;
}  

char* tag_complement(int length, char* tag)
{
  int i=0;
  char* comp=(char*) malloc(length+1);
  for (i=0; i<length; i++)
    {
      
      switch (tag[i]) {
      case 'A': 
	comp[length-i-1]='T';
	break;
      case 'C': 
	comp[length-i-1]='G';
	break;
      case 'G': 
	comp[length-i-1]='C';
	break;
      case 'T': 
	comp[length-i-1]='A';
	break;
      }
    }
  comp[length] = 0;
  //printf("%s\n",comp);
  return comp;
}
    
// This is the so-called FKM algorithm 
// for generating necklaces of length MAXLEN.
// I've taken it apart so that it generates
// one necklace at a time when called.
// A k-ary necklace is an equivalence class 
// of k-ary strings under rotation.
// Since we're dealing with DNA bases , k = 4.
//
// I put the paper in ~tbraun/536/project/papers/necklaces.ps
//

char* getFirstNecklace(int length, int allowSpacing)
{
  int i;
  int l;

  if (length > 20){
    l = 20;
  }
  else{
    l = length;
  }
  spacing = 1;
  
  if (allowSpacing) {
    if ( TRIES < maxNecklaces[l] ){
      spacing = maxNecklaces[l]/TRIES;
    }
  }
  
  for (i=0; i<length+1; i++)
    {
      necklace[i]=0;
    }
  n_i = length;
  first_run = 0;
  return (toBase(&necklace[1], length));
}


char*  getNextNecklace(int length)
{
  int i,j;
  char* result;
  int found_necklace;
  unsigned long long s;
  
  if (n_i == 0 || first_run)
    {
      return 0;
    }

  for ( s = 0; s < spacing; s++)
    {
      found_necklace = 0;
      while(!found_necklace)
	{
	  necklace[n_i]++;
	  for(j=1; j<=length-n_i; j++)
	    {
	      necklace[n_i+j] = necklace[j];
	}
	  if (length % n_i == 0)
	    {
	      found_necklace = 1;
	    }
	  n_i=length;
	  while (necklace[n_i]==k-1)
	    {
	      n_i--;
	    }
	  // return 0 if all necklaces have been generated
	  if (n_i == 0 && !found_necklace)
	    { 
	      return 0;
	    }
	}
    }
  result = toBase(&necklace[1], length);
  return result;
}

// This function converts an array of length 'length' of integers in
// the range [0..3] into a character array of DNA-bases
char* toBase(int* a, int length)
{
  int i;
  char* result=(char*) malloc(length+1);
  for (i=0; i<length; i++)
    {
      result[i] = basenames[a[i]];
    }
  //printf("\t\t\t%s\n",result);
  result[length]='\0';
  return result;
}


//calculate change in Gibbs free energy caused by a partially 
//complementary tag/antitag pair coming in contact.  Use this to ensure
//stability of the molecule at 37C.  Then use enthalpy and entropy to 
//calculate melting temperature.
float Tmelt(char tag1[MAXLEN+1], char tag2[MAXLEN+1])
{
  // dir: backtrace (arrows) array : 1 == vertical 2== diagonal 3== horizontal
  // a: scoring matrix
  
  int sawGC = 0;
  int stringlen = 0; int t1 = 0; int t2 = 0; int temph = 0; int tempv = 0; 
  int tempd = 0;
  char alignment[(2*MAXLEN)+1];
  int tag1out[(2*MAXLEN)+1];int tag2out[(2*MAXLEN)+1];
  int a[MAXLEN+1][MAXLEN+1]; int dir[MAXLEN+1][MAXLEN+1];
  int lent1 = 0; int lent2 = 0; int alnc = 0; 
  int t1prev = 0; int t2prev = 0; int t1p2 = 0; int t2p2 = 0;
  int Gx = 0; int Gy = 0;
  float Gtot = 0.0f; float Stot = 0.0; float Htot = 0.0; float Tm = 0.0;
  char t1ss[3] = "";
  int matches_in_a_row =0;

  lent1 = strlen(tag1); 
  lent2 = strlen(tag2);
   
  //  printf("%s %s\n",tag1,tag2);
  
  // backtrace array init
  // modify Needleman-wunsch the following way:
  // assume a gap BEFORE the first aligment, by changing initialisation
  // values. also, assume a gap BEHIND the last alignment, by changing
  // the score of the lower-left entry (to include the gap). This should
  // cause edges of the alignment to be treated equal with internal bases
  // 'grouping' alignments together and gaps towards the outside.
  // try aligning "AAGG" with "AG" to see the difference.

  // old version:
  /* for (t1 = 0; t1 < lent1+1; t1++) {
     for (t2 = 0; t2 < lent2+1; t2++) {
     if (t1 == 0) {
     dir[t1][t2] = 1;
     if (t2 == 1) 
     a[t1][t2] = -GAPOCOST;
     else if (t2>1) a[t1][t2] = a[t1][t2-1] - GAPECOST;
     } else if (t2 == 0) {
     dir[t1][t2] = 3;
     if (t1 == 1) 
     a[t1][t2] = -GAPOCOST;
     else if (t1>1) a[t1][t2] = a[t1-1][t2] - GAPECOST;
     } else {
     dir[t1][t2] = 0;
     a[t1][t2] = 0;
     }
     }
     } */
  // modified version:
  a[0][0] = -GAPOCOST;
  for (t1 = 0; t1 < lent1+1; t1++) {
    for (t2 = 0; t2 < lent2+1; t2++) {
      if (t1 == 0) {
	dir[t1][t2] = 1;
        if (t2>0) a[t1][t2] = a[t1][t2-1] - GAPECOST;
      } else if (t2 == 0) {
	dir[t1][t2] = 3;
	if (t1>0) a[t1][t2] = a[t1-1][t2] - GAPECOST;
      } else {
	dir[t1][t2] = 0;
	a[t1][t2] = 0;
      }
    }
  }
  dir[0][0] = 0;
  
  // start filling the score matrix a, looking for best global
  // alignment. (needleman-wunsch) 
  // assume: best NW alignment will result in highest melting temp.
  // 
  for (t2 = 1; t2 <= lent2; t2++) {
    for (t1 = 1; t1 <= lent1; t1++) {
      // now, calculate scores for the three possible ways to extend
      // the alignment. horizontal, vertical, or diagonal.

      // diagonal
      if (tag1[t1-1] == tag2[t2-1]) {
	// bases match. score= diagonal + match bonus
	tempd = a[t1-1][t2-1] + MATCHRWD;
	if ((t1==lent1)&&(t2==lent2)) tempd -= GAPOCOST; // modified version
      } else {
	tempd = a[t1-1][t2-1] - MISMCOST;
	if ((t1==lent1)&&(t2==lent2)) tempd -= GAPOCOST; // modified version
      }
      // bases didn't match. calculate gap cost.
      // first, check horizontal movement. if previous horizontal
      // cell was calculated from its previous horizontal cell, we
      // are extending a gap. GAPECOST. 
      // otherwise, we would initiate a gap. GAPOCOST.
      // horizontal
      if (dir[t1-1][t2] == 3) {
	//extend gap
	temph = a[t1-1][t2] - GAPECOST;
	if ((t1==lent1)&&(t2==lent2)) temph -= GAPECOST; // modified version
      } else {
	//open gap
	temph = a[t1-1][t2] - GAPOCOST;
	if ((t1==lent1)&&(t2==lent2)) temph -= GAPECOST; // modified version
      }
      
      // check vertically
      if (dir[t1][t2-1] == 1) {
	tempv = a[t1][t2-1] - GAPECOST;
	if ((t1==lent1)&&(t2==lent2)) tempv -= GAPECOST; // modified version
      } else {
	tempv = a[t1][t2-1] - GAPOCOST;
	if ((t1==lent1)&&(t2==lent2)) tempv -= GAPECOST; // modified version
      }
	
      //see what score is best. It is necessary that matches are ONLY chosen
      //if the diagonal score is HIGHER than every other score. In the equal
      //case, a gap has to be chosen, to get into gap extension mode, which
      //can be cheaper in the next step.

      /* if ((temph == tempv)&&(tempd<temph)) {
	 printf("\n---\nHouston: We have a problem...");
	 printf("\n%s\n%s\n%d\n%d\n---\n",tag1,tag2,t1,t2);
	 } */
      if ((tempd > temph) && (tempd > tempv)) {
	//yes. accept diagonal.
	a[t1][t2] = tempd;
	dir[t1][t2] = 2;  //assert diagonal in traceback array
      } else if (temph > tempv) {
	//horizontal gap is better
	a[t1][t2] = temph;
	dir[t1][t2] = 3;  //current edge horizontal
      } else {
	a[t1][t2] = tempv;
	dir[t1][t2] = 1;  //current edge vertical
	}
    }
  }

  // best alignment calculated. backtrace to get alignment.

  t1 = lent1;
  t2 = lent2;
  t1prev = t1;
  t2prev = t2;
  alnc = 0;

  while ((t1 > 0) || (t2 > 0)) {  
      //backtracking step records residues
      //in reverse order 

      //save old position 
      t1p2 = t1prev;
      t2p2 = t2prev;
      t1prev = t1;
      t2prev = t2;
      
      //move to next cell
      if (dir[t1][t2] == 2) {
	  t1 -= 1; t2 -= 1;
      } else if (dir[t1][t2] == 1) {
	  t2 -= 1;
      } else {
	  t1 -= 1;
    }

      //printf("%i %i BBB ",t1prev,t2prev);
      //printf("%c %c %c %c\n", tag1[t1], tag2[t2], 
      //tag1[t1prev], tag2[t2prev]);

      //fill in tag1out and tag2out with the calculated alignment.
      //also, generate alignment array that contains only matches of
      //  length 2 or more 
      // note that the alignment is stored in reversed form! 

    if (dir[t1prev][t2prev] == 2) {
      tag1out[alnc] = tag1[t1];
      tag2out[alnc] = tag2[t2];
      // printf("%c ",tag1out[alnc]);
      //printf("%c",tag2out[alnc]);
      if (tag1out[alnc]==tag2out[alnc]) {
	  if (matches_in_a_row==0) {
	      alignment[alnc]='-';
	  } else {
	      alignment[alnc]=tag1out[alnc];
	      alignment[alnc-1]=tag1out[alnc-1];
	  }
	
	matches_in_a_row++;
      } else {
	matches_in_a_row = 0;
	alignment[alnc] = '-';
      }
      // printf(" %c %i\n",alignment[alnc],matches_in_a_row);
    } else if (dir[t1prev][t2prev] == 3) {
      tag1out[alnc] = tag1[t1];
      tag2out[alnc] = '-';
      alignment[alnc] = '-';
      matches_in_a_row=0;
    } else {
      tag1out[alnc] = '-';
      tag2out[alnc] = tag2[t2];
      alignment[alnc] = '-';
      matches_in_a_row=0;
    }
    alnc += 1;
  }
  alignment[alnc] = '\0';//(int)'\0';

  
  //print out a (score) matrix
  //for (t2 = 0; t2 <= lent2; t2++) {
  //	for (t1 = 0; t1 <= lent1; t1++) {
  //		printf("%d ",a[t1][t2]);
  //	}
  //	printf("\n");
  //}
  
  //print out traceback matrix
  //for (t2 = 0; t2 <= lent2; t2++) {
  //	for (t1 = 0; t1 <= lent1; t1++) {
  //		printf("%d ",dir[t1][t2]);
  //	}
  //	printf("\n");
  //}

  //go backwards through the (backward) alignment and calculate Gibbs free
  //energy of the alignment
  for (t2 = 0; t2 <= alnc-1; t2++) {
    // if current letter in alignment is A or T and 
    // (the next in basepair order is 'bad' or we are at the end of alignment)
    // we are adding penalty for at termination
    
    /*  if ((strstr("AT",&alignment[t2]) != NULL) && 
	((strstr("-",&alignment[t2-1]) != NULL) || 
	(t2 == 0))) { */

    //if the current index of the alignment is a letter and ....
    //    there is a '-' just upstream or just downstream or
    //    we are at the beginning or end of the alignment
    //then we're just beginning or ending a stretch of stacked pairs, 
    //so we must (symmetrically) apply penalties for beginnings and ends
    //of stacked segments.
    if (alignment[t2]=='G' || alignment[t2]=='C') 
      sawGC = 1;
    

    if ((alignment[t2] != '-') && 
	((t2 < alnc - 1 && alignment[t2+1] == '-') || // at 5' end of a segment
	 (t2 == alnc - 1))){ //we are at the 5' end
      
      switch (alignment[t2]) {
      case 'T' :
	Gtot += attermG;
	Stot += attermS;
	Htot += attermH;
	if ( DEBUG > 0 && DEBUG < 4) 
	  printf("AT term penalty.\n");
	break;
      case 'G' :
	sawGC = 1;
	break;
      case 'C' : 
	sawGC = 1;
	break;
      case 'A' :
      }
      if (sawGC) {
	Gtot += gcinitG;
	Stot += gcinitS;
	Htot += gcinitH;       
	if ( DEBUG > 0 && DEBUG < 4) 
	  printf("GC init penalty.\n");
      } else {
	Gtot += atinitG;
	Stot += atinitS;
	Htot += atinitH;
	if ( DEBUG > 0 && DEBUG < 4) 
	  printf("AT init penalty.\n");
      }
      sawGC = 0;
    }
    
    if ((t2 > 0 && alignment[t2-1] == '-') || 
	// we are at 3' end of a match segment 
	(t2 == 0)) { // we are at the 3' end
      
      switch (alignment[t2]) {
      case 'T' :
	sawGC = 0;
	break;
      case 'G' :
	sawGC = 1;
	break;
      case 'C' : 
	sawGC = 1;
	break;
      case 'A' :
	Gtot += attermG;
	Stot += attermS;
	Htot += attermH;
	if ( DEBUG > 0 && DEBUG < 4) 
	  printf("AT term penalty.\n");
	sawGC = 0;
	break;
      default :
	sawGC = 0;
      }
    }
    // now we have 2 matches. score them.
    //	if ((strstr("ACGT",&alignment[t2]) != NULL) &&
    //  (strstr("ACGT",&alignment[t2-1]) != NULL)) {
    
    if (alignment[t2] != '-' && t2 != 0 && alignment[t2-1] != '-' ) {
      //to get the orientation of the pair right,
      //must take first the current base, then the previous
      //so the previous base must access Gx, current access Gy
      
      switch (alignment[t2]) {
      case 'T' : 
	Gx=3;
	break;
      case 'G' : 
	Gx=2;
	break;
      case 'C' : 
	Gx=1;
	break;
      case 'A' :
	Gx=0;
      }
      
      switch (alignment[t2-1]) {
      case 'T' : 
	Gy=3;
	break;
      case 'G' : 
	Gy=2;
	break;
      case 'C' : 
	Gy=1;
	break;
      case 'A' :
	Gy=0;
      }

      if ( DEBUG > 1 && DEBUG < 4) printf("H:%f S:%f G:%f t2:%i\n",H_NN[Gx][Gy],
					  S_NN[Gx][Gy],G_NN[Gx][Gy],t2);
      Gtot += G_NN[Gx][Gy];
      Stot += S_NN[Gx][Gy];
      Htot += H_NN[Gx][Gy];


      // check for at initiation. is there no base before current? if yes,
      // there might have been at init.  THIS IS ALL DONE EARLIER
      //if ((alignment[t2+1] == '\0') || (alignment[t2+1] == '-')) {
      //if ((alignment[t2] == 'A' || alignment[t2] == 'T')) {
      //  //alignment[t2] = 'l';
      //  Gtot += atinitG;
      //  Stot += atinitS;
      //  Htot += atinitH;
      //} else {
      //  //alignment[t2] = 'h';
      //  Gtot += gcinitG;
      //  Stot += gcinitS;
      //  Htot += gcinitH;
      //}
	// if binding would be too bad, do not bind. WRONG!
	//if (Gtot > 0.0) {
	//Gtot = 0.0;
	//Stot = 0.0;
	//Htot = 0.0;
	//}
      //}
    }
  }

  Tm = Htot/(Stot + RlnCt);
  	  

  if ( DEBUG > 0 && DEBUG < 4) {
      
      //print out alignment
      
      printf("alignment1:");
      for (t2 = alnc-1; t2 >= 0; t2--) {
	  printf("%c",tag2out[t2]);
      }
      printf("\n");
      printf("alignment2:");
      for (t1 = alnc-1; t1 >= 0; t1--) {
	  printf("%c",tag1out[t1]);
      }
      printf("\n");
      
      printf("stacked   :");
      for (t2 = alnc-1; t2 >= 0; t2--) {
	  printf("%c",alignment[t2]);
      }
      printf(" Tmelt: %5.2f Htot: %5.2f Stot: %5.2f\n---\n",Tm-273, Htot, Stot);
  }
      return(Tm);
      //printf("Gtot %f\n",Gtot);
}

// $Log: base_alg.c,v $
// Revision 1.1  2001/04/04 22:23:06  tbraun
// added final/.
//
// Revision 1.25  2001/04/03 06:00:54  tamhuynh
// *** empty log message ***
//
// Revision 1.24  2001/04/03 05:18:16  tbraun
// h check added.
//
// Revision 1.23  2001/03/31 01:14:03  tamhuynh
// added HEUNECK, our cool pseudo-evolutionary algorithm.
//
// Revision 1.22  2001/03/30 11:06:10  tbraun
// reversed debug info, fixed STDINPUT again.
//
// Revision 1.21  2001/03/30 06:00:43  tbraun
// STDINPUT fixed, comment fixed
//
// Revision 1.20  2001/03/30 05:34:16  gauthier
// A few changes...
//
// Revision 1.19  2001/03/30 01:01:32  tbraun
// added MODNECK algorithm, introduced long long numbers.
//
// Revision 1.18  2001/03/29 23:28:59  tamhuynh
// the generated necklaces are now spread evenly over the total necklace-space, if
// the number of tries is smaller than the number of possible necklaces...
//
// Revision 1.17  2001/03/28 21:57:40  tbraun
// fixed STDINPUT, output of tag numbers.
//
// Revision 1.16  2001/03/28 19:58:54  tbraun
// modified needleman-wunsch to avoid unnecessary internal gaps.
//
// Revision 1.15  2001/03/28 09:40:24  tbraun
// found 2 bugs in alignment calculation. hopefully removed them.
//
// Revision 1.14  2001/03/28 04:04:36  tbraun
// added get_opt functionality.
//
// Revision 1.13  2001/03/27 22:28:34  tbraun
// more modifications to Tmelt.
//
// Revision 1.12  2001/03/27 02:00:54  tbraun
// checked repaired version in. FOLKS: EVERYONE UPDATE TO THIS!!!
//
// Revision 1.11  2001/03/23 07:34:41  tbraun
// uh oh, Tmelt is ill!
//
// Revision 1.10  2001/03/23 06:28:14  tbraun
// fixed warnings, got rid of strstr.
//
// Revision 1.9  2001/03/20 20:52:07  tamhuynh
// fixed a small bug
//
// Revision 1.8  2001/03/17 00:02:02  lvandela
// *** empty log message ***
//
// Revision 1.7  2001/03/07 12:26:16  tbraun
// minor changes
//
// Revision 1.6  2001/03/06 22:26:34  tbraun
// introduced general parameters.
//
// Revision 1.5  2001/02/27 03:53:32  tbraun
// bug fixed in Tmelt - array init.
//
// Revision 1.4  2001/02/26 23:16:09  tamhuynh
// split getNextNecklace() into getFirst- and getNextNecklace()...
//
// Revision 1.3  2001/02/26 18:21:39  tbraun
// added id,log tags.
//


