program AsymmetryScaler;

{Written by:       John H. McDonald}
{                  Department of Biological Sciences}
{                  University of Delaware}
{                  Newark, Delaware  19716}
{                  http://udel.edu/~mcdonald}
{                  mcdonald@udel.edu}
   const
	   initialMaxTAI=10.0;
	   maxNumRandomInitial=1000;
	   maxNumIterations=1000;
	var
		myOutput: text; {the output data file}
		myInput: text; {the input data file}
		lastSlashPos: integer; {used in separating file path from file name}
		stringIndex: integer; {used in separating file path from file name}
		numRandomInitial: integer; {number of random initial starting positions for amino acids on scale}
		numDecimalPlaces: integer; {number of decimal places of precision, 1, 2, or 3}
	   maxUnitDivisions: integer; {number of positions in the scale}
      InputFileString: string[250]; {user-provided name of input file}
      OutputFileString: string[250]; {user-provided name of output file}
	   outputFileFullString: string[250]; {path+name of output file}
	   inputFileFullString: string[250]; {path+name of input file}
	   inputPath: string[250]; {path of program file, prepended to user-input file names to make full names} 
		actionChar: char; {y/n response from user}
      overWriteYes: char; {y/n response from user}
		inputCheck: boolean; {used in checking the user input for duplicate file names, etc.}

{=========This function converts numbers to amino acid symbols.================}
  function NumberToLetter(x:integer):string;
     var
        number:integer;
        result:string;
     begin
        number:=x;
        case number of
				1: result:='A';
            2: result:='C';
            3: result:='D';
            4: result:='E';
            5: result:='F';
            6: result:='G';
            7: result:='H';
            8: result:='I';
            9: result:='K';
            10: result:='L';
            11: result:='M';
            12: result:='N';
            13: result:='P';
            14: result:='Q';
            15: result:='R';
            16: result:='S';
            17: result:='T';
            18: result:='V';
            19: result:='W';
            20: result:='Y';

            21: result:='X';
            22: result:='-';
			otherwise
				result:='?';
            end;
			numberToLetter:=result;
      end;

{====Reads in the data from the input file and analyzes it}
  procedure DoAnalyzeInput;
     var
		  dummy: char;
		  index, index2, index3, index4, IterationIndex, RandomIndex, randomInt: integer;
		  nAB: array[1..20, 1..20] of integer;
		  pAB: array[1..20, 1..20] of extended;
		  EAB: extended;
		  N: integer;
		  BigPAB: array[1..20, 1..20] of extended;
		  bestTAI: array[1..20] of extended;
		  sortedTAI: array[0..21] of extended;
		  sortedAA: array[0..21] of integer;
		  TAI: array[1..20] of real;
		  BigD, NewBigD, NewTAI, oldBigD, overallBestBigD, averageTAI, minTAI, maxTAI, TAIrange: extended;
		  AAOrder: array[1..20] of integer;
		  AAIndex1,AAIndex2, numDivisions, unitDivisions: integer;

     begin
     readln(myInput); {read and ignore first two lines of matrix file}
     readln(myInput);
	  writeln(myOutput, 'D-score after each initial condition.');
	  writeln(myOutput, 'Replicate     D-score');
	  writeln(myOutput, '---------  ----------');
     overallBestBigD:=1000000;
	  unitDivisions:=10;
	  for index := 1 to 20 do {read n(obs) values for each pair of amino acids from matrix file}
        begin
        for index2:=1 to 6 do
	        read (myInput, dummy); {ignore name of amino acid}
        for index2 := 1 to 20 do
           begin
           read(myInput, nAB[index, index2]);
           end;
        readln(myInput);
        end;

{calculate pAB values, the observed proportion in each direction}
     for index := 1 to 20 do
        begin
        for index2 := 1 to 20 do
           begin
           pAB[index, index2] := 0;
           if nAB[index, index2] > 0 then
              pAB[index, index2] := nAB[index, index2] / (nAB[index, index2] + nAB[index2, index]);
           end;
        end;

	  writeln;
	  writeln('               Progress Bar');   {set up a progress bar on the screen}
	  if numRandomInitial>19 then
	     writeln('replicate   1  ',(numRandomInitial div 4):3,' ',(numRandomInitial div 2):3, ' ', 3*(numRandomInitial div 4):3, ' ',numRandomInitial:4);
	  if numRandomInitial<19 then
	     begin
	     write('replicate   1');
	     for index:=2 to numRandomInitial do
	        begin
		     if (index mod 5)=0 then
		        write(index:2);
		     if ((index mod 5)<>0) and ((index mod 5)<>4) then
		        write(' ');
		     end;
	     writeln;
	     end;
	  write('            ');

     for RandomIndex := 1 to numRandomInitial do     {do replicates with random initial order and random initial TAI values}
        begin
		  if numRandomInitial>1 then  {write a progress bar}
		     begin
		     if round(20.0*randomIndex/numRandomInitial)>round(20.0*(randomIndex-1)/numRandomInitial) then
		        write('|');
		     end;

	     AAOrder[1]:=1;
	     for index:=2 to 20 do
	   	  AAOrder[index]:=99;
	     for index:=2 to 20 do
	   	  begin
	   	  randomInt:=round(random*19.0+1.0);
	   	  while randomInt<99 do
	   	     begin
		   	  if AAOrder[randomInt]>20 then
		   		  begin
	   			  AAOrder[randomInt]:=index;
		   		  randomInt:=100;
		   		  end;
		   	  randomInt:=randomInt+1;
		   	  if randomInt=21 then
			   	  randomInt:=2;
		   	  end;
		     end;

{assign initial TAI at random, except first AA is 5.0}
	     TAI[1] := initialMaxTAI/2;
	     for index := 2 to 20 do
		     TAI[index] := random*initialMaxTAI;

{calculate initial BigPAB values, the predicted proportion in each direction}
	     for index := 1 to 20 do
		     begin
		     AAIndex1:=AAOrder[index];
		     for index2 := 1 to 20 do
		   	  begin
			     AAIndex2:=AAOrder[index2];
			     BigPAB[AAIndex1, AAIndex2] := 0;
			     if AAIndex1 <> AAIndex2 then
			   	  BigPAB[AAIndex1, AAIndex2] := (exp(TAI[AAIndex2] - TAI[AAIndex1])) / (1 + (exp(TAI[AAIndex2] - TAI[AAIndex1])));
			     end;
		     end;

{calculate initial BigD value, sum of weighted function of differences between observed and expected proportions}
          BigD := 0;
          for index := 1 to 20 do
             begin
             AAIndex1:=AAOrder[index];
             for index2 := 1 to 20 do
                begin
                AAIndex2:=AAOrder[index2];
                if AAIndex1 <> AAIndex2 then
                   begin
                   if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) > 0.000001 then {both non-zero}
                      BigD := BigD + (nAB[AAIndex1, AAIndex2] + nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2]) + pAB[AAIndex2, AAIndex1] * ln(pAB[AAIndex2, AAIndex1]
                          / BigPAB[AAIndex2, AAIndex1]));
                   if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) < 0.000001 then {one is zero}
                      begin
                      if pAB[AAIndex1, AAIndex2] > 0.000001 then
                         BigD := BigD + (nAB[AAIndex1, AAIndex2]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2]));
                      if pAB[AAIndex2, AAIndex1] > 0.000001 then
                         BigD := BigD + (nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex2, AAIndex1] * ln(pAB[AAIndex2, AAIndex1] / BigPAB[AAIndex2, AAIndex1]));
                      end;
                   end;
                end;
             end;
			 
{do a bunch of iterations}
          oldBigD:=100000; {bigD is the measure of overall fit; lower numbers are better fit}
          iterationIndex:=0;
          while (iterationIndex<maxNumIterations) do
             begin
             iterationIndex:=iterationIndex+1;
{find the min and max TAI values}
             minTAI:=initialMaxTAI;
		       maxTAI:=0;
		       for index3:=1 to 20 do
		          begin
			       if TAI[index3]<minTAI then
						 minTAI:=TAI[index3];
			       if TAI[index3]>maxTAI then
			          maxTAI:=TAI[index3];
			       end;
		       TAIrange:=maxTAI-minTAI;
		       minTAI:=trunc(minTAI-0.1*TAIrange);
		       maxTAI:=trunc(maxTAI+0.1*TAIrange+1);
		       TAIrange:=maxTAI-minTAI;
		       numDivisions:=round(TAIrange*unitDivisions);

{now change the TAI values for each amino acid at a time, except first is fixed at 5, calculate new BigPAB and new BigD}
             for index3 := 2 to 20 do
                begin
                for index4 := 0 to NumDivisions do
                   begin
                   TAI[AAOrder[index3]] := minTAI+(TAIrange/(NumDivisions*1.0)) * index4;
{calculate BigPAB values}
                   for index := 1 to 20 do
                      begin
                      AAIndex1:=AAOrder[index];
                      for index2 := 1 to 20 do
                         begin
                         AAIndex2:=AAOrder[index2];
                         BigPAB[AAIndex1, AAIndex2] := 0;
                         if AAIndex1 <> AAIndex2 then
                            BigPAB[AAIndex1, AAIndex2] := (exp(TAI[AAIndex2] - TAI[AAIndex1])) / (1 + (exp(TAI[AAIndex2] - TAI[AAIndex1])));
                         end;
                      end;

{calculate  BigD value}
                   NewBigD := 0;
                   for index := 1 to 20 do
                      begin
                      AAIndex1:=AAOrder[index];
                      for index2 := 1 to 20 do
                         begin
                         AAIndex2:=AAOrder[index2];
                         if AAIndex1 <> AAIndex2 then
                            begin
                            if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) > 0.000001 then
                               NewBigD := NewBigD + (nAB[AAIndex1, AAIndex2] + nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2]) + pAB[AAIndex2, AAIndex1]
                                     * ln(pAB[AAIndex2, AAIndex1] / BigPAB[AAIndex2, AAIndex1]));
                            if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) < 0.000001 then
                               begin
                               if pAB[AAIndex1, AAIndex2] > 0.000001 then
                                  NewBigD := NewBigD + (nAB[AAIndex1, AAIndex2]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2]));
                               if pAB[AAIndex2, AAIndex1] > 0.000001 then
                                  NewBigD := NewBigD + (nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex2, AAIndex1] * ln(pAB[AAIndex2, AAIndex1] / BigPAB[AAIndex2, AAIndex1]));
                               end;
                            end;
                         end;
                      end;
                   if NewBigD < (BigD + 0.000001) then
                      begin
							 BigD := NewBigD;
						    NewTAI := TAI[AAOrder[index3]];
                      end;
                   end; {each division}
                TAI[AAOrder[index3]] := NewTAI;
                end; {each amino acid}

                if BigD>(oldBigD-0.000001) then {done when BigD stops getting better}
                   begin
				    if unitDivisions>=maxUnitDivisions then
				       begin
                   writeln(myOutput, randomIndex : 8, BigD : 13 : 3);
				       if BigD<(overallBestBigD-0.000001) then
				          begin
				          overallBestBigD:=BigD;
					   	 for index := 1 to 20 do
						   	 bestTAI[index]:= TAI[index];
						    end;
                      iterationIndex:=maxNumIterations;
				       end;
				    if unitDivisions<maxUnitDivisions then
				       unitDivisions:=unitDivisions*10;
			       end;

             oldBigD:=BigD;
             end; {each iteration}
          end; {randomIndex}

	    averageTAI:=0;
	    for index:=1 to 20 do {calculate average TAI, then subtract to make average 0}
		    averageTAI:=averageTAI+bestTAI[index];
	    averageTAI:=averageTAI/20;
	    for index:=1 to 20 do
	       bestTAI[index]:=bestTAI[index]-averageTAI;

	    writeln(myOutput);
	    writeln(myOutput, 'The best D-score is ', overallBestBigD:12:3);
	    writeln(myOutput);
		 writeln(myOutput);
	    writeln(myOutput, 'AA   Asymmetry Index');
	    writeln(myOutput, '--   ---------------');
	    for index := 1 to 20 do
	       begin
			 writeln(myOutput,' ', numberToLetter(index), bestTAI[index] : 18 : 5);
          end;
	    writeln(myOutput);

	   {sort amino acids from lowest to highest TAI}
	    sortedTAI[0]:=-99;
		 sortedAA[0]:=-99;
	    for index:=1 to 20 do
	       begin
	       sortedTAI[index]:=99;
		    sortedAA[index]:=99;
		    end;
	    for index:=1 to 20 do
	       begin
		    for index2:=0 to 19 do
		       begin
			    if (bestTAI[index]>(sortedTAI[index2]+0.00001)) and (bestTAI[index]<(sortedTAI[index2+1]+0.00001)) then
			       begin
				    for index3:=21 downto (index2+2)  do
				       begin
				       sortedTAI[index3]:=sortedTAI[index3-1];
				       sortedAA[index3]:=sortedAA[index3-1];
				       end;
				    sortedTAI[index2+1]:=bestTAI[index];
				    sortedAA[index2+1]:=index;
				    end;
			    end;
		    end;
			 writeln(myOutput);
	       writeln(myOutput, 'AA   Asymmetry Index');
	       writeln(myOutput, '--   ---------------');
		    for index:=1 to 20 do
		       writeln(myOutput,' ', numberToLetter(sortedAA[index]), sortedTAI[index]:18:5);

		    writeln(myOutput);
			 writeln(myOutput);
			 writeln(myOutput, 'Sp.1-->Sp.2      N        p      E(p)');
			 writeln(myOutput, '-----------   ------   ------   ------');
	       for index:=1 to 20 do
		    begin
		    for index2:=(index+1) to 20 do
		       begin
			    EAB := (exp(sortedTAI[index2] - sortedTAI[index])) / (1 + (exp(sortedTAI[index2] - sortedTAI[index])));
			    N:=nAB[sortedAA[index], sortedAA[index2]]+nAB[sortedAA[index2], sortedAA[index]];
			    writeln(myOutput, '   ',numberToLetter(sortedAA[index]),'-->', numberToLetter(sortedAA[index2]),'      ',N:6, '   ',pAB[sortedAA[index], sortedAA[index2]]:6:4,'   ', EAB:6:4);
			    end;
		    end;
  	    end;


begin
  randomize;

  writeln('=================================================================');
  writeln;
  writeln('             Welcome to AsymmetryScaler.');
  writeln;
  writeln('For help on formatting the input file and other documentation, ');
  writeln(' or a version of this program for a different operating system, ');
  writeln(' see http://udel.edu/~mcdonald/asymmetryprograms.html.');
  writeln;

  inputCheck:=TRUE;
  while inputCheck do
     begin
     writeln('The input file must be a plain text file (extension .txt)');
     writeln('   in the same folder as this program.');
     writeln;
     write('Name of the input file: ');
     readln(InputFileString);
     if InputFileString[length(InputFileString)-3]<>'.' then
        InputFileString:=InputFileString+'.txt';

{The next few lines get the full path to the program, remove the name of the program, then
 add the name of the input file. The result is that "inputFileString" is converted from
 "input.txt" to "/Users/username/sliderfolder/input.txt" (on a Mac) or "C:\sliderfolder\input.txt" (on Windows).}
     inputPath:=paramStr(0);
     lastSlashPos:=0;
     for stringIndex:=1 to length(inputPath) do
        begin
	     if (inputPath[stringIndex]='/') or (inputPath[stringIndex]='\') then
	        lastSlashPos:=stringIndex;
	     end;
     delete(inputPath, lastSlashPos+1, length(inputPath)-lastSlashPos);
     inputFileFullString:=inputPath+inputFileString;
     assign(myInput, inputFileFullString);
     {$I-}
        reset(myInput);
     {$I+}
     if IOresult=0 then
        inputCheck:=FALSE;
	  if inputCheck then 
       writeln('I can''t find a file named "',inputfilestring,'", try again.');
     end;

 inputCheck:=TRUE; {get the name of the output file; if it already exists, ask before overwriting}
 while inputCheck do 
	 begin 
	 inputCheck:=FALSE; 
	 writeln; 
    write('Name of the output file: '); 
    readln(OutputFileString); 
    if OutputFileString[length(OutputFileString)-3]<>'.' then 
       OutputFileString:=OutputFileString+'.txt'; 
    OutputFileFullString:=inputPath+OutputFileString; 
    assign(myOutput, OutputFileFullString); 
    {$I-} 
       reset(myOutput); 
    {$I+} 
    if IOresult=0 then {file already exists} 
       begin 
       writeln('File "', outputfilestring, '" already exists. Do you want to overwrite it?'); 
       write('Enter "y" or "n": '); 
       readln(overWriteYes); 
       if overWriteYes='Y' then 
          overWriteYes:='y'; 
       if overWriteYes<>'y' then 
          begin 
          inputCheck:=TRUE; 
          close(myOutput); 
          end; 
       end; 
    end; 
 rewrite(myOutput); 

 writeln;
 inputCheck:=TRUE;
 while inputCheck do
	 begin
	 write('Number of replicates with random initial orders (1 to ', maxNumRandomInitial:4,'): ');
	 readln(numRandomInitial);
	 if (numRandomInitial>0) and (numRandomInitial<=maxNumRandomInitial) then
		 inputCheck:=FALSE;
	 if inputCheck then
		 writeln('Enter a number from 1 to ', maxNumRandomInitial:4);
	 end;
 writeln;

 writeln;
 inputCheck:=TRUE;
 while inputCheck do
	 begin
	 writeln('Do you want the asymmetry index precise to 1, 2, or 3 decimal places?');
	 writeln('(The program is VERY slow with 3 decimal places.)');
	 write('Enter 1, 2, or 3: ');
	 readln(numDecimalPlaces);
	 if (numDecimalPlaces=1) or (numDecimalPlaces=2) or (numDecimalPlaces=3) then
		 inputCheck:=FALSE;
	 if inputCheck then
		 writeln('Enter 1, 2, or 3.');
	 end;
 if numDecimalPlaces=1 then
	 maxUnitDivisions:=10;
 if numDecimalPlaces=2 then
	 maxUnitDivisions:=100;
 if numDecimalPlaces=3 then
	 maxUnitDivisions:=1000;
 writeln;
	
 DoAnalyzeInput;

 writeln;
 writeln;
 write('All done. Enter "q" to quit: ');
 readln(ActionChar);

 close(myOutput);
 close(myInput);
 end.

