program AsymmetryCounter; 

{Written by:       John H. McDonald} 
{                   Department of Biological Sciences} 
{                   University of Delaware} 
{                   Newark, Delaware  19716} 
{                   http://udel.edu/~mcdonald} 
{                   mcdonald@udel.edu} 
  const
     maxGeneLength = 10000; 

  var 
     myInput: text; {the input data file}
     myOutput: text; {the output data file}
	 myMatrixOutput: text; {the substitution matrix output file, for input into AsymmetryScaler} 
     inputCheck: boolean; {used in checking the user input for duplicate file names, etc.}
     aaCountArray: array[1..2,1..22] of longint; {used in counting frequencies of amino acids in two species; [n, 22] is total}
     lastSlashPos: integer; {used in separating file path from file name}
     stringIndex: integer; {used in separating file path from file name}
     InputFileString: string[250]; {user-provided name of input file}
     OutputFileString: string[250]; {user-provided name of output file}
	 outputFileFullString: string[250]; {path+name of output file}
	 inputFileFullString: string[250]; {path+name of input file}
	 inputPath: string[250]; {path of program file, prepended to user-input file names to make full names} 
     actionChar: char; {y/n response from user}
     overWriteYes: char; {y/n response from user}

{====================This function returns the greater of two integers.=====================} 
  function GreaterInteger (x, y: integer): integer; 
     var 
        First, Greater: integer; 
  	 begin 
     First := x; 
     Greater := y; 
     if First > Greater then 
        Greater := First; 
     GreaterInteger := Greater; 
     end; 

{=====================This function returns the lesser of two integers.=====================} 
  function LesserInteger (x, y: integer): integer; 
     var 
        First, Lesser: integer; 
     begin 
     First := x; 
     Lesser := y; 
     if First < Lesser then 
        Lesser := First; 
     LesserInteger := Lesser; 
     end; 

{====This function does exponentiation.=============} 
  function RaiseTo (x, y: extended): extended; 
     var 
        Result: extended; 
     begin 
     Result:=exp(y*ln(x)); 
     RaiseTo:=Result; 
     end; 

{======This function returns a number times its natural log, or 0 if the number is 0.=====} 
  function flnf (x: integer): extended; 
     var 
        F: integer; 
        Result: extended; 
     begin 
     F := x; 
     Result := 0; 
     if F > 0 then 
        Result := F * ln(F); 
     flnf := Result; 
     end; 

{=====This function does a G test of goodness-of-fit, comparing to an expected 50:50 ratio.====} 
  function Gtest(x,y:longint):extended; 
     var 
        A,B:real; 
        Result,expected:extended; 
     begin 
     A:=x*1.0; 
     B:=y*1.0; 

     expected:=(A+B)/2.0; 
     Result:=0; 
     if (A>0) and (B>0) then 
        Result:=2*(A*ln(A/expected)+B*ln(B/expected)); 
     if (A>0) and (B=0) then 
        Result:=2*(A*ln(A/expected)); 
     if (A=0) and (B>0) then 
        Result:=2*(B*ln(B/expected)); 
     if (A>0) and (B>0) then 
        Result:=result/(1+1/(2*(A+B))); {Williams correction} 
        Gtest:=Result; 
     end; 

{=====This function does the exact binomial test, comparing to an expected 50:50 ratio.===} 
  function Binomial(x,y:longint):extended; 
     var 
        N, S, T: longint; 
        factIndex, sIndex: integer; 
        Result, Top, Bottom, Binom:extended; 
     begin 
     N:=x+y; 
     S:=LesserInteger(x,y); 
     T:=GreaterInteger(x,y); 
     Result:=RaiseTo(0.5,N); 
     if S=T then 
        Result:=0.5; 
     if (S<>T) and (S>0) and (T>0) then 
        begin 
        for sIndex:=1 to S do 
           begin 
           binom:=RaiseTo(0.5,sIndex)*RaiseTo(0.5,N-sIndex); 
           Top:=1; 
           Bottom:=1; 
           for factIndex:=(N-sIndex+1) to N do 
              Top:=Top*factIndex; 
           for factIndex:=1 to sIndex do 
              Bottom:=Bottom*factIndex; 
           Result:=Result+(top/bottom)*Binom; 
           end; 
        end; 
     Binomial:=2*Result; 
     end; 

{=======This function calculates the P-value for the chi-square with one degree of freedom,====} 
{=======by interpolating in a table of values. I'm sure there are better ways to do this, but it works.=====}
  function ChiProb(x: extended):extended; 
     var 
        P: array[0..300] of extended; 
        result, chi: extended; 
     begin 
     P[0]:=0; P[1]:=-0.285245531; P[2]:=-0.423546323; P[3]:=-0.538055647; 
     P[4]:=-0.640385376; P[5]:=-0.735011126; P[6]:=-0.824217529; P[7]:=-0.909355598; 
     P[8]:=-0.991301572; P[9]:=-1.070661426; P[10]:=-1.147873503; P[11]:=-1.223269975; 
     P[12]:=-1.297105078; P[13]:=-1.369581297; P[14]:=-1.440861504; 
     P[15]:=-1.511080144; P[16]:=-1.580348535; P[17]:=-1.648760639; 
     P[18]:=-1.716396445; P[19]:=-1.78332488; P[20]:=-1.849605143; P[21]:=-1.915289392; 
     P[22]:=-1.980423497; P[23]:=-2.045047505; P[24]:=-2.109197646; 
     P[25]:=-2.172905679; P[26]:=-2.236200706; P[27]:=-2.299108349; 
     P[28]:=-2.361652416; P[29]:=-2.423854038; P[30]:=-2.485732506; 
     P[31]:=-2.547305872; P[32]:=-2.608590238; P[33]:=-2.669600561; 
     P[34]:=-2.730350508; P[35]:=-2.79085306; P[36]:=-2.85111986; P[37]:=-2.911161841; 
     P[38]:=-2.970989053; P[39]:=-3.03061121; P[40]:=-3.090037008; P[41]:=-3.149274702; 
     P[42]:=-3.208332022; P[43]:=-3.267216099; P[44]:=-3.325933986; 
     P[45]:=-3.384491948; P[46]:=-3.442896009; P[47]:=-3.501151854; 
     P[48]:=-3.559264843; P[49]:=-3.617240048; P[50]:=-3.675082263; 
     P[51]:=-3.732795941; P[52]:=-3.790385584; P[53]:=-3.847855176; 
     P[54]:=-3.905208603; P[55]:=-3.962449557; P[56]:=-4.019581556; 
     P[57]:=-4.076607948; P[58]:=-4.133531929; P[59]:=-4.190356544; 
     P[60]:=-4.247084616; P[61]:=-4.303719111; P[62]:=-4.360262591; P[63]:=-4.4167176; 
     P[64]:=-4.473086576; P[65]:=-4.529371854; P[66]:=-4.585575675; 
     P[67]:=-4.641700187; P[68]:=-4.697747454; P[69]:=-4.753719458; 
     P[70]:=-4.809618104; P[71]:=-4.865445223; P[72]:=-4.92120258; P[73]:=-4.976891871; 
     P[74]:=-5.032514731; P[75]:=-5.088072668; P[76]:=-5.143567341; 
     P[77]:=-5.199000142; P[78]:=-5.254372484; P[79]:=-5.309685734; 
     P[80]:=-5.364941208; P[81]:=-5.420140182; P[82]:=-5.475283886; 
     P[83]:=-5.530373513; P[84]:=-5.585410213; P[85]:=-5.640395103; 
     P[86]:=-5.695329261; P[87]:=-5.750213735; P[88]:=-5.805049535; 
     P[89]:=-5.859837646; P[90]:=-5.914579018; P[91]:=-5.969274575; 
     P[92]:=-6.023925212; P[93]:=-6.0785318; P[94]:=-6.133095181; P[95]:=-6.187616176; 
     P[96]:=-6.242095581; P[97]:=-6.296534118; P[98]:=-6.350932644; 
     P[99]:=-6.405291835; P[100]:=-6.459612403; 
     P[101]:=-7.000837153; P[102]:=-7.538856708; 
     P[103]:=-8.07414018; P[104]:=-8.607059703; P[105]:=-9.137915728; 
     P[106]:=-9.666954294; P[107]:=-10.19437961; P[108]:=-10.72036304; 
     P[109]:=-11.24504985; P[110]:=-11.76856426; P[111]:=-12.29101336; 
     P[112]:=-12.8124901; P[113]:=-13.33307577; P[114]:=-13.85284176; 
     P[115]:=-14.37185121; P[116]:=-14.89016018; P[117]:=-15.40781871; 
     P[118]:=-15.92487165; P[119]:=-16.44135932; P[120]:=-16.95731816; 
     P[121]:=-17.47278115; P[122]:=-17.98777831; P[123]:=-18.50233697; 
     P[124]:=-19.01648214; P[125]:=-19.53023672; P[126]:=-20.04362177; 
     P[127]:=-20.55665669; P[128]:=-21.06935938; P[129]:=-21.58174639; 
     P[130]:=-22.09383308; P[131]:=-22.60563371; P[132]:=-23.11716152; 
     P[133]:=-23.62842888; P[134]:=-24.13944733; P[135]:=-24.65022764; 
     P[136]:=-25.16077991; P[137]:=-25.67111361; P[138]:=-26.18123763; 
     P[139]:=-26.69116032; P[140]:=-27.20088954; P[141]:=-27.71043272; 
     P[142]:=-28.21979684; P[143]:=-28.72898851; P[144]:=-29.23801398; 
     P[145]:=-29.74687915; P[146]:=-30.25558964; P[147]:=-30.76415075; 
     P[148]:=-31.27256753; P[149]:=-31.78084477; P[150]:=-32.28898703; 
     P[151]:=-32.79699865; P[152]:=-33.30488375; P[153]:=-33.81264627; 
     P[154]:=-34.32028998; P[155]:=-34.82781845; P[156]:=-35.33523512; 
     P[157]:=-35.84254326; P[158]:=-36.34974601; P[159]:=-36.85684636; 
     P[160]:=-37.36384718; P[161]:=-37.87075124; P[162]:=-38.37756117; 
     P[163]:=-38.88427951; P[164]:=-39.39090869; P[165]:=-39.89745104; 
     P[166]:=-40.40390882; P[167]:=-40.91028419; P[168]:=-41.41657922; 
     P[169]:=-41.92279591; P[170]:=-42.4289362; P[171]:=-42.93500193; 
     P[172]:=-43.44099491; P[173]:=-43.94691685; P[174]:=-44.45276941; 
     P[175]:=-44.95855422; P[176]:=-45.46427282; P[177]:=-45.9699267; 
     P[178]:=-46.47551733; P[179]:=-46.98104609; P[180]:=-47.48651435; 
     P[181]:=-47.99192342; P[182]:=-48.49727456; P[183]:=-49.00256899; 
     P[184]:=-49.50780793; P[185]:=-50.0129925; P[186]:=-50.51812384; 
     P[187]:=-51.02320302; P[188]:=-51.52823109; P[189]:=-52.03320908; 
     P[190]:=-52.53813797; P[191]:=-57.58492454; P[192]:=-62.62770369; 
     P[193]:=-67.66710828; P[194]:=-72.70363215; P[195]:=-77.73766805; 
     P[196]:=-82.76953349; P[197]:=-87.79948883; P[198]:=-92.82775021; 
     P[199]:=-97.85449903; P[200]:=-102.879889; P[201]:=-107.9040516; 
     P[202]:=-112.9271; P[203]:=-117.9491326; P[204]:=-122.9702349; 
     P[205]:=-127.9904826; P[206]:=-133.009942; P[207]:=-138.0286724; 
     P[208]:=-143.0467262; P[209]:=-148.0641509; P[210]:=-153.0809886; 
     P[211]:=-158.0972777; P[212]:=-163.1130528; P[213]:=-168.1283454; 
     P[214]:=-173.143184; P[215]:=-178.1575949; P[216]:=-183.1716021; 
     P[217]:=-188.1852275; P[218]:=-193.1984915; P[219]:=-198.2114126; 
     P[220]:=-203.2240082; P[221]:=-208.2362943; P[222]:=-213.2482857; 
     P[223]:=-218.2599962; P[224]:=-223.2714387; P[225]:=-228.2826252; 
     P[226]:=-233.2935668; P[227]:=-238.3042742; P[228]:=-243.314757; 
     P[229]:=-248.3250246; P[230]:=-253.3350855; 

     result:=1; 
     if x<=10 then 
        begin 
        chi:=x*10; 
        result:=exp(P[trunc(chi)]-(chi-trunc(chi))*(P[trunc(chi)]-P[trunc(chi+1)])); 
        end; 
     if (x>10) and (x<=100) then 
        begin 
        chi:=x+90; 
        result:=exp(P[trunc(chi)]-(chi-trunc(chi))*(P[trunc(chi)]-P[trunc(chi+1)])); 
        end; 
     if (x>100) and (x<=500) then 
        begin 
        chi:=(x-100)/10+190; 
        result:=exp(P[trunc(chi)]-(chi-trunc(chi))*(P[trunc(chi)]-P[trunc(chi+1)])); 
        end; 
     chiProb:=result; 
     end; 


{=========This function converts amino acid symbols to numbers, for easier manipulation.================} 
  function letterToNumber(x:char):integer; 
     var 
        letter:char; 
        result:integer; 
     begin 
     letter:=x; 
     case letter of 
        'A': result:=1; 
        'C': result:=2; 
        'D': result:=3; 
        'E': result:=4; 
        'F': result:=5; 
        'G': result:=6; 
        'H': result:=7; 
        'I': result:=8; 
        'K': result:=9; 
        'L': result:=10; 
        'M': result:=11; 
        'N': result:=12; 
        'P': result:=13; 
        'Q': result:=14; 
        'R': result:=15; 
        'S': result:=16; 
        'T': result:=17; 
        'V': result:=18; 
        'W': result:=19; 
        'Y': result:=20; 
        'X': result:=21; 
        '-': result:=22; 
     otherwise 
        result:=23; 
        end; 
     letterToNumber:=result; 
     end; 

{====This function converts numbers back into amino acid symbols, for output.====}
  function NumberToLetter(x:integer):string; 
     var 
        number:integer; 
        result:string; 
     begin 
     number:=x; 
     case number of 
        1: result:='A'; 
        2: result:='C'; 
        3: result:='D'; 
        4: result:='E'; 
        5: result:='F'; 
        6: result:='G'; 
        7: result:='H'; 
        8: result:='I'; 
        9: result:='K'; 
        10: result:='L'; 
        11: result:='M'; 
        12: result:='N'; 
        13: result:='P'; 
        14: result:='Q'; 
        15: result:='R'; 
        16: result:='S'; 
        17: result:='T'; 
        18: result:='V'; 
        19: result:='W'; 
        20: result:='Y'; 
        21: result:='X'; 
        22: result:='-'; 

     otherwise 
        result:='?'; 
        end; 
     numberToLetter:=result; 
     end; 

{====Reads in the data from the input file, counts the number of each pair of amino acids, then outputs the results.======} 
  procedure DoAnalyzeInput; 
     var 
        geneName: string; {the name of each gene} 
        aminoAcid: char; {amino acid symbols, as read from the input file}
        bigArray:array[1..2,1..maxGeneLength] of integer; {array of amino acid sequences for one gene in two species} 
        FirstBactIndex, SecondBactIndex, ArrayIndex, GeneIndex: longint; {indexes of for loops}
        AminoAcidCount1: longint; {number of amino acids in gene in species 1} 
        AminoAcidCount2: longint; {number of amino acids in gene in species 2}
        numberAminoAcid: integer; {amino acid symbol translated into a number}
        TotalAB: longint; {total number of sites with amino acid A in sp. 1 and B in sp. 2}
        TotalBA: longint; {total number of sites with amino acid B in sp. 1 and A in sp. 2}
        statTest: extended; {G-value of the G-test of goodness-of-fit}
        pValue: extended; {P-value of the G-test or binomial test} 
        totalArray:array[1..22,1..22] of longint; {substitution matrix--total number of substitutions in each direction}
    
	 begin 
     for arrayIndex:=1 to 22 do {initialize counting arrays}
        begin 
        aaCountArray[1,arrayIndex]:=0; 
        aaCountArray[2,arrayIndex]:=0; 
        for secondBactIndex:=1 to 22 do 
           totalArray[arrayIndex, secondBactIndex]:=0; 
        end; 
     
	 read(myInput, aminoAcid); {reads the first '>' in the file} 
     readln(myInput, geneName); 
     geneIndex:=0;

     while not eof(myInput) do 
        begin 
        geneIndex:=geneIndex+1; 
        aminoAcidCount1:=0; 
        aminoAcid:='-'; 
        while (aminoAcid<>'>') and (not eof(myInput)) do {">"=start of gene from other species} 
           begin 
           read(myInput, aminoAcid); 
           numberAminoAcid:=lettertonumber(aminoAcid); 
           if numberAminoAcid<22 then  {22 is '-' or lowercase letter}
              begin 
              aminoAcidCount1:=aminoAcidCount1+1; 
              aaCountArray[1,numberAminoAcid]:=aaCountArray[1,numberAminoAcid]+1; 
              aaCountArray[1,22]:=aaCountArray[1,22]+1; 
              bigArray[1,aminoAcidCount1]:=numberAminoAcid; 
              end; 
           end; 

        aminoAcid:='-'; 
        readln(myInput);{start of gene from second species} 
        aminoAcidCount2:=0; 
        while (aminoAcid<>'>') and (not eof(myInput)) do {">"=start of next gene} 
           begin 
           read(myInput,aminoAcid); 
           numberAminoAcid:=lettertonumber(aminoAcid); 
           if numberAminoAcid<22 then 
              begin 
              aminoAcidCount2:=AminoAcidCount2+1; 
              aaCountArray[2,numberAminoAcid]:=aaCountArray[2,numberAminoAcid]+1; 
              aaCountArray[2,22]:=aaCountArray[2,22]+1; 
              bigArray[2,aminoAcidCount2]:=numberAminoAcid; 
              if (bigArray[1,AminoAcidCount2]<22) and (bigArray[2,AminoAcidCount2]<22) then 
                  begin 
                  totalArray[bigArray[1, aminoAcidCount2],bigArray[2, aminoAcidCount2]]:= 
                      totalArray[bigArray[1, aminoAcidCount2],bigArray[2, aminoAcidCount2]]+1; 
                  end; 
              end; 
           end; 

        if not eof(myInput) then 
           readln(myInput,geneName); {start with next gene}
        end; 

     writeln(myOutput,'Amino Acid Composition at unambiguously aligned sites'); {start writing totals to output file}
     writeln(myOutput); 
     writeln(myOutput,'AA     sp. 1     sp. 2 '); 
     writeln(myOutput,'--    -------   -------'); 
     for arrayIndex:=1 to 21 do 
        writeln(myOutput,numbertoletter(arrayIndex):2, (aaCountArray[1,arrayIndex]/aaCountArray[1,22]):11:5,
           (aaCountArray[2,arrayIndex]/aaCountArray[2,22]):10:5); 
     writeln(myOutput); 
     writeln(myOutput,'total', aaCountArray[1,22]:8, aaCountArray[2,22]:10); 
     writeln(myOutput); 
     writeln(myOutput); 
     writeln(myOutput,'           sp. 1-->sp. 2            G-value         P'); 
     writeln(myOutput,'--------------------------------   --------     ---------'); 
     for FirstBactIndex:=1 to 20 do 
        begin 
        for SecondBactIndex:=FirstBactIndex to 20 do 
           begin 
           write(myOutput,NumberToLetter(FirstBactIndex), '-->',NumberToLetter(SecondBactIndex),'    '); 
           TotalAB:=totalArray[firstBactIndex, secondBactIndex]; 
           write(myOutput, TotalAB:5, '    '); 
           TotalBA:=totalArray[secondBactIndex, firstBactIndex]; 
           write(myOutput,NumberToLetter(SecondBactIndex), '-->',NumberToLetter(FirstBactIndex),'    ', totalBA:5); 

           if (TotalBA+TotalAB)<50 then {do binomial test of goodness-of-fit}
              begin 
              Pvalue:=binomial(totalAB, totalBA); 
              if Pvalue>=0.00001 then 
                 write(myOutput, '       ----', '  ', Pvalue:12:7);
              if Pvalue<0.00001 then 
                 write(myOutput, '       ----', '  ',Pvalue:12); 
              end; 

           if (totalBA+totalAB)>49 then {do G-test of goodness-of-fit}
              begin 
              statTest:=Gtest(totalAB, totalBA); 
              if statTest<454 then 
                  begin 
                  Pvalue:=chiProb(statTest); 
                  if Pvalue>=0.00001 then 
                     write(myOutput, ' ', statTest:10:4, '  ',Pvalue:12:7); 
                  if Pvalue<0.00001 then 
                     write(myOutput, ' ', statTest:10:4, '  ',Pvalue:12); 
                  end; 
              if statTest>=454 then 
                  write(myOutput, ' ', statTest:10:4, '    <1.00E-100'); 
              end; 

           if Pvalue<0.05 then {write one, two or three stars for significant P-values}
              write(myOutput, '*'); 
           if Pvalue<0.01 then 
              write(myOutput, '*'); 
           if Pvalue<0.001 then 
              write(myOutput, '*'); 
           writeln(myOutput); 
           end; 
        end; 
     writeln(myOutput); 

     for FirstBactIndex:=1 to 21 do {write results for amino acid symbol 'X' to output file}
        begin 
        for SecondBactIndex:=21 to 21 do 
           begin 
           write(myOutput,NumberToLetter(FirstBactIndex), '-->',NumberToLetter(SecondBactIndex),'    '); 
           TotalAB:=totalArray[firstBactIndex, secondBactIndex]; 
           write(myOutput, TotalAB:5, '    '); 
           TotalBA:=totalArray[secondBactIndex, firstBactIndex]; 
           write(myOutput,NumberToLetter(SecondBactIndex), '-->',NumberToLetter(FirstBactIndex),'    ', totalBA:5); 

           if (TotalBA+TotalAB)<50 then 
              begin 
              Pvalue:=binomial(totalAB, totalBA); 
              if Pvalue>=0.00001 then 
                  write(myOutput, '       ----', '  ',Pvalue:12:7); 
              if Pvalue<0.00001 then 
                  write(myOutput, '       ----', '  ',Pvalue:12); 
              end; 
           if (totalBA+totalAB)>49 then 
              begin 
              statTest:=Gtest(totalAB, totalBA); 
              if statTest<454 then 
                  begin 
                  Pvalue:=chiProb(statTest); 
                  if Pvalue>=0.00001 then 
                     write(myOutput, ' ', statTest:10:4, '  ',Pvalue:12:7); 
                  if Pvalue<0.00001 then 
                     write(myOutput, ' ', statTest:10:4, '  ',Pvalue:12); 
                  end; 
              if statTest>=454 then 
                  write(myOutput, ' ', statTest:10:4, '    <1.00E-100'); 
              end; 

           if Pvalue<0.05 then 
              write(myOutput, '*'); 
           if Pvalue<0.01 then 
              write(myOutput, '*'); 
           if Pvalue<0.001 then 
              write(myOutput, '*'); 
           writeln(myOutput); 
           end; 
        end; 

     writeln(myMatrixOutput, '           amino acid in sp. 2'); {start writing results to substitution matrix file}
     write(myMatrixOutput, 'sp. 1 '); 
     for arrayIndex:=1 to 20 do 
         write(myMatrixOutput, NumberToLetter(arrayIndex):6); 
     writeln(myMatrixOutput); 
     for FirstBactIndex:=1 to 20 do 
        begin 
        write(myMatrixOutput, NumberToLetter(firstBactIndex):6); 
        for SecondBactIndex:=1 to 20 do 
           write(myMatrixOutput,totalArray[firstBactIndex,secondBactIndex]:6); 
        writeln(myMatrixOutput); 
        end; 
     end; 

  begin 
  writeln('================================================================='); 
  writeln; 
  writeln('             Welcome to AsymmetryCounter.'); 
  writeln; 
  writeln('For help on formatting the input file and other documentation, '); 
  writeln(' or a version of this program for a different operating system, '); 
  writeln(' see http://udel.edu/~mcdonald/asymmetryprograms.html.'); 
  writeln; 

  inputCheck:=TRUE; {get the name of the input file; if it doesn't exist, keep trying}
  while inputCheck do 
     begin 
     writeln('The input file must be a plain text file (extension .txt)'); 
     writeln('   in the same folder as this program.'); 
     writeln; 
     write('Name of the input file: '); 
     readln(InputFileString); 
     if InputFileString[length(InputFileString)-3]<>'.' then 
		InputFileString:=InputFileString+'.txt'; 

{The next few lines get the full path to the program, remove the name of the program, then 
add the name of the input file. The result is that "inputFileString" is converted from 
"input.txt" to "/Users/username/sliderfolder/input.txt" (on a Mac) or "C:\sliderfolder\input.txt" (on Windows).} 
     inputPath:=paramStr(0); 
     lastSlashPos:=0; 
     for stringIndex:=1 to length(inputPath) do 
        begin 
        if (inputPath[stringIndex]='/') or (inputPath[stringIndex]='\') then 
           lastSlashPos:=stringIndex; 
        end; 
     delete(inputPath, lastSlashPos+1, length(inputPath)-lastSlashPos); 
     inputFileFullString:=inputPath+inputFileString; 
     assign(myInput, inputfilefullstring); 
     {$I-} 
        reset(myInput); 
     {$I+} 
     if IOresult=0 then 
        inputCheck:=FALSE; 
     if inputCheck then 
        writeln('I can''t find a file named "',inputfilestring,', try again.'); 
     end; 

  inputCheck:=TRUE; {get the name of the output file; if it already exists, ask before overwriting}
  while inputCheck do 
     begin 
     inputCheck:=FALSE; 
     writeln; 
     write('Name of the main output file: '); 
     readln(OutputFileString); 
     if OutputFileString[length(OutputFileString)-3]<>'.' then 
        OutputFileString:=OutputFileString+'.txt'; 
     OutputFileFullString:=inputPath+OutputFileString; 
     assign(myOutput, OutputFileFullString); 
     {$I-} 
     reset(myOutput); 
     {$I+} 
     if IOresult=0 then {file already exists} 
        begin 
        writeln('File "', outputfilestring, '" already exists. Do you want to overwrite it?'); 
        write('Enter "y" or "n": '); 
        readln(overWriteYes); 
        if overWriteYes='Y' then 
           overWriteYes:='y'; 
        if overWriteYes<>'y' then 
           begin 
           inputCheck:=TRUE; 
           close(myOutput); 
           end; 
        end; 
     end; 
  rewrite(myOutput); 

  inputCheck:=TRUE; {get the name of the matrix output file; if it already exists, ask before overwriting}
  while inputCheck do 
     begin 
     inputCheck:=FALSE; 
     writeln; 
     write('Name of the substitution matrix output file: '); 
	 readln(OutputFileString); 
     if OutputFileString[length(OutputFileString)-3]<>'.' then 
        OutputFileString:=OutputFileString+'.txt'; 
     OutputFileFullString:=inputPath+OutputFileString; 
     assign(myMatrixOutput, OutputFileFullString); 
     {$I-} 
     reset(myMatrixOutput); 
     {$I+} 
     if IOresult=0 then {file already exists} 
        begin 
        writeln('File "', outputFileString, '" already exists. Do you want to overwrite it?'); 
        write('Enter "y" or "n": '); 
        readln(overWriteYes); 
        if overWriteYes='Y' then 
           overwriteYes:='y'; 
        if overWriteYes<>'y' then 
           begin 
           inputCheck:=TRUE; 
           close(myMatrixOutput); 
           end; 
        end; 
     end; 
  rewrite(myMatrixOutput); 

  DoAnalyzeInput; 

  writeln; 
  writeln; 
  write('All done. Enter "q" to quit: '); 
  readln(ActionChar); 

  close(myMatrixOutput); 
  close(myOutput); 
  close(myInput); 
  end. 