program AsymmetryScaler; {Written by: John H. McDonald} { Department of Biological Sciences} { University of Delaware} { Newark, Delaware 19716} { http://udel.edu/~mcdonald} { mcdonald@udel.edu} const initialMaxTAI=10.0; maxNumRandomInitial=1000; maxNumIterations=1000; var myOutput: text; {the output data file} myInput: text; {the input data file} lastSlashPos: integer; {used in separating file path from file name} stringIndex: integer; {used in separating file path from file name} numRandomInitial: integer; {number of random initial starting positions for amino acids on scale} numDecimalPlaces: integer; {number of decimal places of precision, 1, 2, or 3} maxUnitDivisions: integer; {number of positions in the scale} InputFileString: string[250]; {user-provided name of input file} OutputFileString: string[250]; {user-provided name of output file} outputFileFullString: string[250]; {path+name of output file} inputFileFullString: string[250]; {path+name of input file} inputPath: string[250]; {path of program file, prepended to user-input file names to make full names} actionChar: char; {y/n response from user} overWriteYes: char; {y/n response from user} inputCheck: boolean; {used in checking the user input for duplicate file names, etc.} {=========This function converts numbers to amino acid symbols.================} function NumberToLetter(x:integer):string; var number:integer; result:string; begin number:=x; case number of 1: result:='A'; 2: result:='C'; 3: result:='D'; 4: result:='E'; 5: result:='F'; 6: result:='G'; 7: result:='H'; 8: result:='I'; 9: result:='K'; 10: result:='L'; 11: result:='M'; 12: result:='N'; 13: result:='P'; 14: result:='Q'; 15: result:='R'; 16: result:='S'; 17: result:='T'; 18: result:='V'; 19: result:='W'; 20: result:='Y'; 21: result:='X'; 22: result:='-'; otherwise result:='?'; end; numberToLetter:=result; end; {====Reads in the data from the input file and analyzes it} procedure DoAnalyzeInput; var dummy: char; index, index2, index3, index4, IterationIndex, RandomIndex, randomInt: integer; nAB: array[1..20, 1..20] of integer; pAB: array[1..20, 1..20] of extended; EAB: extended; N: integer; BigPAB: array[1..20, 1..20] of extended; bestTAI: array[1..20] of extended; sortedTAI: array[0..21] of extended; sortedAA: array[0..21] of integer; TAI: array[1..20] of real; BigD, NewBigD, NewTAI, oldBigD, overallBestBigD, averageTAI, minTAI, maxTAI, TAIrange: extended; AAOrder: array[1..20] of integer; AAIndex1,AAIndex2, numDivisions, unitDivisions: integer; begin readln(myInput); {read and ignore first two lines of matrix file} readln(myInput); writeln(myOutput, 'D-score after each initial condition.'); writeln(myOutput, 'Replicate D-score'); writeln(myOutput, '--------- ----------'); overallBestBigD:=1000000; unitDivisions:=10; for index := 1 to 20 do {read n(obs) values for each pair of amino acids from matrix file} begin for index2:=1 to 6 do read (myInput, dummy); {ignore name of amino acid} for index2 := 1 to 20 do begin read(myInput, nAB[index, index2]); end; readln(myInput); end; {calculate pAB values, the observed proportion in each direction} for index := 1 to 20 do begin for index2 := 1 to 20 do begin pAB[index, index2] := 0; if nAB[index, index2] > 0 then pAB[index, index2] := nAB[index, index2] / (nAB[index, index2] + nAB[index2, index]); end; end; writeln; writeln(' Progress Bar'); {set up a progress bar on the screen} if numRandomInitial>19 then writeln('replicate 1 ',(numRandomInitial div 4):3,' ',(numRandomInitial div 2):3, ' ', 3*(numRandomInitial div 4):3, ' ',numRandomInitial:4); if numRandomInitial<19 then begin write('replicate 1'); for index:=2 to numRandomInitial do begin if (index mod 5)=0 then write(index:2); if ((index mod 5)<>0) and ((index mod 5)<>4) then write(' '); end; writeln; end; write(' '); for RandomIndex := 1 to numRandomInitial do {do replicates with random initial order and random initial TAI values} begin if numRandomInitial>1 then {write a progress bar} begin if round(20.0*randomIndex/numRandomInitial)>round(20.0*(randomIndex-1)/numRandomInitial) then write('|'); end; AAOrder[1]:=1; for index:=2 to 20 do AAOrder[index]:=99; for index:=2 to 20 do begin randomInt:=round(random*19.0+1.0); while randomInt<99 do begin if AAOrder[randomInt]>20 then begin AAOrder[randomInt]:=index; randomInt:=100; end; randomInt:=randomInt+1; if randomInt=21 then randomInt:=2; end; end; {assign initial TAI at random, except first AA is 5.0} TAI[1] := initialMaxTAI/2; for index := 2 to 20 do TAI[index] := random*initialMaxTAI; {calculate initial BigPAB values, the predicted proportion in each direction} for index := 1 to 20 do begin AAIndex1:=AAOrder[index]; for index2 := 1 to 20 do begin AAIndex2:=AAOrder[index2]; BigPAB[AAIndex1, AAIndex2] := 0; if AAIndex1 <> AAIndex2 then BigPAB[AAIndex1, AAIndex2] := (exp(TAI[AAIndex2] - TAI[AAIndex1])) / (1 + (exp(TAI[AAIndex2] - TAI[AAIndex1]))); end; end; {calculate initial BigD value, sum of weighted function of differences between observed and expected proportions} BigD := 0; for index := 1 to 20 do begin AAIndex1:=AAOrder[index]; for index2 := 1 to 20 do begin AAIndex2:=AAOrder[index2]; if AAIndex1 <> AAIndex2 then begin if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) > 0.000001 then {both non-zero} BigD := BigD + (nAB[AAIndex1, AAIndex2] + nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2]) + pAB[AAIndex2, AAIndex1] * ln(pAB[AAIndex2, AAIndex1] / BigPAB[AAIndex2, AAIndex1])); if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) < 0.000001 then {one is zero} begin if pAB[AAIndex1, AAIndex2] > 0.000001 then BigD := BigD + (nAB[AAIndex1, AAIndex2]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2])); if pAB[AAIndex2, AAIndex1] > 0.000001 then BigD := BigD + (nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex2, AAIndex1] * ln(pAB[AAIndex2, AAIndex1] / BigPAB[AAIndex2, AAIndex1])); end; end; end; end; {do a bunch of iterations} oldBigD:=100000; {bigD is the measure of overall fit; lower numbers are better fit} iterationIndex:=0; while (iterationIndexmaxTAI then maxTAI:=TAI[index3]; end; TAIrange:=maxTAI-minTAI; minTAI:=trunc(minTAI-0.1*TAIrange); maxTAI:=trunc(maxTAI+0.1*TAIrange+1); TAIrange:=maxTAI-minTAI; numDivisions:=round(TAIrange*unitDivisions); {now change the TAI values for each amino acid at a time, except first is fixed at 5, calculate new BigPAB and new BigD} for index3 := 2 to 20 do begin for index4 := 0 to NumDivisions do begin TAI[AAOrder[index3]] := minTAI+(TAIrange/(NumDivisions*1.0)) * index4; {calculate BigPAB values} for index := 1 to 20 do begin AAIndex1:=AAOrder[index]; for index2 := 1 to 20 do begin AAIndex2:=AAOrder[index2]; BigPAB[AAIndex1, AAIndex2] := 0; if AAIndex1 <> AAIndex2 then BigPAB[AAIndex1, AAIndex2] := (exp(TAI[AAIndex2] - TAI[AAIndex1])) / (1 + (exp(TAI[AAIndex2] - TAI[AAIndex1]))); end; end; {calculate BigD value} NewBigD := 0; for index := 1 to 20 do begin AAIndex1:=AAOrder[index]; for index2 := 1 to 20 do begin AAIndex2:=AAOrder[index2]; if AAIndex1 <> AAIndex2 then begin if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) > 0.000001 then NewBigD := NewBigD + (nAB[AAIndex1, AAIndex2] + nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2]) + pAB[AAIndex2, AAIndex1] * ln(pAB[AAIndex2, AAIndex1] / BigPAB[AAIndex2, AAIndex1])); if (pAB[AAIndex1, AAIndex2] * pAB[AAIndex2, AAIndex1]) < 0.000001 then begin if pAB[AAIndex1, AAIndex2] > 0.000001 then NewBigD := NewBigD + (nAB[AAIndex1, AAIndex2]) * (pAB[AAIndex1, AAIndex2] * ln(pAB[AAIndex1, AAIndex2] / BigPAB[AAIndex1, AAIndex2])); if pAB[AAIndex2, AAIndex1] > 0.000001 then NewBigD := NewBigD + (nAB[AAIndex2, AAIndex1]) * (pAB[AAIndex2, AAIndex1] * ln(pAB[AAIndex2, AAIndex1] / BigPAB[AAIndex2, AAIndex1])); end; end; end; end; if NewBigD < (BigD + 0.000001) then begin BigD := NewBigD; NewTAI := TAI[AAOrder[index3]]; end; end; {each division} TAI[AAOrder[index3]] := NewTAI; end; {each amino acid} if BigD>(oldBigD-0.000001) then {done when BigD stops getting better} begin if unitDivisions>=maxUnitDivisions then begin writeln(myOutput, randomIndex : 8, BigD : 13 : 3); if BigD<(overallBestBigD-0.000001) then begin overallBestBigD:=BigD; for index := 1 to 20 do bestTAI[index]:= TAI[index]; end; iterationIndex:=maxNumIterations; end; if unitDivisions(sortedTAI[index2]+0.00001)) and (bestTAI[index]<(sortedTAI[index2+1]+0.00001)) then begin for index3:=21 downto (index2+2) do begin sortedTAI[index3]:=sortedTAI[index3-1]; sortedAA[index3]:=sortedAA[index3-1]; end; sortedTAI[index2+1]:=bestTAI[index]; sortedAA[index2+1]:=index; end; end; end; writeln(myOutput); writeln(myOutput, 'AA Asymmetry Index'); writeln(myOutput, '-- ---------------'); for index:=1 to 20 do writeln(myOutput,' ', numberToLetter(sortedAA[index]), sortedTAI[index]:18:5); writeln(myOutput); writeln(myOutput); writeln(myOutput, 'Sp.1-->Sp.2 N p E(p)'); writeln(myOutput, '----------- ------ ------ ------'); for index:=1 to 20 do begin for index2:=(index+1) to 20 do begin EAB := (exp(sortedTAI[index2] - sortedTAI[index])) / (1 + (exp(sortedTAI[index2] - sortedTAI[index]))); N:=nAB[sortedAA[index], sortedAA[index2]]+nAB[sortedAA[index2], sortedAA[index]]; writeln(myOutput, ' ',numberToLetter(sortedAA[index]),'-->', numberToLetter(sortedAA[index2]),' ',N:6, ' ',pAB[sortedAA[index], sortedAA[index2]]:6:4,' ', EAB:6:4); end; end; end; begin randomize; writeln('================================================================='); writeln; writeln(' Welcome to AsymmetryScaler.'); writeln; writeln('For help on formatting the input file and other documentation, '); writeln(' or a version of this program for a different operating system, '); writeln(' see http://udel.edu/~mcdonald/asymmetryprograms.html.'); writeln; inputCheck:=TRUE; while inputCheck do begin writeln('The input file must be a plain text file (extension .txt)'); writeln(' in the same folder as this program.'); writeln; write('Name of the input file: '); readln(InputFileString); if InputFileString[length(InputFileString)-3]<>'.' then InputFileString:=InputFileString+'.txt'; {The next few lines get the full path to the program, remove the name of the program, then add the name of the input file. The result is that "inputFileString" is converted from "input.txt" to "/Users/username/sliderfolder/input.txt" (on a Mac) or "C:\sliderfolder\input.txt" (on Windows).} inputPath:=paramStr(0); lastSlashPos:=0; for stringIndex:=1 to length(inputPath) do begin if (inputPath[stringIndex]='/') or (inputPath[stringIndex]='\') then lastSlashPos:=stringIndex; end; delete(inputPath, lastSlashPos+1, length(inputPath)-lastSlashPos); inputFileFullString:=inputPath+inputFileString; assign(myInput, inputFileFullString); {$I-} reset(myInput); {$I+} if IOresult=0 then inputCheck:=FALSE; if inputCheck then writeln('I can''t find a file named "',inputfilestring,'", try again.'); end; inputCheck:=TRUE; {get the name of the output file; if it already exists, ask before overwriting} while inputCheck do begin inputCheck:=FALSE; writeln; write('Name of the output file: '); readln(OutputFileString); if OutputFileString[length(OutputFileString)-3]<>'.' then OutputFileString:=OutputFileString+'.txt'; OutputFileFullString:=inputPath+OutputFileString; assign(myOutput, OutputFileFullString); {$I-} reset(myOutput); {$I+} if IOresult=0 then {file already exists} begin writeln('File "', outputfilestring, '" already exists. Do you want to overwrite it?'); write('Enter "y" or "n": '); readln(overWriteYes); if overWriteYes='Y' then overWriteYes:='y'; if overWriteYes<>'y' then begin inputCheck:=TRUE; close(myOutput); end; end; end; rewrite(myOutput); writeln; inputCheck:=TRUE; while inputCheck do begin write('Number of replicates with random initial orders (1 to ', maxNumRandomInitial:4,'): '); readln(numRandomInitial); if (numRandomInitial>0) and (numRandomInitial<=maxNumRandomInitial) then inputCheck:=FALSE; if inputCheck then writeln('Enter a number from 1 to ', maxNumRandomInitial:4); end; writeln; writeln; inputCheck:=TRUE; while inputCheck do begin writeln('Do you want the asymmetry index precise to 1, 2, or 3 decimal places?'); writeln('(The program is VERY slow with 3 decimal places.)'); write('Enter 1, 2, or 3: '); readln(numDecimalPlaces); if (numDecimalPlaces=1) or (numDecimalPlaces=2) or (numDecimalPlaces=3) then inputCheck:=FALSE; if inputCheck then writeln('Enter 1, 2, or 3.'); end; if numDecimalPlaces=1 then maxUnitDivisions:=10; if numDecimalPlaces=2 then maxUnitDivisions:=100; if numDecimalPlaces=3 then maxUnitDivisions:=1000; writeln; DoAnalyzeInput; writeln; writeln; write('All done. Enter "q" to quit: '); readln(ActionChar); close(myOutput); close(myInput); end.