program DNASlider;
{DNA Slider is a public domain program for performing}
{simulations-based statistical tests on DNA sequence polymorphism and divergence data.}
{The tests are described in McDonald, J.H. 1998. Improved tests for heterogeneity}
{across a region of DNA sequence in the ratio of polymorphism to divergence. Molecular}
{Biology and Evolution 15: 377-384. The latest version of the}
{program is available from http://udel.edu/~mcdonald}
{Version 2.0, September 2005: This version. Ported to Windows and Mac OSX using}
{the Free Pascal Compiler. The program now uses a simple console interface, }
{rather than a graphical user interface. I eliminated the two Goss and Lewontin statistics and}
{increased the maximum size of several variables.}
{Version 1.11, January, 1998: Enlarged some of the parameters, such as the}
{length of the sequence that could be analyzed; made minor changes to the help screens.}
{Version 1.1, June 1997: Added help screens and made other improvements to the user interface.}
{Version 1.0, December 1996.}
{Written by: John H. McDonald}
{ Department of Biological Sciences}
{ University of Delaware}
{ Newark, Delaware 19716}
{ http://udel.edu/~mcdonald}
{ mcdonald@udel.edu}
const
{These constants are maximum values of various parameters; contact me if you need }
{me to recompile the program with a larger value for some of these. }
MaxNumSequences = 500; {The maximum number of alleles.}
MaxNumPoly = 1000; {The maximum number of polymorphisms.}
MaxNumFixed = 1000; {The maximum number of fixed differences.}
MaxNumRunsObs = 2000; {The maximum number of runs observed in the data.}
MaxNumSubs = 2000; {The maximum total number of variable sites.}
MaxNumReps = 100000; {The maximum number of replications you can run.}
MaxNumSites = 50000; {The maximum number of nucleotide sites in the sequence.}
MaxNumRecomb = 20; {The maximum number of recombination parameters.}
MaxRecomb = 500; {The maximum value for the recombination parameter.}
{The last three constants refer to parameters used in the simulations; if the program crashes }
{on big data sets (long sequences with lots of alleles), these may need to be increased.}
MinAlleleName = -1000; {The minimum negative name for allele.}
MaxAlleleName = 1000; {The maximum positive name for allele.}
MaxNumFrags = 10000; {The maximum number of fragments.}
VertSize = 20; {The height of the crude console graphs.}
HorizSize = 60; {The width of the crude console graphs.}
MaxGGraph = 36; {The maximum G-value shown on the console graph.}
type
AlleleArrayType = array[MinAlleleName..MaxAlleleName] of integer;
var
InputFileString,OutputFileString, inputFileFullString, outputFileFullString, inputPath: string[250]; {file names}
myOutput, myInput: text; {The output and input files.}
myDidReadInput, myFirstOutput, rangeBad: Boolean;
actionChar, overwriteYes: char;
recombIndex, stringIndex, GIndex, lastSlashPos: integer;
{The following variables are input from the parameter dialog.}
GeneName: string[250]; {The name of the gene.}
NameSpeciesA: string[250]; {The name of the species with polymorphism data.}
NameSpeciesB: string[250]; {The name of the outgroup species.}
NumSequences: integer; {The number of sequences sampled.}
NumSites: longint; {The number of nucleotides in the sequence.}
NumRecomb: integer; {The number of different recombination parameters used.}
RecombArray: array[1..MaxNumRecomb] of real; {The recombination parameters.}
NumReps: longint; {The number of replications to run.}
GraphWindowSize: integer; {The size of the sliding window for the graph.}
{The following variables are calculated from the data set.}
NumPoly: longint; {The number of polymorphisms observed.}
NumFixed: longint; {The number of fixed differences observed.}
NumVarSites: longint; {The total number of polymorphic and fixed sites.}
MaxGReal: extended; {The maximum G value in the observed data.}
maxAverageGReal: extended; {The maximum average G value in the observed data.}
KSReal: extended; {The Kolmogorov-Smirnov value in the observed data.}
numRunsReal: longint; {The number of runs in the observed data.}
minWindowSize, maxWindowSize: longint; {The smallest and largest windows used for calculating }
{the average and maximum sliding G values.}
{The next two variables are read from the data file.}
mySiteNumber: array[1..MaxNumSites] of integer; {The site number of each polymorphism or}
{fixed difference.}
myRealPoly: array[1..MaxNumSubs] of char; {P if poly, F if fixed difference.}
{The following variables are used in the simulations; they are global variables only because}
{they are so large.}
SitePolyTime: array[0..MaxNumSites] of real; {The length of time summed across all branches }
{within species A, for each site.}
SiteFixedTime: array[0..MaxNumSites] of real; {The length of time from coalescence of A, }
{to coalescence of A+B, plus B, for each site.}
SiteCoalesceTime: array[0..MaxNumSites] of real; {For each site, the oldest coalescence }
{of two alleles from species A.}
FragAllele: array[0..MaxNumFrags] of integer; {The allele name for each fragment.}
FragStart: array[0..MaxNumFrags] of integer; {The starting position of each fragment.}
FragEnd: array[0..MaxNumFrags] of integer; {The ending position of each fragment.}
AllelePresence: array[MinAlleleName..MaxAlleleName] of integer; {1 if that allele is present, 0 if it is not.}
AlleleStart: array[MinAlleleName..MaxAlleleName] of integer; {The starting position of each allele.}
AlleleEnd: array[MinAlleleName..MaxAlleleName] of integer; {The ending position of each allele.}
SiteSp2Allele: array[0..MaxNumSites] of integer; {For each site, the name of the allele}
{in species B.}
SitePosition: array[1..MaxNumSubs] of real; {The position of sites in the simulations.}
SitePoly: array[1..MaxNumSubs] of integer; {'1' for polymorphism, '0' for fixed.}
{The next variables keep track of the significance of the sliding G values for each window size.}
sigGCountArray: array[1..maxNumRecomb, 1..maxnumsubs] of longint;
maxGWindowArray: array[1..MaxNumSubs] of extended;
maxGCountArray: array[1..MaxNumRecomb, 0..1000] of longint;
maxGCritOverall, maxGCritRecomb: extended;
maxGRecombIndex: integer;
{====================================================================================================}
{This function returns the greater of two integers.}
function GreaterInteger (x, y: integer): integer;
var
First, Greater: integer;
begin
First := x;
Greater := y;
if First > Greater then
Greater := First;
GreaterInteger := Greater;
end;
{====================================================================================================}
{This function returns the lesser of two integers.}
function LesserInteger (x, y: integer): integer;
var
First, Lesser: integer;
begin
First := x;
Lesser := y;
if First < Lesser then
Lesser := First;
LesserInteger := Lesser;
end;
{====================================================================================================}
{This function returns a number times its natural log, or 0 if the number is 0.}
function flnf (x: integer): extended;
var
F: integer;
FResult: extended;
begin
F := x;
FResult := 0;
if F > 0 then
FResult := F * ln(F);
flnf := FResult;
end;
{====================================================================================================}
{This procedure reads in the data from the input file, then calculates four statistics:}
{the number of runs (McDonald 1996, Mol. Biol. Evol. 13:253-260),}
{the Kolmogorov-Smirnov statistic (Sokal and Rohlf 1981, Biometry, p. 719-720),}
{and the maximum sliding G-statistic and mean sliding G-statistic (McDonald 1998, Mol. Biol. Evol. 15: 377-384.}
{The two sliding G-statistics are found by calculating the G-statistic}
{(Sokal and Rohlf 1981, p. 709) for the 2x2 table comparing}
{the number of polymorphisms and fixed differences inside and outside each window.}
{All possible window positions are tried, and all window sizes are tried in which}
{the smallest expected value in the 2x2 table is 5 or more.}
{[or 3 if there are fewer than 11 polymorphisms or fixed differences.]}
{The maximum sliding G is the largest of these G statistics.}
{The mean sliding G statistic is found by taking the mean across all window positions}
{of these G statistics for each window size and taking the largest of these means.}
procedure DoAnalyzeInput;
var
A, B, C, D, Pindex, startIndex, siteIndex, cumPoly: longint;
maxAverageGWindowSize,maxGWindowSize, windowSize, stringIndex: integer;
averageGReal,GReal, KS: extended;
tempString: string[63];
numSubsOK, stringUnread: boolean;
begin
readln(myInput, geneName);{The first line of the data file contains the name of the gene.}
readln(myInput, NumVarSites); {The second line has an integer indicating the number of variable sites.}
numSubsOK := TRUE;
myDidReadInput:= TRUE;
numPoly := 0;
numFixed := 0;
if numSubsOK then
begin
for Pindex := 1 to NumVarSites do {each remaining line contains a site number, followed by }
{P or F for polymorphic or fixed}
begin
readln(myInput, mySiteNumber[Pindex], tempString);
stringUnread:=TRUE;
for stringIndex := 1 to length(tempString) do {lowercase is acceptable input}
begin
if ((tempString[stringIndex] = 'p') or (tempString[stringIndex] = 'P')) and stringUnread then
begin
myRealPoly[Pindex] := 'P';
stringUnread:= FALSE;
end;
if ((tempString[stringIndex] = 'f') or (tempString[stringIndex] = 'F')) and stringUnread then
begin
myRealPoly[Pindex] := 'F';
stringUnread:= FALSE;
end;
end;
if (myRealPoly[Pindex] = 'P') then
numPoly := numPoly + 1;
if (myRealPoly[Pindex] = 'F') then
numFixed := numFixed + 1;
end;
end;
if myDidReadInput = TRUE then
begin
{Calculate the number of runs.}
numRunsReal := 1;
for SiteIndex := 1 to (NumVarSites - 1) do
begin
if (myRealPoly[SiteIndex] <> myRealPoly[SiteIndex + 1]) then
numRunsReal := numRunsReal + 1;
end;
{Calculate the Kolmogorov-Smirnov statistic.}
KSReal := 0;
cumPoly := 0;
for SiteIndex := 1 to NumVarSites do
begin
if (myRealPoly[SiteIndex] = 'P') then
cumPoly := cumPoly + 1;
KS := (abs(cumPoly - (SiteIndex * (numPoly / NumVarSites)))) / NumVarSites;
if KS > KSReal then
KSReal := KS;
end;
{Calculate maximum sliding G and mean sliding G.}
MaxGReal := 0;
MaxAverageGReal := 0;
MinWindowSize := round(5.99 + (5 * (GreaterInteger(NumFixed, NumPoly) /
LesserInteger(NumFixed, NumPoly))));
if LesserInteger(NumFixed, NumPoly) < 11 then
MinWindowSize := round(3.99 + (3 * (GreaterInteger(NumFixed, NumPoly) /
LesserInteger(NumFixed, NumPoly))));
MaxWindowSize := NumVarSites - minWindowSize;
if minWindowSize < MaxWindowSize then
begin
for WindowSize := MinWindowSize to maxWindowSize do
begin
MaxGWindowArray[windowSize] := 0;
AverageGReal := 0;
for StartIndex := 1 to (1 + NumVarSites - WindowSize) do
begin
A := 0;
for SiteIndex := StartIndex to (StartIndex + WindowSize - 1) do
begin
if (myRealPoly[siteIndex] = 'P') then
A := A + 1; {number of polymorphic sites inside window}
end;
B := WindowSize - A; {number of fixed differences inside window}
C := NumPoly - A; {number of polymorphic sites outside window}
D := NumFixed - B; {number of fixed differences outside window}
GReal := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) -
flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D));
AverageGReal := AverageGReal + (GReal / (1 + NumVarSites - windowSize));
if GReal > MaxGWindowArray[WindowSize] then
MaxGWindowArray[WindowSize] := GReal;
if GReal > MaxGReal then
begin
MaxGReal := GReal;
maxGWindowSize := WindowSize;
end;
end;
if AverageGReal > MaxAverageGReal then
begin
MaxAverageGReal := AverageGReal;
maxAverageGWindowSize := windowSize;
end;
end;
end;
{Output the results to the output file.}
if myFirstOutput then
begin
writeln(myOutput, '============================================================');
writeln(myOutput);
writeln(myOutput);
myFirstOutput := FALSE;
end;
writeln(myOutput, 'There are ', numRunsReal : 4, ' runs in your data on ', GeneName, '.');
writeln(myOutput, 'The Kolmogorov-Smirnov statistic is ', KSReal : 8 : 6, '. ');
if minWindowSize < maxWindowSize then {There were enough variable sites to do the G tests.}
begin
writeln(myOutput);
writeln(myOutput, 'A window of ', maxAverageGWindowSize : 4,
' variable sites produces the largest average sliding G value.');
writeln(myOutput, 'For a window size of ', maxAverageGWindowSize : 4,
' the average sliding G value is ', maxAverageGReal : 8 : 4, '.');
writeln(myOutput);
writeln(myOutput, 'A window of ', maxGWindowSize : 4,
' variable sites produces the largest maximum sliding G value.');
writeln(myOutput, 'For a window size of ', maxGWindowSize : 4,
' the maximum sliding G value is ', maxGReal : 8 : 4, '.');
writeln(myOutput);
end;
writeln(myOutput);
if minWindowSize >= maxWindowSize then {Too few variable sites to do the G tests.}
begin
if numFixed > numPoly then
writeln(myOutput, 'With only ', numPoly : 2,
' polymorphisms you cannot do the sliding window G-tests.');
if numFixed <= numPoly then
writeln(myOutput, 'With only ', numFixed : 2,
' fixed differences you cannot do the sliding window G-tests.');
end;
end;
end;
{====================================================================================================}
{This procedure simulates neutral evolution, using the parameters entered from the parameter window}
{and calculated from the input data. Each simulated sequence starts at the present as a single}
{"allele" consisting of a single "fragment." Going back in time, a recombination event will break up}
{an allele into two alleles. A coalescence event takes two alleles and combines them into one; the}
{effect on the number of fragments depends on the pattern of overlap between the fragments in the two}
{alleles. After recombination and coalescence, it is possible for a single allele to consist of two}
{or more non-contiguous fragments. As coalescence and recombination are simulated, the total amount}
{of time on the within-species part of the phylogeny is recorded for each nucleotide site, as is the}
{time on the between-species part. After coalescence and recombination are simulated, the number of}
{polymorphic sites in the real data is distibuted across the simulated sequence in proportion to the}
{within-species time for each site; fixed differences are likewise distributed.}
{The procedure then calculates the four statistics for the simulated data and compares them to}
{the statistics for the real data. The procedure outputs the results to the screen and to the}
{output file. For the maximum sliding G test, the output includes the significance level for each}
{size of window applied to the real data. This is so that, for example, the user could see that a}
{very large and heuristically unenlightening window gave the largest G and thus the "best"}
{significance, but a smaller window had almost as great a G and yielded a much more detailed}
{picture of the peaks and valleys of polymorphism to divergence ratio. }
procedure DoRun;
var
TimePoly: real; {total time on all lineages within species A back to the most}
{recent common ancestor of them all}
TimeCoal: real; {time from the present back to the most recent common ancestor}
{of the sampled sequences in species A}
AlleleIndex: longint; {index used when running through the different alleles}
TimeFixed: real; {time from the most recent common ancestor of species A, back to}
{the coalescence of A+B, plus species B}
TimeSplit: real; {time from the present back to the split between species A+B;}
{note that while this is the most recent possible time for}
{the coalescence of A+B sequences, on average that will happen}
{1 time unit [2N generations] earlier)}
RecombIndex: integer; {index of the recombination parameter being used}
Recomb: real; {recombination parameter (4Nr) }
SigGCount, SigRunsCount, SigKSCount,
sigAverageGCount: array[1..maxNumRecomb] of longint; {number of simulations with the same or more extreme value than}
{that observed}
SiteIndex: longint; {index used when stepping through each site}
RepIndex: longint; {index used to indicate which replicate is being run}
LineagePolyTime: real; {the time during which there is more than one allele for at}
{least one site}
AlleleMaxA: integer; {the largest allele name in species A}
AlleleMinA: integer; {the smallest allele name in species A}
AlleleMaxB: integer; {the largest allele name in species B}
AlleleMinB: integer; {the smallest allele name in species B}
MaxFrag: longint; {the largest fragment name}
MinFrag: longint; {the smallest fragment name}
AlleleCountA: longint; {the number of alleles in species A}
AlleleCountB: longint; {the number of alleles in species B}
TotalAlleleLengthA: longint; {the number of possible recombination sites in all the alleles}
{in species A}
TotalAlleleLengthB: longint; {the number of possible recombination sites in all the alleles}
{in species B}
TotalFragLengthA: longint; {the total number of sites in all fragments in species A}
TotalFragLengthB: longint; {the total number of sites in all fragments in species B}
FragIndex: longint; {index used for stepping through the fragments}
EventTime: real; {the time to the next coalescence or recombination event}
RecombYes: string[1]; {'y' if the next event is a recombination, 'n' if it is}
{a coalescence}
RecombProb: real; {the probability that the next event is a recombination}
FirstAllele: integer; {the first of two alleles to be coalesced}
TempAllele: integer; {used in picking the two alleles to be coalesced}
SecondAllele: integer; {the second of two alleles to be coalesced}
TempMaxFrag: longint; {when coalescing two alleles, the largest name of a fragment}
FragIndex2: longint; {index used in coalescing two fragments}
PositionOne: longint; {when coalescing two alleles, the starting position of the first}
{fragment}
PositionTwo: longint; {ending position of first fragment}
PositionThree: longint; {starting position of second fragment}
PositionFour: longint; {ending position of second fragment}
NewFragOne: longint; {when coalescing, name of first new fragment}
NewFragTwo: longint; {when coalescing, name of second new fragment}
SiteIndex2: longint; {index used for stepping through sites}
RecombSite: real; {number used in choosing which allele to recombine within}
RecombPosition: longint; {position within allele at which recombination takes place}
FragName: longint; {when getting information about fragments, name of fragment}
SiteRandom: real; {random number from 0 to 1, used to assign position of fixed}
{and polymorphic sites in simulations}
SortTest: real; {test variable used in sorting}
SortIndex: longint; {index used during the sorting process}
SmallPosition: longint; {variable used in sorting}
TempPoly: integer; {'1' for polymorphism, '0' for fixed; after sorting}
{values used for calculating the statistics for the simulated data}
MaxGSim, GSim: extended;
NumRunsSim, cumPoly, cumGCountOld, cumGCountNew: longint;
KS, KSSim, maxAverageGSim, averageGSim: extended;
StartIndex: longint;
A, B, C, D: longint;
windowSize: integer;
begin
{Write some stuff to the output file.}
writeln(myOutput);
writeln(myOutput, 'Number of sequences from ', NameSpeciesA, ': ', NumSequences : 4);
writeln(myOutput, 'Outgroup species: ', NameSpeciesB);
writeln(myOutput, 'Length of ', GeneName, ': ', NumSites : 5);
writeln(myOutput, 'Number of polymorphisms: ', NumPoly : 4);
writeln(myOutput, 'Number of fixed differences: ', NumFixed : 4);
writeln(myOutput, 'Number of replicate simulations: ', NumReps : 5);
writeln(myOutput);
writeln(myOutput, 'Recomb. max G >= runs <= K-S >= avg. G >=');
if minWindowSize < maxWindowSize then
writeln(myOutput, 'parameter ', MaxGReal : 8 : 4, numRunsReal : 8, ' ', KSReal : 13 : 6,
maxAverageGReal : 11 : 4)
else
writeln(myOutput, 'parameter ', ' ------ ', numRunsReal : 8, KSReal : 13 : 6,
' ------- ');
{Some parameters are estimated from the input data, using equation 5 of Hudson 1990, Oxford Surv.}
{Evol. Biol. 7:1-44. The mean time for J alleles to coalesce into J-1 alleles is 1 over the number}
{of combinations of J items taken two at a time, or 1/((J*(J-1))/2). Of course,}
{time is measured in units of 2N generations, where N is the number of diploid individuals.}
TimePoly := 0;
TimeCoal := 0;
NumVarSites := NumFixed + NumPoly;
for AlleleIndex := 2 to NumSequences do
begin
TimePoly := TimePoly + (2 / (AlleleIndex - 1));
TimeCoal := TimeCoal + (2 / (AlleleIndex * (AlleleIndex - 1)));
end;
{Assuming the mutation rate hasn't changed, and that the species are close enough that very few}
{multiple hits have occurred, the ratio of the fixation time to the polymorphism time}
{can be estimated from the ratio of the number of fixed differences to the number of polymorphisms.}
TimeFixed := TimePoly * (NumFixed / NumPoly);
{Assuming that the population size in the ancestral species was the same as in the two descendant}
{species, the average time for an allele from species A to coalesce with an allele from species B,}
{once the two species have coalesced into one species, is 2N generations, or 1 time unit.}
{Therefore the time back to when species A and B coalesced is estimated as the time when alleles}
{from A coalesced with alleles from B, minus 1.}
TimeSplit := ((TimeFixed + TimeCoal) / 2) - 1;
{Initialize a couple of things. }
SitePolyTime[0] := 0;
SiteFixedTime[0] := 0;
{Do a set of replicate simulations for each recombination parameter chosen.}
RecombIndex := 1;
writeln;
writeln(' Progress Bar'); {set up a progress bar on the screen}
writeln('replicate 1 ',(numReps div 4):7,' ',(numReps div 2):7, ' ', 3*(numreps div 4):7, ' ',numReps:7);
repeat
begin
Recomb := RecombArray[RecombIndex];
writeln;
write('recomb #', recombIndex:2, ' ');
{Initialize variables used in counting the number of simulations with each number of runs.}
SigGCount[recombIndex] := 0;
SigRunsCount[recombIndex] := 0;
SigKSCount[recombIndex] := 0;
sigAverageGCount[recombIndex] := 0;
for windowSize := minWindowSize to maxWindowSize do
begin
sigGCountArray[recombIndex, windowSize] := 0;
for GIndex:=0 to 1000 do
begin
maxGCountArray[recombIndex, GIndex]:=0;
end;
end;
{Here is where each replicate simulation starts.}
RepIndex := 0;
repeat
RepIndex := RepIndex + 1;
if numReps>49 then {write a progress bar}
begin
if round(50.0*repIndex/numReps)>round(50.0*(repIndex-1)/numReps) then
write('|');
end;
{More initializing. }
LineagePolyTime := 0;
AlleleMaxA := NumSequences;
AlleleMinA := 1;
MaxFrag := NumSequences;
AlleleCountA := NumSequences;
TotalAlleleLengthA := (NumSites - 1) * NumSequences; {no recomb. at end of last site,}
{thus Numsites-1. }
TotalFragLengthA := NumSites * NumSequences;
{Initialize values for species A.}
for FragIndex := 1 to NumSequences do
begin
FragAllele[FragIndex] := FragIndex;
FragStart[FragIndex] := 0;
FragEnd[FragIndex] := NumSites;
AllelePresence[FragIndex] := 1;
AlleleStart[FragIndex] := 0;
AlleleEnd[FragIndex] := NumSites;
end;
for SiteIndex := 1 to NumSites do
begin
SitePolyTime[SiteIndex] := 0;
SiteFixedTime[SiteIndex] := 0;
SiteCoalesceTime[SiteIndex] := 0;
end;
{Initialize the rest of the arrays with zeros.}
for FragIndex := (NumSequences + 1) to MaxNumFrags do
begin
FragAllele[FragIndex] := 0;
FragStart[FragIndex] := 0;
FragEnd[FragIndex] := 0;
end;
for FragIndex := (NumSequences + 1) to MaxAlleleName do
begin
AllelePresence[FragIndex] := 0;
AlleleStart[FragIndex] := 0;
AlleleEnd[FragIndex] := 0;
end;
for FragIndex := MinAlleleName to 0 do
begin
AllelePresence[FragIndex] := 0;
AlleleStart[FragIndex] := 0;
AlleleEnd[FragIndex] := 0;
end;
{This part of the program coalesces the alleles within species A, back until the species split.}
while LineagePolyTime < TimeSplit do
begin
if (Recomb < 0.01) and (AlleleCountA = 1) then {If there's no recombination and }
LineagePolyTime := TimeSplit * 100; {all the alleles within species A}
{are coalesced, go on to coalesce}
{with species B.}
if (Recomb > 0.01) or (AlleleCountA > 1) then
begin
{The next equation is modified from Hudson 1983, p. 197 (Theor. Pop. Biol 23:183-201).}
{See also Hudson 1990, p. 42. It determines the time back to the next event, either a recombination}
{or coalescence event. The recombination parameter is the parameter for one allele that is}
{NumSites long, and therefore has (NumSites-1) possible recombination sites. }
EventTime := (-1 * ln(random)) / ((Recomb * (TotalAlleleLengthA /
(NumSites - 1.0))) + (AlleleCountA * (AlleleCountA - 1)));
LineagePolyTime := LineagePolyTime + EventTime;
end;
if LineagePolyTime < TimeSplit then {The two species are still separate.}
begin
{The next equation is modified from Hudson 1983, p. 197.}
RecombProb := (Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) /
((Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) +
(AlleleCountA * (AlleleCountA - 1)));
{Use random number to decide whether event is recombination or coalescence. }
RecombYes := 'n';
if random < RecombProb then
RecombYes := 'y';
{Here is where a coalescence event is simulated.}
if RecombYes = 'n' then
begin
{The two alleles to coalesce are chosen at random.}
FirstAllele := 0;
while FirstAllele = 0 do
begin
TempAllele := round(AlleleMinA + ((AlleleMaxA - (AlleleMinA - 1)) *
random));
if AllelePresence[TempAllele] > 0.5 then
FirstAllele := TempAllele;
end;
SecondAllele := 0;
while SecondAllele = 0 do
begin
TempAllele := round(AlleleMinA + ((AlleleMaxA - (AlleleMinA - 1)) *
random));
if (AllelePresence[TempAllele] > 0.5) and (TempAllele <> FirstAllele) then
SecondAllele := TempAllele;
end;
{Each fragment is compared with every other fragment. If a fragment consists of allele two, it is}
{renamed allele one. If an allele two fragment overlaps with an allele one fragment, the overlapping}
{part of allele two disappears, the overlapping part of allele one becomes a new fragment, and a}
{new fragment is made from the non-overlapping part of each fragment. The little comment diagrams}
{that illustrate this process will look best in a fixed-width font such as Geneva or Courier.}
FragIndex := 0;
TempMaxFrag := MaxFrag;
while FragIndex <= MaxFrag do
begin
FragIndex := FragIndex + 1;
if FragAllele[FragIndex] = FirstAllele then
begin
for FragIndex2 := 1 to MaxFrag do
begin
if FragAllele[FragIndex2] = SecondAllele then {One fragment has}
{allele one, one fragment has allele two.}
begin
PositionOne := FragStart[FragIndex];
PositionTwo := FragEnd[FragIndex];
PositionThree := FragStart[FragIndex2];
PositionFour := FragEnd[FragIndex2];
if (PositionTwo > PositionThree) and
(PositionFour > PositionOne) then
{There is overlap between the two fragments.}
begin
for SiteIndex := (1 +
greaterinteger(PositionOne,PositionThree))
to lesserinteger(PositionTwo, PositionFour) do
{Add appropriate times to site array. }
begin
SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex] +
LineagePolyTime;
SiteCoalesceTime[SiteIndex] := LineagePolyTime;
end;
FragAllele[FragIndex2] := 0;
NewFragOne := MaxFrag + 1;
NewFragTwo := MaxFrag + 2;
if (PositionOne < PositionThree) and
(PositionTwo < PositionFour) then
begin
FragStart[FragIndex] := PositionThree;
{fragment allele } FragAllele[NewFragOne] := FirstAllele;
{ 1 ---------- 1 } FragStart[NewFragOne] := PositionOne;
{ 2 ----------- 2 } FragEnd[NewFragOne] := PositionThree;
{ V } FragAllele[NewFragTwo] := SecondAllele;
{ 1 ------- 1 } FragStart[NewFragTwo] := PositionTwo;
{ 3 --- 1 } FragEnd[NewFragTwo] := PositionFour;
{ 4 ---- 2 } TempMaxFrag := NewFragTwo;
end;
if (PositionOne < PositionThree) and
(PositionTwo = PositionFour) then
begin
FragStart[FragIndex] := PositionThree;
{ 1 ---------- 1 } FragAllele[NewFragOne] := FirstAllele;
{ 2 ------- 2 } FragStart[NewFragOne] := PositionOne;
{ V } FragEnd[NewFragOne] := PositionThree;
{ 1 ------- 1 } TempMaxFrag := NewFragOne;
{ 3 --- 1 } end;
if (PositionOne < PositionThree) and
(PositionTwo > PositionFour) then
begin
FragStart[FragIndex] := PositionThree;
{ 1 ------------- 1 } FragEnd[FragIndex] := PositionFour;
{ 2 ------- 2 } FragAllele[NewFragOne] := FirstAllele;
{ V } FragStart[NewFragOne] := PositionOne;
{ 1 ------- 1 } FragEnd[NewFragOne] := PositionThree;
{ 3 --- 1 } FragAllele[NewFragTwo] := FirstAllele;
{ 4 --- 1 } FragStart[NewFragTwo] := PositionFour;
FragEnd[NewFragTwo] := PositionTwo;
TempMaxFrag := NewFragTwo;
end;
if (PositionOne = PositionThree) and
(PositionTwo < PositionFour) then
begin
FragAllele[NewFragOne] := SecondAllele;
{ 1 ---------- 1 } FragStart[NewFragOne] := PositionTwo;
{ 2 ------------- 2 } FragEnd[NewFragOne] := PositionFour;
{ V } TempMaxFrag := NewFragOne;
{ 1 ---------- 1 } end;
{ 3 --- 2 } if (PositionOne = PositionThree) and
(PositionTwo > PositionFour) then
begin
FragEnd[FragIndex] := PositionFour;
{ 1 ------------- 1 } FragAllele[NewFragOne] := FirstAllele;
{ 2 ---------- 2 } FragStart[NewFragOne] := PositionFour;
{ V } FragEnd[NewFragOne] := PositionTwo;
{ 1 ---------- 1 } TempMaxFrag := NewFragOne;
{ 3 --- 1 } end;
if (PositionOne > PositionThree) and
(PositionTwo < PositionFour) then
begin
FragAllele[NewFragOne] := SecondAllele;
{ 1 ------- 1 } FragStart[NewFragOne] := PositionThree;
{ 2 ------------- 2 } FragEnd[NewFragOne] := PositionOne;
{ V } FragAllele[NewFragTwo] := SecondAllele;
{ 1 ------- 1 } FragStart[NewFragTwo] := PositionTwo;
{ 3 --- 2 } FragEnd[NewFragTwo] := PositionFour;
{ 4 --- 2 } TempMaxFrag := NewFragTwo;
end;
if (PositionOne > PositionThree) and
(PositionTwo = PositionFour) then
begin
FragAllele[NewFragOne] := SecondAllele;
{ 1 ---------- 1 } FragStart[NewFragOne] := PositionThree;
{ 2 ------------- 2 } FragEnd[NewFragOne] := PositionOne;
{ V } TempMaxFrag := NewFragOne;
{ 1 ---------- 1 } end;
{ 3 --- 2 } if (PositionOne > PositionThree) and
(PositionTwo > PositionFour) then
begin
FragEnd[FragIndex] := PositionFour;
{ 1 ----------- 1 } FragAllele[NewFragOne] := FirstAllele;
{ 2 ----------- 2 } FragStart[NewFragOne] := PositionFour;
{ V } FragEnd[NewFragOne] := PositionTwo;
{ 1 -------- 1 } FragAllele[NewFragTwo] := SecondAllele;
{ 3 --- 1 } FragStart[NewFragTwo] := PositionThree;
{ 4 --- 2 } FragEnd[NewFragTwo] := PositionOne;
TempMaxFrag := NewFragTwo;
end;
end; {Note that no new fragments need to be made if}
{both fragments start and end at the same position.}
end;
end;
MaxFrag := TempMaxFrag;
end;
end;
for FragIndex := 1 to MaxFrag do {Rename all fragments from allele two}
{to allele one.}
begin
if FragAllele[FragIndex] = SecondAllele then
FragAllele[FragIndex] := FirstAllele;
end;
end;
{Here is where a recombination event is simulated. The first step is to pick which allele to}
{recombine within. The probability that an allele will be the site of a recombination event}
{is proportional to the size of that allele. Note that the size of an allele is one less than the}
{number of sites in the allele, because recombination must occur within an allele, not at its end.}
if RecombYes = 'y' then
begin
RecombSite := random * TotalAlleleLengthA;
FirstAllele := 0;
AlleleIndex := AlleleMinA - 1;
TotalAlleleLengthA := 0;
while FirstAllele = 0 do
begin
AlleleIndex := AlleleIndex + 1;
if AllelePresence[AlleleIndex] = 1 then
TotalAlleleLengthA := TotalAlleleLengthA + (AlleleEnd[AlleleIndex] -
(AlleleStart[AlleleIndex] + 1));
if TotalAlleleLengthA >= RecombSite then
FirstAllele := AlleleIndex;
end;
{Pick a new name for the recombined part of the allele. }
AlleleIndex := 0;
SecondAllele := 0;
while (AlleleIndex < MaxAlleleName) and (SecondAllele < 1) do
begin
AlleleIndex := AlleleIndex + 1;
if AllelePresence[AlleleIndex] = 0 then
begin
SecondAllele := AlleleIndex;
if SecondAllele > AlleleMaxA then
AlleleMaxA := SecondAllele;
end;
end;
{Choose a recombination position at random, then rename every fragment past that point from allele}
{one to allele two. If the recombination position is within a fragment, a new fragment is created}
{for the part of the fragment past the recombination position. }
RecombPosition := AlleleStart[FirstAllele] +
(1 + round((AlleleEnd[FirstAllele] - AlleleStart[FirstAllele] - 1) *
random));
NewFragOne := MaxFrag;
for FragIndex := 1 to MaxFrag do
begin
if FragAllele[FragIndex] = FirstAllele then
begin
if FragStart[FragIndex] >= RecombPosition then {Entire fragment is}
{past recombination position.}
begin
FragAllele[FragIndex] := SecondAllele;
end;
if (FragStart[FragIndex] < RecombPosition)
and (FragEnd[FragIndex] > RecombPosition) then
{Recombination position is within fragment.}
begin
NewFragOne := NewFragOne + 1;
FragAllele[NewFragOne] := SecondAllele;
FragStart[NewFragOne] := RecombPosition;
FragEnd[NewFragOne] := FragEnd[FragIndex];
FragEnd[FragIndex] := RecombPosition;
end;
end;
end;
MaxFrag := NewFragOne;
end;
{Having completed the coalescence or recombination event, it's time to see which alleles are present.}
{The first step is to use up any empty fragment names, which speeds up the running of the program.}
FragIndex := 1;
while FragIndex < MaxFrag do
begin
if FragAllele[FragIndex] = 0 then
begin
MaxFrag := MaxFrag - 1;
for FragIndex2 := FragIndex to MaxFrag do
begin
FragAllele[FragIndex2] := FragAllele[FragIndex2 + 1];
FragStart[FragIndex2] := FragStart[FragIndex2 + 1];
FragEnd[FragIndex2] := FragEnd[FragIndex2 + 1];
end;
FragAllele[MaxFrag + 1] := 0;
end;
FragIndex := FragIndex + 1;
end;
{Next some stuff is initialized. }
for AlleleIndex := AlleleMinA to AlleleMaxA do
begin
AllelePresence[AlleleIndex] := 0;
AlleleStart[AlleleIndex] := 0;
AlleleEnd[AlleleIndex] := 0;
end;
AlleleCountA := 0;
AlleleMinA := MaxAlleleName;
AlleleMaxA := MinAlleleName;
TotalFragLengthA := 0;
{Each fragment is compared with every other fragment. If a fragment consists of allele two, it}
{is renamed allele one. If an allele two fragment overlaps with an allele one fragment, the}
{overlapping part of allele two disappears, the overlapping part of allele one becomes a new}
{fragment, and a new fragment is made from the non-overlapping part of each fragment.}
for FragIndex := 1 to MaxFrag do
begin
if (FragAllele[FragIndex]) > 0.5 then
begin
FragName := FragAllele[FragIndex];
TotalFragLengthA := TotalFragLengthA + (FragEnd[FragIndex] -
FragStart[FragIndex]);
AlleleMaxA := greaterinteger(FragName, AlleleMaxA);
AlleleMinA := lesserinteger(FragName, AlleleMinA);
if AllelePresence[FragName] > 0.5 then
{Not the first fragment with that allele name.}
begin
AlleleStart[FragName] := LesserInteger(FragStart[FragIndex],
AlleleStart[FragName]);
AlleleEnd[FragName] := GreaterInteger(FragEnd[FragIndex],
AlleleEnd[FragName]);
end;
if AllelePresence[FragName] < 0.5 then
{The first fragment with that allele name.}
begin
AllelePresence[FragName] := 1;
AlleleStart[FragName] := FragStart[FragIndex];
AlleleEnd[FragName] := FragEnd[FragIndex];
AlleleCountA := AlleleCountA + 1;
end;
end;
end;
{The cumulative size of the alleles is measured, for use in choosing which allele to recombine.}
TotalAlleleLengthA := 0;
for AlleleIndex := AlleleMinA to AlleleMaxA do
begin
if AllelePresence[AlleleIndex] > 0.5 then
TotalAlleleLengthA := TotalAlleleLengthA +
(AlleleEnd[AlleleIndex] - (AlleleStart[AlleleIndex] + 1));
end;
end;
end;
{Now it's time to simulate recombination and coalescence in species B going back to}
{the species split. First, initialize values for species B.}
LineagePolyTime := 0;
AlleleCountB := 1;
TotalAlleleLengthB := NumSites - 1;
TotalFragLengthB := NumSites;
AlleleMinB := -1;
AlleleMaxB := -1;
MinFrag := MaxFrag + 1;
MaxFrag := MinFrag;
FragAllele[MinFrag] := -1;
FragStart[MinFrag] := 0;
FragEnd[MinFrag] := NumSites;
AllelePresence[-1] := 1;
AlleleStart[-1] := 0;
AlleleEnd[-1] := NumSites;
for SiteIndex := 1 to NumSites do
SiteSp2Allele[SiteIndex] := -1;
while LineagePolyTime < TimeSplit do
begin
if Recomb < 0.01 then
LineagePolyTime := TimeSplit * 100;
if (Recomb > 0.01) then
begin
EventTime := (-1 * ln(random)) / ((Recomb * (TotalAlleleLengthB /
(NumSites - 1.0))) + (AlleleCountB * (AlleleCountB - 1)));
LineagePolyTime := LineagePolyTime + EventTime;
end;
if LineagePolyTime < TimeSplit then
begin
RecombProb := (Recomb * (TotalAlleleLengthB / (NumSites - 1.0))) /
((Recomb * (TotalAlleleLengthB / (NumSites - 1.0))) + (AlleleCountB *
(AlleleCountB - 1)));
RecombYes := 'n';
if random < RecombProb then
RecombYes := 'y';
{Here is where a coalescence event is simulated within species B. }
if RecombYes = 'n' then
begin
{Next the two alleles to coalesce are chosen at random. }
FirstAllele := 0;
while FirstAllele = 0 do
begin
TempAllele := round((AlleleMinB) - 0.5 + ((AlleleMaxB -
(AlleleMinB - 1)) * random));
if AllelePresence[TempAllele] > 0.5 then
FirstAllele := TempAllele;
end;
SecondAllele := 0;
while SecondAllele = 0 do
begin
TempAllele := round((AlleleMinB) - 0.5 + ((AlleleMaxB -
(AlleleMinB - 1)) * random));
if (AllelePresence[TempAllele] > 0.5) and (TempAllele <> FirstAllele) then
SecondAllele := TempAllele;
end;
{Each fragment is compared with every other fragment. If a fragment consists of allele two,}
{it is renamed allele one.}
for FragIndex := MinFrag to MaxFrag do
begin
if FragAllele[FragIndex] = SecondAllele then
FragAllele[FragIndex] := FirstAllele;
end;
end;
{Here is where a recombination event is simulated in species B.}
if RecombYes = 'y' then
begin
RecombSite := random * TotalAlleleLengthB;
FirstAllele := 0;
AlleleIndex := AlleleMinB - 1;
TotalAlleleLengthB := 0;
while FirstAllele = 0 do
begin
AlleleIndex := AlleleIndex + 1;
if AllelePresence[AlleleIndex] = 1 then
TotalAlleleLengthB := TotalAlleleLengthB + (AlleleEnd[AlleleIndex] -
(AlleleStart[AlleleIndex] + 1));
if TotalAlleleLengthB >= RecombSite then
FirstAllele := AlleleIndex;
end;
AlleleIndex := 0;
SecondAllele := 0;
while (AlleleIndex > MinAlleleName) and (SecondAllele > -1) do
begin
AlleleIndex := AlleleIndex - 1;
if AllelePresence[AlleleIndex] = 0 then
begin
SecondAllele := AlleleIndex;
if SecondAllele < AlleleMinB then
AlleleMinB := SecondAllele;
end;
end;
RecombPosition := AlleleStart[FirstAllele] +
(1 + round((AlleleEnd[FirstAllele] - AlleleStart[FirstAllele] - 1) *
random));
NewFragOne := MaxFrag;
for FragIndex := MinFrag to MaxFrag do
begin
if FragAllele[FragIndex] = FirstAllele then
begin
if FragStart[FragIndex] >= RecombPosition then
begin
FragAllele[FragIndex] := SecondAllele;
end;
if (FragStart[FragIndex] < RecombPosition)
and (FragEnd[FragIndex] > RecombPosition) then
begin
NewFragOne := NewFragOne + 1;
FragAllele[NewFragOne] := SecondAllele;
FragStart[NewFragOne] := RecombPosition;
FragEnd[NewFragOne] := FragEnd[FragIndex];
FragEnd[FragIndex] := RecombPosition;
end;
end;
end;
MaxFrag := NewFragOne;
end;
for AlleleIndex := AlleleMinB to AlleleMaxB do
begin
AllelePresence[AlleleIndex] := 0;
AlleleStart[AlleleIndex] := 0;
AlleleEnd[AlleleIndex] := 0;
end;
AlleleCountB := 0;
AlleleMinB := MaxAlleleName;
AlleleMaxB := MinAlleleName;
TotalFragLengthB := 0;
{The fragments are checked and the allele information is recorded. }
for FragIndex := MinFrag to MaxFrag do
begin
if ((FragAllele[FragIndex]) < -0.5) then
begin
FragName := FragAllele[FragIndex];
TotalFragLengthB := TotalFragLengthB + (FragEnd[FragIndex] -
FragStart[FragIndex]);
AlleleMaxB := greaterinteger(FragName, AlleleMaxB);
AlleleMinB := lesserinteger(FragName, AlleleMinB);
for SiteIndex := (FragStart[FragIndex] + 1) to FragEnd[FragIndex] do
SiteSp2Allele[SiteIndex] := FragName;
if AllelePresence[FragName] > 0.5 then
begin
AlleleStart[FragName] := lesserinteger(FragStart[FragIndex],
AlleleStart[FragName]);
AlleleEnd[FragName] := greaterinteger(FragEnd[FragIndex],
AlleleEnd[FragName]);
end;
if AllelePresence[FragName] < 0.5 then
begin
AllelePresence[FragName] := 1;
AlleleStart[FragName] := FragStart[FragIndex];
AlleleEnd[FragName] := FragEnd[FragIndex];
AlleleCountB := AlleleCountB + 1;
end;
end;
end;
{The cumulative size of the alleles is measured, for use in choosing which allele to recombine. }
TotalAlleleLengthB := 0;
for AlleleIndex := AlleleMinB to AlleleMaxB do
begin
if AllelePresence[AlleleIndex] > 0.5 then
TotalAlleleLengthB := TotalAlleleLengthB + (AlleleEnd[AlleleIndex] -
(AlleleStart[AlleleIndex] + 1));
end;
end;
end;
{Now the recombination and coalescence are simulated within the ancestral species.}
LineagePolyTime := TimeSplit;
AlleleMinA := AlleleMinB;
AlleleCountA := AlleleCountA + AlleleCountB;
TotalAlleleLengthA := TotalAlleleLengthA + TotalAlleleLengthB;
TotalFragLengthA := TotalFragLengthA + TotalFragLengthB;
{This part of the program repeats until all of the alleles are coalesced.}
while TotalFragLengthA > NumSites do
begin
EventTime := (-1 * ln(random)) / ((Recomb * (TotalAlleleLengthA /
(NumSites - 1.0))) + (AlleleCountA * (AlleleCountA - 1)));
LineagePolyTime := LineagePolyTime + EventTime;
RecombProb := (Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) /
((Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) +
(AlleleCountA * (AlleleCountA - 1)));
RecombYes := 'n';
if random < RecombProb then
RecombYes := 'y';
{Here is where a coalescence event is simulated. }
if RecombYes = 'n' then
begin
FirstAllele := 0;
while FirstAllele = 0 do
begin
TempAllele := round((AlleleMinA - 0.5) + ((AlleleMaxA - (AlleleMinA - 1)) *
random));
if AllelePresence[TempAllele] > 0.5 then
FirstAllele := TempAllele;
end;
SecondAllele := 0;
while SecondAllele = 0 do
begin
TempAllele := round((AlleleMinA - 0.5) + ((AlleleMaxA - (AlleleMinA - 1)) *
random));
if (AllelePresence[TempAllele] > 0.5) and (TempAllele <> FirstAllele) then
SecondAllele := TempAllele;
end;
{Each fragment is compared with every other fragment. If a fragment consists of allele two, it}
{is renamed allele one. If an allele two fragment overlaps with an allele one fragment, the}
{overlapping part of allele two disappears, the overlapping part of allele one becomes a new}
{fragment, and a new fragment is made from the non-overlapping part of each fragment.}
FragIndex := 0;
TempMaxFrag := MaxFrag;
while FragIndex <= MaxFrag do
begin
FragIndex := FragIndex + 1;
if FragAllele[FragIndex] = FirstAllele then
begin
for FragIndex2 := 1 to MaxFrag do
begin
if FragAllele[FragIndex2] = SecondAllele then
begin
PositionOne := FragStart[FragIndex];
PositionTwo := FragEnd[FragIndex];
PositionThree := FragStart[FragIndex2];
PositionFour := FragEnd[FragIndex2];
if (PositionTwo > PositionThree)
and (PositionFour > PositionOne) then
begin
for SiteIndex := (1 + greaterinteger(PositionOne,
PositionThree)) to lesserinteger(PositionTwo, PositionFour) do
{Add appropriate times to site array. }
begin
if ((SiteSp2Allele[SiteIndex] <> FirstAllele)
and (SiteSp2Allele[SiteIndex] <> SecondAllele))
and (SiteSp2Allele[SiteIndex] <> 0) then
{coalescence within species}
begin
SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex] +
LineagePolyTime;
SiteCoalesceTime[SiteIndex] := LineagePolyTime;
end;
if (SiteSp2Allele[SiteIndex] = FirstAllele) or
(SiteSp2Allele[SiteIndex] = SecondAllele) then
{Coalescence between species. }
begin
SiteFixedTime[SiteIndex] := LineagePolyTime;
SiteSp2Allele[SiteIndex] := 0;
end;
end;
FragAllele[FragIndex2] := 0;
NewFragOne := MaxFrag + 1;
NewFragTwo := MaxFrag + 2;
if (PositionOne < PositionThree)
and (PositionTwo < PositionFour) then
begin
FragStart[FragIndex] := PositionThree;
FragAllele[NewFragOne] := FirstAllele;
FragStart[NewFragOne] := PositionOne;
FragEnd[NewFragOne] := PositionThree;
FragAllele[NewFragTwo] := SecondAllele;
FragStart[NewFragTwo] := PositionTwo;
FragEnd[NewFragTwo] := PositionFour;
TempMaxFrag := NewFragTwo;
end;
if (PositionOne < PositionThree)
and (PositionTwo = PositionFour) then
begin
FragStart[FragIndex] := PositionThree;
FragAllele[NewFragOne] := FirstAllele;
FragStart[NewFragOne] := PositionOne;
FragEnd[NewFragOne] := PositionThree;
TempMaxFrag := NewFragOne;
end;
if (PositionOne < PositionThree)
and (PositionTwo > PositionFour) then
begin
FragStart[FragIndex] := PositionThree;
FragEnd[FragIndex] := PositionFour;
FragAllele[NewFragOne] := FirstAllele;
FragStart[NewFragOne] := PositionOne;
FragEnd[NewFragOne] := PositionThree;
FragAllele[NewFragTwo] := FirstAllele;
FragStart[NewFragTwo] := PositionFour;
FragEnd[NewFragTwo] := PositionTwo;
TempMaxFrag := NewFragTwo;
end;
if (PositionOne = PositionThree)
and (PositionTwo < PositionFour) then
begin
FragAllele[NewFragOne] := SecondAllele;
FragStart[NewFragOne] := PositionTwo;
FragEnd[NewFragOne] := PositionFour;
TempMaxFrag := NewFragOne;
end;
if (PositionOne = PositionThree)
and (PositionTwo > PositionFour) then
begin
FragEnd[FragIndex] := PositionFour;
FragAllele[NewFragOne] := FirstAllele;
FragStart[NewFragOne] := PositionFour;
FragEnd[NewFragOne] := PositionTwo;
TempMaxFrag := NewFragOne;
end;
if (PositionOne > PositionThree)
and (PositionTwo < PositionFour) then
begin
FragAllele[NewFragOne] := SecondAllele;
FragStart[NewFragOne] := PositionThree;
FragEnd[NewFragOne] := PositionOne;
FragAllele[NewFragTwo] := SecondAllele;
FragStart[NewFragTwo] := PositionTwo;
FragEnd[NewFragTwo] := PositionFour;
TempMaxFrag := NewFragTwo;
end;
if (PositionOne > PositionThree)
and (PositionTwo = PositionFour) then
begin
FragAllele[NewFragOne] := SecondAllele;
FragStart[NewFragOne] := PositionThree;
FragEnd[NewFragOne] := PositionOne;
TempMaxFrag := NewFragOne;
end;
if (PositionOne > PositionThree)
and (PositionTwo > PositionFour) then
begin
FragEnd[FragIndex] := PositionFour;
FragAllele[NewFragOne] := FirstAllele;
FragStart[NewFragOne] := PositionFour;
FragEnd[NewFragOne] := PositionTwo;
FragAllele[NewFragTwo] := SecondAllele;
FragStart[NewFragTwo] := PositionThree;
FragEnd[NewFragTwo] := PositionOne;
TempMaxFrag := NewFragTwo;
end;
end;
end;
end;
MaxFrag := TempMaxFrag;
end;
end;
for FragIndex := 1 to MaxFrag do {Rename all fragments from allele two}
{to allele one. }
begin
if FragAllele[FragIndex] = SecondAllele then
FragAllele[FragIndex] := FirstAllele;
end;
for SiteIndex := 1 to NumSites do {Rename all alleles from species B}
{from allele two to allele one.}
begin
if SiteSp2Allele[SiteIndex] = SecondAllele then
SiteSp2Allele[SiteIndex] := FirstAllele;
end;
end;
{Here is where a recombination event is simulated in the ancestral species.}
if RecombYes = 'y' then
begin
RecombSite := random * TotalAlleleLengthA;
FirstAllele := 0;
AlleleIndex := AlleleMinA - 1;
TotalAlleleLengthA := 0;
while FirstAllele = 0 do
begin
AlleleIndex := AlleleIndex + 1;
if AllelePresence[AlleleIndex] = 1 then
TotalAlleleLengthA := TotalAlleleLengthA + (AlleleEnd[AlleleIndex] -
(AlleleStart[AlleleIndex] + 1));
if TotalAlleleLengthA >= RecombSite then
FirstAllele := AlleleIndex;
end;
{Pick a new name for the recombined part of the allele, if it is from species A}
{(has a positive allele name). }
if FirstAllele > 0 then
begin
AlleleIndex := 0;
SecondAllele := 0;
while (AlleleIndex < MaxAlleleName) and (SecondAllele < 1) do
begin
AlleleIndex := AlleleIndex + 1;
if AllelePresence[AlleleIndex] = 0 then
begin
SecondAllele := AlleleIndex;
if SecondAllele > AlleleMaxA then
AlleleMaxA := SecondAllele;
end;
end;
end;
{Pick a new name for the recombined part of the allele, if it is from species B }
{(has a negative allele name). }
if FirstAllele < 0 then
begin
AlleleIndex := 0;
SecondAllele := 0;
while (AlleleIndex > (MaxAlleleName * (-1))) and (SecondAllele > -1) do
begin
AlleleIndex := AlleleIndex - 1;
if AllelePresence[AlleleIndex] = 0 then
begin
SecondAllele := AlleleIndex;
if SecondAllele < AlleleMinA then
AlleleMinA := SecondAllele;
end;
end;
end;
RecombPosition := AlleleStart[FirstAllele] + (1 +
round((AlleleEnd[FirstAllele] - AlleleStart[FirstAllele] - 1) * random));
NewFragOne := MaxFrag;
for FragIndex := 1 to MaxFrag do
begin
if FragAllele[FragIndex] = FirstAllele then
begin
if FragStart[FragIndex] >= RecombPosition then
begin
FragAllele[FragIndex] := SecondAllele;
end;
if (FragStart[FragIndex] < RecombPosition)
and (FragEnd[FragIndex] > RecombPosition) then
begin
NewFragOne := NewFragOne + 1;
FragAllele[NewFragOne] := SecondAllele;
FragStart[NewFragOne] := RecombPosition;
FragEnd[NewFragOne] := FragEnd[FragIndex];
FragEnd[FragIndex] := RecombPosition;
end;
end;
end;
MaxFrag := NewFragOne;
for SiteIndex := (RecombPosition + 1) to NumSites do
begin
if SiteSp2Allele[SiteIndex] = FirstAllele then
SiteSp2Allele[SiteIndex] := SecondAllele;
end;
end;
FragIndex := 1;
while FragIndex < MaxFrag do
begin
if FragAllele[FragIndex] = 0 then
begin
MaxFrag := MaxFrag - 1;
for FragIndex2 := FragIndex to MaxFrag do
begin
FragAllele[FragIndex2] := FragAllele[FragIndex2 + 1];
FragStart[FragIndex2] := FragStart[FragIndex2 + 1];
FragEnd[FragIndex2] := FragEnd[FragIndex2 + 1];
end;
FragAllele[MaxFrag + 1] := 0;
end;
FragIndex := FragIndex + 1;
end;
{Next some stuff is initialized. }
for AlleleIndex := AlleleMinA to AlleleMaxA do
begin
AllelePresence[AlleleIndex] := 0;
AlleleStart[AlleleIndex] := 0;
AlleleEnd[AlleleIndex] := 0;
end;
AlleleCountA := 0;
AlleleCountB := 0;
AlleleMinA := MaxAlleleName;
AlleleMaxA := MinAlleleName;
TotalFragLengthA := 0;
{The fragments are checked and the allele information is recorded. }
for FragIndex := 1 to MaxFrag do
begin
if ((FragAllele[FragIndex]) > 0.5) or ((FragAllele[FragIndex]) < -0.5) then
begin
FragName := FragAllele[FragIndex];
TotalFragLengthA := TotalFragLengthA + (FragEnd[FragIndex] -
FragStart[FragIndex]);
AlleleMaxA := greaterinteger(FragName, AlleleMaxA);
AlleleMinA := lesserinteger(FragName, AlleleMinA);
if AllelePresence[FragName] > 0.5 then
begin
AlleleStart[FragName] := LesserInteger(FragStart[FragIndex],
AlleleStart[FragName]);
AlleleEnd[FragName] := GreaterInteger(FragEnd[FragIndex],
AlleleEnd[FragName]);
end;
if AllelePresence[FragName] < 0.5 then
begin
AllelePresence[FragName] := 1;
AlleleStart[FragName] := FragStart[FragIndex];
AlleleEnd[FragName] := FragEnd[FragIndex];
AlleleCountA := AlleleCountA + 1;
end;
end;
end;
{The cumulative size of the alleles is measured, for use in choosing which allele to recombine. }
TotalAlleleLengthA := 0;
for AlleleIndex := AlleleMinA to AlleleMaxA do
begin
if AllelePresence[AlleleIndex] > 0.5 then
TotalAlleleLengthA := TotalAlleleLengthA + (AlleleEnd[AlleleIndex] -
(AlleleStart[AlleleIndex] + 1));
end;
end;
{The coalescence process is now done.}
{Having finished with the coalescence process, the times associated with the remaining fragments}
{need to be added to the site arrays.}
for FragIndex := 1 to MaxFrag do
begin
if FragAllele[FragIndex] <> 0 then
begin
for SiteIndex := (FragStart[FragIndex] + 1) to FragEnd[FragIndex] do
begin
SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex] +
SiteCoalesceTime[SiteIndex];
if SiteFixedTime[SiteIndex] > SiteCoalesceTime[SiteIndex] then
SiteFixedTime[SiteIndex] := SiteFixedTime[SiteIndex] +
(SiteFixedTime[SiteIndex] - SiteCoalesceTime[SiteIndex]);
end;
end;
end;
{Make the times in the two site arrays into cumulative times, to aid in the distribution of }
{polymorphic and fixed sites. }
for SiteIndex := 1 to NumSites do
begin
SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex - 1] + SitePolyTime[SiteIndex];
SiteFixedTime[SiteIndex] := SiteFixedTime[SiteIndex - 1] + SiteFixedTime[SiteIndex];
end;
{Sprinkle polymorphisms, with the probability of a site getting a polymorphism equalling the}
{proportion of the total polymorphism time that is at that site. }
for SiteIndex := 1 to NumPoly do
begin
SiteRandom := random * SitePolyTime[NumSites];
for SiteIndex2 := 1 to NumSites do
begin
if (SiteRandom >= SitePolyTime[SiteIndex2 - 1])
and (SiteRandom < SitePolyTime[SiteIndex2]) then
begin
SitePosition[SiteIndex] := random + SiteIndex2;
SitePoly[SiteIndex] := 1;
end;
end;
end;
{Sprinkle fixed differences, with the probability of a site getting a fixed difference equalling}
{the proportion of the total fixed time that is at that site. }
for SiteIndex := (NumPoly + 1) to NumVarSites do
begin
SiteRandom := random * SiteFixedTime[NumSites];
for SiteIndex2 := 1 to NumSites do
begin
if (SiteRandom >= SiteFixedTime[SiteIndex2 - 1])
and (SiteRandom < SiteFixedTime[SiteIndex2]) then
begin
SitePosition[SiteIndex] := random + SiteIndex2;
SitePoly[SiteIndex] := 0;
end;
end;
end;
{Sort the simulated site positions based on their random numbers.}
for SiteIndex := 1 to (NumVarSites - 1) do
begin
SortTest := 2 * NumSites;
for SortIndex := SiteIndex to NumVarSites do
begin
if (SitePosition[SortIndex] < SortTest) then
begin
SortTest := SitePosition[SortIndex];
SmallPosition := SortIndex;
end;
end;
TempPoly := SitePoly[SmallPosition];
SitePosition[SmallPosition] := SitePosition[SiteIndex];
SitePosition[SiteIndex] := SortTest;
SitePoly[SmallPosition] := SitePoly[SiteIndex];
SitePoly[SiteIndex] := TempPoly;
end;
{An array has now been created with polymorphic and fixed sites on it.}
{The next step is to calculate the four statistics for this simulated data set.}
{Calculate the mean and maximum sliding G for the simulated data. }
MaxGSim := 0;
maxAverageGSim := 0;
for WindowSize := MinWindowSize to maxWindowSize do
begin
averageGSim := 0;
for StartIndex := 1 to (1 + NumVarSites - WindowSize) do
begin
A := 0;
for SiteIndex := StartIndex to (StartIndex + WindowSize - 1) do
begin
A := A + SitePoly[SiteIndex];
end;
B := WindowSize - A;
C := NumPoly - A;
D := NumFixed - B;
GSim := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) -
flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D));
averageGSim := averageGSim + (GSim / (1 + NumVarSites - WindowSize));
if GSim > MaxGSim then
MaxGSim := GSim;
end;
if AverageGSim > maxAverageGSim then
maxAverageGSim := averageGSim;
end;
if MaxGSim > (MaxGReal * 0.999999) then {the .9999999 makes it count values that are }
{greater than or equal to the observed}
SigGCount[recombIndex] := SigGCount[recombIndex] + 1;
if maxAverageGSim > (maxAverageGReal * 0.999999) then
SigAverageGCount[recombIndex] := SigAverageGCount[recombIndex] + 1;
for windowSize := minWindowSize to maxWindowSize do
begin
if MaxGSim >= maxGWindowArray[windowSize] then
sigGCountArray[recombIndex, windowSize] := sigGCountArray[recombIndex, windowSize]+1;
end;
if maxGSim>100 then
maxGSim:=100;
maxGCountArray[recombIndex, round(10*maxGSim)]:=maxGCountArray[recombIndex, round(10*maxGSim)]+1;
{Count the number of runs for the simulated data. }
numRunsSim := 1;
for SiteIndex := 1 to (NumVarSites - 1) do
begin
if (SitePoly[SiteIndex] <> SitePoly[SiteIndex + 1]) then
numRunsSim := numRunsSim + 1;
end;
if numRunsSim <= numRunsReal then
SigRunsCount[recombIndex] := SigRunsCount[recombIndex] + 1;
{Compute the Kolmorgov-Smirnov statistic for the simulated data. }
KSSim := 0;
cumPoly := 0;
for SiteIndex := 1 to NumVarSites do
begin
cumPoly := cumPoly + sitePoly[siteIndex];
KS := (abs(cumPoly - (SiteIndex * (numPoly / NumVarSites)))) / NumVarSites;
if KS > KSSim then
KSSim := KS;
end;
if KSSim > (KSReal * 0.999999) then
SigKSCount[recombIndex] := SigKSCount[recombIndex] + 1;
until (RepIndex = NumReps);
RecombIndex := RecombIndex + 1;
end;
until (RecombIndex > NumRecomb);
{Write some stuff to the console.}
writeln;
writeln;
writeln('Number of sequences from ', NameSpeciesA, ': ', NumSequences : 4);
writeln('Outgroup species: ', NameSpeciesB);
writeln('Length of ', GeneName, ': ', NumSites : 5);
writeln('Number of polymorphisms: ', NumPoly : 4);
writeln('Number of fixed differences: ', NumFixed : 4);
writeln('Number of replicate simulations: ', NumReps : 5);
writeln;
writeln('Recomb. max G >= runs <= K-S >= avg. G >=');
if minWindowSize < maxWindowSize then
writeln('parameter ', MaxGReal : 8 : 4, numRunsReal : 8,' ', KSReal : 13 : 6,
maxAverageGReal : 11 : 4)
else
writeln('parameter ', ' ------ ', numRunsReal : 8, KSReal : 13 : 6,
' ------- ');
for recombIndex:=1 to numRecomb do
begin
if minWindowSize < maxWindowSize then {G tests could be done}
begin
writeln(myOutput, RecombArray[recombIndex] : 6 : 2, (SigGCount[recombIndex] / RepIndex) : 12 : 4,
(sigRunsCount[recombIndex] / RepIndex) : 12 : 4, (sigKSCount[recombIndex] / RepIndex) : 12 : 4,
(sigAverageGCount[recombIndex] / RepIndex) : 12 : 4);
writeln(RecombArray[recombIndex] : 6 : 2, (SigGCount[recombIndex] / RepIndex) : 12 : 4,
(sigRunsCount[recombIndex] / RepIndex) : 12 : 4, (sigKSCount[recombIndex] / RepIndex) : 12 : 4,
(sigAverageGCount[recombIndex] / RepIndex) : 12 : 4);
end
else {G tests couldn't be done}
begin
writeln(myOutput, RecombArray[recombIndex] : 6 : 2, ' ------ ', (sigRunsCount[recombIndex] / RepIndex) : 12 : 4,
(sigKSCount[recombIndex] / RepIndex) : 12 : 4, ' ------ ');
writeln(RecombArray[recombIndex] : 6 : 2, ' ------ ', (sigRunsCount[recombIndex] / RepIndex) : 12 : 4,
(sigKSCount[recombIndex] / RepIndex) : 12 : 4, ' ------ ');
end;
end;
writeln;
writeln(myOutput);
writeln('Recomb. Maximum G critical values');
write('parameter P<0.05 P<0.01');
writeln(myOutput, 'Recomb. Maximum G critical values');
write(myOutput, 'parameter P<0.05 P<0.01');
if minWindowSize0.95) then
begin
write(' ', (GIndex/10):6:1);
write(myOutput, ' ', (GIndex/10):6:1);
if GIndex>maxGCritRecomb then
maxGCritRecomb:=GIndex;
end;
if (cumGCountOld/NumReps<=0.99) and (cumGCountNew/NumReps>0.99) then
begin
write(' ', (GIndex/10):6:1);
write(myOutput, ' ', (GIndex/10):6:1);
end;
cumGCountOld:=cumGCountNew;
end;
if maxGCritRecomb>maxGCritOverall then
begin
maxGCritOverall:=maxGCritRecomb;
maxGRecombIndex:=recombIndex;
end;
end;
end;
writeln;
writeln(myOutput);
writeln(myOutput);
writeln(myOutput);
maxGCritOverall:=maxGCritOverall/10;
end;
{====================================================================================================}
{This procedure draws a crude console graph of the polymorphism/divergence ratio.}
{It also saves the numbers necessary to draw a better graph using a graphing program.}
procedure DoGraph;
var
A, B, C, D, siteIndex, startIndex, midPoint, horizIndex, vertIndex, yIndex, newV, upperCritV, lowerCritV, cumGCount: longint;
PgraphArray: array[0..horizSize, 0..vertSize] of char;
makeGraphTable: char;
GGraph: extended;
firstLine: boolean;
begin
upperCritV:=-1;
lowerCritV:=-1;
write('Would you like to save the numbers used in the graph? Enter "y" or "n": ');
readln(makeGraphTable);
if makeGraphTable='y' then
begin
writeln(myOutput);
writeln(myOutput);
writeln(myOutput, '===========Sliding window of ', graphWindowSize:3,' variable sites.========');
writeln(myOutput);
if graphWindowSize>=minWindowSize then
writeln(myOutput, ' Start Midpoint End prop. poly. G-value P-value');
if graphWindowSize=minWindowSize then
begin
{calculate the upper polymorphism value corresponding to the P<0.05 critical values}
firstLine:=TRUE;
for startIndex:=round(GraphWindowSize*NumPoly/NumVarSites) to GraphWindowSize do
begin
A:=startIndex;
B := GraphWindowSize - A;
C := NumPoly - A;
D := NumFixed - B;
GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) -
flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D));
if GGraph>=maxGCritOverall then
begin
newV := round(vertSize*A/GraphWindowSize);
if firstLine=TRUE then
begin
upperCritV:=newV;
for horizIndex:=1 to (horizSize-1) do
begin
PgraphArray[horizIndex, newV]:='.';
end;
firstLine:=FALSE;
end;
end;
end;
{calculate the lower polymorphism value corresponding to the P<0.05 critical values}
firstLine:=TRUE;
for startIndex:=round(GraphWindowSize*NumPoly/NumVarSites) downto 0 do
begin
A:=startIndex;
B := GraphWindowSize - A;
C := NumPoly - A;
D := NumFixed - B;
GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) -
flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D));
if GGraph>=maxGCritOverall then
begin
newV := round(vertSize*A/GraphWindowSize);
if firstLine=TRUE then
begin
lowerCritV:=newV;
for horizIndex:=1 to (horizSize-1) do
begin
PgraphArray[horizIndex, newV]:='.';
end;
firstLine:=FALSE;
end;
end;
end;
end;
{calculate proportion of varying sites that are polymorphic for sliding windows}
{for the console graph, put an 'X' in PgraphArray at the appropriate place.}
for StartIndex := 1 to (1 + NumVarSites - GraphWindowSize) do
begin
A := 0;
for SiteIndex := StartIndex to (StartIndex + GraphWindowSize - 1) do
begin
if (myRealPoly[SiteIndex] = 'P') then
A := A + 1;
end;
B := GraphWindowSize - A;
C := NumPoly - A;
D := NumFixed - B;
GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) -
flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D));
midPoint := round(horizSize*(((mySiteNumber[StartIndex] + mySiteNumber[StartIndex +
graphWindowSize - 1]) / 2.0)/mySiteNumber[NumVarSites]));
newV := round(vertSize*A/GraphWindowSize);
PgraphArray[midPoint, newV]:='X';
cumGCount:=0;
for GIndex:=0 to round(GGraph*10) do {calculate the cum. prob. for this poly. value}
begin
cumGCount:=cumGCount+maxGCountArray[maxGRecombIndex, GIndex];
end;
if (makeGraphTable='y') and (graphWindowSize>=minWindowSize) then
writeln(myOutput, startIndex : 4, mySiteNumber[StartIndex] : 9,
((mySiteNumber[StartIndex] + mySiteNumber[StartIndex +
GraphWindowSize - 1]) / 2.0) : 10 : 1,
mySiteNumber[StartIndex + GraphWindowSize - 1] : 8,
((A * 1.0) / GraphWindowSize) : 9 : 3, GGraph:12:3, 1-cumGCount/numReps:10:6);
if (makeGraphTable='y') and (graphWindowSize=minWindowSize) then
begin
writeln(myOutput);
writeln(myOutput);
writeln(myOutput,'All possible G-values and P-values for a window of ', graphWindowSize:3, ' variable sites.');
writeln(myOutput);
writeln(myOutput, 'prop. poly. G-value P-value');
for A:=0 to GraphWindowSize do
begin
B := GraphWindowSize - A;
C := NumPoly - A;
D := NumFixed - B;
GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) -
flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D));
cumGCount:=0;
for GIndex:=0 to round(GGraph*10) do {calculate the cum. prob. for this poly. value}
begin
cumGCount:=cumGCount+maxGCountArray[maxGRecombIndex, GIndex];
end;
writeln(myOutput, A/(GraphWindowSize):7:3,' ', GGraph:10:3, ' ', 1-cumGCount/numReps:10:6);
end;
end;
{draw the polymorphism graph}
writeln;
writeln;
for yIndex:=0 to vertSize do
begin
vertIndex:=vertSize-yIndex;
if vertIndex mod 4=0 then
write(' ', (vertIndex/vertSize):4:2,' |');
if vertIndex=((vertSize div 2)+1) then
write('poly |');
if (vertIndex mod 4>0) and (vertIndex<>((vertSize div 2)+1)) then
write(' |');
for horizIndex:=1 to (horizSize-1) do
write(PgraphArray[horizIndex, vertIndex]);
if (vertIndex=upperCritV) or (vertIndex=lowerCritV) then
writeln('| P<0.05');
if (vertIndex<>upperCritV) and (vertIndex<>lowerCritV) then
writeln('|');
end;
write(' ');
for horizIndex:=-3 to horizSize-3 do
begin
if (horizIndex+3) mod 10=0 then
write(round(mySiteNumber[1]+(mySiteNumber[NumVarSites]-mySiteNumber[1])*
((horizIndex+3)/horizSize)):6,' ');
end;
writeln;
writeln(' Midpoint of sliding window');
writeln;
end;
{====================================================================================================}
begin
randomize; {randomize the seed for the random number function}
writeln('=================================================================');
writeln;
writeln(' Welcome to DNA Slider.');
writeln;
writeln('For help on formatting the input file and other documentation, ');
writeln(' or a version of this program for a different operating system, ');
writeln(' see http://udel.edu/~mcdonald/aboutdnaslider.html.');
writeln;
write('Enter the name of your outgroup species: ');
readln(NameSpeciesB);
writeln;
write('Enter the name of your ingroup species: ');
readln(NameSpeciesA);
writeln;
rangeBad:=TRUE;
while rangeBad do
begin
write('Number of sequences in the ingroup species: ');
readln(NumSequences);
if (NumSequences>1) and (NumSequences<=maxNumSequences) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 2 to ', maxNumSequences:4);
end;
rangeBad:=TRUE;
while rangeBad do
begin
write('Length of the sequence, in nucleotides: ');
readln(NumSites);
if (NumSites>1) and (NumSites<=maxNumSites) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 10 to ', maxNumSites:5);
end;
rangeBad:=TRUE;
while rangeBad do
begin
write('Number of recombination parameters to try (1 to ', maxNumRecomb:3,'): ');
readln(NumRecomb);
if (NumRecomb>0) and (NumRecomb<=maxNumRecomb) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 1 to ', maxNumRecomb:3);
end;
for recombIndex:=1 to NumRecomb do
begin
rangeBad:=TRUE;
while rangeBad do
begin
write('Recombination parameter ', recombIndex:3, ': ');
readln(RecombArray[recombIndex]);
if (RecombArray[recombIndex]>-0.00001) and (RecombArray[recombIndex]<=maxRecomb) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 0 to ', maxRecomb:5);
end;
end;
rangeBad:=TRUE;
while rangeBad do
begin
write('Number of replicate simulations to run: ');
readln(NumReps);
if (NumReps>0) and (NumReps<=maxNumReps) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 1 to ', maxNumReps:7);
end;
writeln;
rangeBad:=TRUE;
while rangeBad do
begin
writeln('The input file must be a plain text file (with the extension ".txt")');
writeln(' in the same folder as this program.');
writeln;
write('Enter the name of the input file: ');
readln(InputFileString);
if InputFileString[length(InputFileString)-3]<>'.' then
InputFileString:=InputFileString+'.txt';
{The next few lines get the full path to the program, remove the name of the program, then
add the name of the input file. The result is that "inputFileString" is converted from
"input.txt" to "/Users/username/sliderfolder/input.txt" (on a Mac) or "C:\sliderfolder\input.txt" (on Windows).}
inputPath:=paramStr(0);
lastSlashPos:=0;
for stringIndex:=1 to length(inputPath) do
begin
if (inputPath[stringIndex]='/') or (inputPath[stringIndex]='\') then
lastSlashPos:=stringIndex;
end;
delete(inputPath, lastSlashPos+1, length(inputPath)-lastSlashPos);
inputFileFullString:=inputPath+inputFileString;
assign(myInput, inputfileFullstring);
{$I-}
reset(myInput);
{$I+}
if IOresult=0 then {IOresult=0 means the file to be reset exists}
rangeBad:=FALSE;
if rangeBad=TRUE then
begin
writeln;
writeln('I can''t find the file ', inputFileString, ', try again!');
end;
end;
rangeBad:=TRUE;
while rangeBad do
begin
writeln;
write('Enter the name of the output file: ');
readln(OutputFileString);
if OutputFileString[length(OutputFileString)-3]<>'.' then
OutputFileString:=OutputFileString+'.txt';
OutputFileFullString:=inputPath+OutputFileString;
assign(myOutput, OutputFileFullString);
{$I-}
reset(myOutput);
{$I+}
rangeBad:=FALSE;
if IOresult=0 then {IOresult=0 means the file to be reset exists}
begin
writeln('File ', OutputFileString, ' already exists. Do you want to overwrite it?');
write('Enter "y" or "n": ');
readln(overwriteYes);
if overwriteYes='Y' then
overwriteYes:='y';
if overwriteYes<>'y' then
rangeBad:=TRUE;
end;
end;
rewrite(myOutput);
DoAnalyzeInput;
DoRun;
writeln;
writeln('Now what would you like to do?');
write('Enter "a" to analyze the data again, "g" to draw a graph, "q" to quit: ');
readln(ActionChar);
while ActionChar<>'q' do
begin
if ActionChar='a' then
begin
writeln;
rangeBad:=TRUE;
while rangeBad do
begin
write('Number of recombination parameters to try (1 to ', maxNumRecomb:3,'): ');
readln(NumRecomb);
if (NumRecomb>0) and (NumRecomb<=maxNumRecomb) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 1 to ', maxNumRecomb:3);
end;
for recombIndex:=1 to NumRecomb do
begin
rangeBad:=TRUE;
while rangeBad do
begin
write('Recombination parameter ', recombIndex:3, ': ');
readln(RecombArray[recombIndex]);
if (RecombArray[recombIndex]>-0.000001) and (RecombArray[recombIndex]<=maxRecomb) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 0 to ', maxRecomb:5);
end;
end;
rangeBad:=TRUE;
while rangeBad do
begin
write('Number of replicate simulations to run: ');
readln(NumReps);
if (NumReps>0) and (NumReps<=maxNumReps) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 1 to ', maxNumReps:7);
end;
DoRun;
end;
if ActionChar='g' then
begin
writeln;
rangeBad:=TRUE;
while rangeBad do
begin
write('What window size (in number of variable sites) would you like? ');
readln(graphWindowSize);
if (graphWindowSize>0) and (graphWindowSize<=(NumVarSites)) then
rangeBad:=FALSE;
if rangeBad then
writeln('Enter a number from 1 to ', (NumVarSites):6);
end;
DoGraph;
end;
writeln;
writeln('Now what would you like to do?');
write('Enter "a" to analyze the data again, "g" to draw a graph, "q" to quit: ');
readln(ActionChar);
end;
close(myInput);
close(myOutput);
end.