program DNASlider; {DNA Slider is a public domain program for performing} {simulations-based statistical tests on DNA sequence polymorphism and divergence data.} {The tests are described in McDonald, J.H. 1998. Improved tests for heterogeneity} {across a region of DNA sequence in the ratio of polymorphism to divergence. Molecular} {Biology and Evolution 15: 377-384. The latest version of the} {program is available from http://udel.edu/~mcdonald} {Version 2.0, September 2005: This version. Ported to Windows and Mac OSX using} {the Free Pascal Compiler. The program now uses a simple console interface, } {rather than a graphical user interface. I eliminated the two Goss and Lewontin statistics and} {increased the maximum size of several variables.} {Version 1.11, January, 1998: Enlarged some of the parameters, such as the} {length of the sequence that could be analyzed; made minor changes to the help screens.} {Version 1.1, June 1997: Added help screens and made other improvements to the user interface.} {Version 1.0, December 1996.} {Written by: John H. McDonald} { Department of Biological Sciences} { University of Delaware} { Newark, Delaware 19716} { http://udel.edu/~mcdonald} { mcdonald@udel.edu} const {These constants are maximum values of various parameters; contact me if you need } {me to recompile the program with a larger value for some of these. } MaxNumSequences = 500; {The maximum number of alleles.} MaxNumPoly = 1000; {The maximum number of polymorphisms.} MaxNumFixed = 1000; {The maximum number of fixed differences.} MaxNumRunsObs = 2000; {The maximum number of runs observed in the data.} MaxNumSubs = 2000; {The maximum total number of variable sites.} MaxNumReps = 100000; {The maximum number of replications you can run.} MaxNumSites = 50000; {The maximum number of nucleotide sites in the sequence.} MaxNumRecomb = 20; {The maximum number of recombination parameters.} MaxRecomb = 500; {The maximum value for the recombination parameter.} {The last three constants refer to parameters used in the simulations; if the program crashes } {on big data sets (long sequences with lots of alleles), these may need to be increased.} MinAlleleName = -1000; {The minimum negative name for allele.} MaxAlleleName = 1000; {The maximum positive name for allele.} MaxNumFrags = 10000; {The maximum number of fragments.} VertSize = 20; {The height of the crude console graphs.} HorizSize = 60; {The width of the crude console graphs.} MaxGGraph = 36; {The maximum G-value shown on the console graph.} type AlleleArrayType = array[MinAlleleName..MaxAlleleName] of integer; var InputFileString,OutputFileString, inputFileFullString, outputFileFullString, inputPath: string[250]; {file names} myOutput, myInput: text; {The output and input files.} myDidReadInput, myFirstOutput, rangeBad: Boolean; actionChar, overwriteYes: char; recombIndex, stringIndex, GIndex, lastSlashPos: integer; {The following variables are input from the parameter dialog.} GeneName: string[250]; {The name of the gene.} NameSpeciesA: string[250]; {The name of the species with polymorphism data.} NameSpeciesB: string[250]; {The name of the outgroup species.} NumSequences: integer; {The number of sequences sampled.} NumSites: longint; {The number of nucleotides in the sequence.} NumRecomb: integer; {The number of different recombination parameters used.} RecombArray: array[1..MaxNumRecomb] of real; {The recombination parameters.} NumReps: longint; {The number of replications to run.} GraphWindowSize: integer; {The size of the sliding window for the graph.} {The following variables are calculated from the data set.} NumPoly: longint; {The number of polymorphisms observed.} NumFixed: longint; {The number of fixed differences observed.} NumVarSites: longint; {The total number of polymorphic and fixed sites.} MaxGReal: extended; {The maximum G value in the observed data.} maxAverageGReal: extended; {The maximum average G value in the observed data.} KSReal: extended; {The Kolmogorov-Smirnov value in the observed data.} numRunsReal: longint; {The number of runs in the observed data.} minWindowSize, maxWindowSize: longint; {The smallest and largest windows used for calculating } {the average and maximum sliding G values.} {The next two variables are read from the data file.} mySiteNumber: array[1..MaxNumSites] of integer; {The site number of each polymorphism or} {fixed difference.} myRealPoly: array[1..MaxNumSubs] of char; {P if poly, F if fixed difference.} {The following variables are used in the simulations; they are global variables only because} {they are so large.} SitePolyTime: array[0..MaxNumSites] of real; {The length of time summed across all branches } {within species A, for each site.} SiteFixedTime: array[0..MaxNumSites] of real; {The length of time from coalescence of A, } {to coalescence of A+B, plus B, for each site.} SiteCoalesceTime: array[0..MaxNumSites] of real; {For each site, the oldest coalescence } {of two alleles from species A.} FragAllele: array[0..MaxNumFrags] of integer; {The allele name for each fragment.} FragStart: array[0..MaxNumFrags] of integer; {The starting position of each fragment.} FragEnd: array[0..MaxNumFrags] of integer; {The ending position of each fragment.} AllelePresence: array[MinAlleleName..MaxAlleleName] of integer; {1 if that allele is present, 0 if it is not.} AlleleStart: array[MinAlleleName..MaxAlleleName] of integer; {The starting position of each allele.} AlleleEnd: array[MinAlleleName..MaxAlleleName] of integer; {The ending position of each allele.} SiteSp2Allele: array[0..MaxNumSites] of integer; {For each site, the name of the allele} {in species B.} SitePosition: array[1..MaxNumSubs] of real; {The position of sites in the simulations.} SitePoly: array[1..MaxNumSubs] of integer; {'1' for polymorphism, '0' for fixed.} {The next variables keep track of the significance of the sliding G values for each window size.} sigGCountArray: array[1..maxNumRecomb, 1..maxnumsubs] of longint; maxGWindowArray: array[1..MaxNumSubs] of extended; maxGCountArray: array[1..MaxNumRecomb, 0..1000] of longint; maxGCritOverall, maxGCritRecomb: extended; maxGRecombIndex: integer; {====================================================================================================} {This function returns the greater of two integers.} function GreaterInteger (x, y: integer): integer; var First, Greater: integer; begin First := x; Greater := y; if First > Greater then Greater := First; GreaterInteger := Greater; end; {====================================================================================================} {This function returns the lesser of two integers.} function LesserInteger (x, y: integer): integer; var First, Lesser: integer; begin First := x; Lesser := y; if First < Lesser then Lesser := First; LesserInteger := Lesser; end; {====================================================================================================} {This function returns a number times its natural log, or 0 if the number is 0.} function flnf (x: integer): extended; var F: integer; FResult: extended; begin F := x; FResult := 0; if F > 0 then FResult := F * ln(F); flnf := FResult; end; {====================================================================================================} {This procedure reads in the data from the input file, then calculates four statistics:} {the number of runs (McDonald 1996, Mol. Biol. Evol. 13:253-260),} {the Kolmogorov-Smirnov statistic (Sokal and Rohlf 1981, Biometry, p. 719-720),} {and the maximum sliding G-statistic and mean sliding G-statistic (McDonald 1998, Mol. Biol. Evol. 15: 377-384.} {The two sliding G-statistics are found by calculating the G-statistic} {(Sokal and Rohlf 1981, p. 709) for the 2x2 table comparing} {the number of polymorphisms and fixed differences inside and outside each window.} {All possible window positions are tried, and all window sizes are tried in which} {the smallest expected value in the 2x2 table is 5 or more.} {[or 3 if there are fewer than 11 polymorphisms or fixed differences.]} {The maximum sliding G is the largest of these G statistics.} {The mean sliding G statistic is found by taking the mean across all window positions} {of these G statistics for each window size and taking the largest of these means.} procedure DoAnalyzeInput; var A, B, C, D, Pindex, startIndex, siteIndex, cumPoly: longint; maxAverageGWindowSize,maxGWindowSize, windowSize, stringIndex: integer; averageGReal,GReal, KS: extended; tempString: string[63]; numSubsOK, stringUnread: boolean; begin readln(myInput, geneName);{The first line of the data file contains the name of the gene.} readln(myInput, NumVarSites); {The second line has an integer indicating the number of variable sites.} numSubsOK := TRUE; myDidReadInput:= TRUE; numPoly := 0; numFixed := 0; if numSubsOK then begin for Pindex := 1 to NumVarSites do {each remaining line contains a site number, followed by } {P or F for polymorphic or fixed} begin readln(myInput, mySiteNumber[Pindex], tempString); stringUnread:=TRUE; for stringIndex := 1 to length(tempString) do {lowercase is acceptable input} begin if ((tempString[stringIndex] = 'p') or (tempString[stringIndex] = 'P')) and stringUnread then begin myRealPoly[Pindex] := 'P'; stringUnread:= FALSE; end; if ((tempString[stringIndex] = 'f') or (tempString[stringIndex] = 'F')) and stringUnread then begin myRealPoly[Pindex] := 'F'; stringUnread:= FALSE; end; end; if (myRealPoly[Pindex] = 'P') then numPoly := numPoly + 1; if (myRealPoly[Pindex] = 'F') then numFixed := numFixed + 1; end; end; if myDidReadInput = TRUE then begin {Calculate the number of runs.} numRunsReal := 1; for SiteIndex := 1 to (NumVarSites - 1) do begin if (myRealPoly[SiteIndex] <> myRealPoly[SiteIndex + 1]) then numRunsReal := numRunsReal + 1; end; {Calculate the Kolmogorov-Smirnov statistic.} KSReal := 0; cumPoly := 0; for SiteIndex := 1 to NumVarSites do begin if (myRealPoly[SiteIndex] = 'P') then cumPoly := cumPoly + 1; KS := (abs(cumPoly - (SiteIndex * (numPoly / NumVarSites)))) / NumVarSites; if KS > KSReal then KSReal := KS; end; {Calculate maximum sliding G and mean sliding G.} MaxGReal := 0; MaxAverageGReal := 0; MinWindowSize := round(5.99 + (5 * (GreaterInteger(NumFixed, NumPoly) / LesserInteger(NumFixed, NumPoly)))); if LesserInteger(NumFixed, NumPoly) < 11 then MinWindowSize := round(3.99 + (3 * (GreaterInteger(NumFixed, NumPoly) / LesserInteger(NumFixed, NumPoly)))); MaxWindowSize := NumVarSites - minWindowSize; if minWindowSize < MaxWindowSize then begin for WindowSize := MinWindowSize to maxWindowSize do begin MaxGWindowArray[windowSize] := 0; AverageGReal := 0; for StartIndex := 1 to (1 + NumVarSites - WindowSize) do begin A := 0; for SiteIndex := StartIndex to (StartIndex + WindowSize - 1) do begin if (myRealPoly[siteIndex] = 'P') then A := A + 1; {number of polymorphic sites inside window} end; B := WindowSize - A; {number of fixed differences inside window} C := NumPoly - A; {number of polymorphic sites outside window} D := NumFixed - B; {number of fixed differences outside window} GReal := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) - flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D)); AverageGReal := AverageGReal + (GReal / (1 + NumVarSites - windowSize)); if GReal > MaxGWindowArray[WindowSize] then MaxGWindowArray[WindowSize] := GReal; if GReal > MaxGReal then begin MaxGReal := GReal; maxGWindowSize := WindowSize; end; end; if AverageGReal > MaxAverageGReal then begin MaxAverageGReal := AverageGReal; maxAverageGWindowSize := windowSize; end; end; end; {Output the results to the output file.} if myFirstOutput then begin writeln(myOutput, '============================================================'); writeln(myOutput); writeln(myOutput); myFirstOutput := FALSE; end; writeln(myOutput, 'There are ', numRunsReal : 4, ' runs in your data on ', GeneName, '.'); writeln(myOutput, 'The Kolmogorov-Smirnov statistic is ', KSReal : 8 : 6, '. '); if minWindowSize < maxWindowSize then {There were enough variable sites to do the G tests.} begin writeln(myOutput); writeln(myOutput, 'A window of ', maxAverageGWindowSize : 4, ' variable sites produces the largest average sliding G value.'); writeln(myOutput, 'For a window size of ', maxAverageGWindowSize : 4, ' the average sliding G value is ', maxAverageGReal : 8 : 4, '.'); writeln(myOutput); writeln(myOutput, 'A window of ', maxGWindowSize : 4, ' variable sites produces the largest maximum sliding G value.'); writeln(myOutput, 'For a window size of ', maxGWindowSize : 4, ' the maximum sliding G value is ', maxGReal : 8 : 4, '.'); writeln(myOutput); end; writeln(myOutput); if minWindowSize >= maxWindowSize then {Too few variable sites to do the G tests.} begin if numFixed > numPoly then writeln(myOutput, 'With only ', numPoly : 2, ' polymorphisms you cannot do the sliding window G-tests.'); if numFixed <= numPoly then writeln(myOutput, 'With only ', numFixed : 2, ' fixed differences you cannot do the sliding window G-tests.'); end; end; end; {====================================================================================================} {This procedure simulates neutral evolution, using the parameters entered from the parameter window} {and calculated from the input data. Each simulated sequence starts at the present as a single} {"allele" consisting of a single "fragment." Going back in time, a recombination event will break up} {an allele into two alleles. A coalescence event takes two alleles and combines them into one; the} {effect on the number of fragments depends on the pattern of overlap between the fragments in the two} {alleles. After recombination and coalescence, it is possible for a single allele to consist of two} {or more non-contiguous fragments. As coalescence and recombination are simulated, the total amount} {of time on the within-species part of the phylogeny is recorded for each nucleotide site, as is the} {time on the between-species part. After coalescence and recombination are simulated, the number of} {polymorphic sites in the real data is distibuted across the simulated sequence in proportion to the} {within-species time for each site; fixed differences are likewise distributed.} {The procedure then calculates the four statistics for the simulated data and compares them to} {the statistics for the real data. The procedure outputs the results to the screen and to the} {output file. For the maximum sliding G test, the output includes the significance level for each} {size of window applied to the real data. This is so that, for example, the user could see that a} {very large and heuristically unenlightening window gave the largest G and thus the "best"} {significance, but a smaller window had almost as great a G and yielded a much more detailed} {picture of the peaks and valleys of polymorphism to divergence ratio. } procedure DoRun; var TimePoly: real; {total time on all lineages within species A back to the most} {recent common ancestor of them all} TimeCoal: real; {time from the present back to the most recent common ancestor} {of the sampled sequences in species A} AlleleIndex: longint; {index used when running through the different alleles} TimeFixed: real; {time from the most recent common ancestor of species A, back to} {the coalescence of A+B, plus species B} TimeSplit: real; {time from the present back to the split between species A+B;} {note that while this is the most recent possible time for} {the coalescence of A+B sequences, on average that will happen} {1 time unit [2N generations] earlier)} RecombIndex: integer; {index of the recombination parameter being used} Recomb: real; {recombination parameter (4Nr) } SigGCount, SigRunsCount, SigKSCount, sigAverageGCount: array[1..maxNumRecomb] of longint; {number of simulations with the same or more extreme value than} {that observed} SiteIndex: longint; {index used when stepping through each site} RepIndex: longint; {index used to indicate which replicate is being run} LineagePolyTime: real; {the time during which there is more than one allele for at} {least one site} AlleleMaxA: integer; {the largest allele name in species A} AlleleMinA: integer; {the smallest allele name in species A} AlleleMaxB: integer; {the largest allele name in species B} AlleleMinB: integer; {the smallest allele name in species B} MaxFrag: longint; {the largest fragment name} MinFrag: longint; {the smallest fragment name} AlleleCountA: longint; {the number of alleles in species A} AlleleCountB: longint; {the number of alleles in species B} TotalAlleleLengthA: longint; {the number of possible recombination sites in all the alleles} {in species A} TotalAlleleLengthB: longint; {the number of possible recombination sites in all the alleles} {in species B} TotalFragLengthA: longint; {the total number of sites in all fragments in species A} TotalFragLengthB: longint; {the total number of sites in all fragments in species B} FragIndex: longint; {index used for stepping through the fragments} EventTime: real; {the time to the next coalescence or recombination event} RecombYes: string[1]; {'y' if the next event is a recombination, 'n' if it is} {a coalescence} RecombProb: real; {the probability that the next event is a recombination} FirstAllele: integer; {the first of two alleles to be coalesced} TempAllele: integer; {used in picking the two alleles to be coalesced} SecondAllele: integer; {the second of two alleles to be coalesced} TempMaxFrag: longint; {when coalescing two alleles, the largest name of a fragment} FragIndex2: longint; {index used in coalescing two fragments} PositionOne: longint; {when coalescing two alleles, the starting position of the first} {fragment} PositionTwo: longint; {ending position of first fragment} PositionThree: longint; {starting position of second fragment} PositionFour: longint; {ending position of second fragment} NewFragOne: longint; {when coalescing, name of first new fragment} NewFragTwo: longint; {when coalescing, name of second new fragment} SiteIndex2: longint; {index used for stepping through sites} RecombSite: real; {number used in choosing which allele to recombine within} RecombPosition: longint; {position within allele at which recombination takes place} FragName: longint; {when getting information about fragments, name of fragment} SiteRandom: real; {random number from 0 to 1, used to assign position of fixed} {and polymorphic sites in simulations} SortTest: real; {test variable used in sorting} SortIndex: longint; {index used during the sorting process} SmallPosition: longint; {variable used in sorting} TempPoly: integer; {'1' for polymorphism, '0' for fixed; after sorting} {values used for calculating the statistics for the simulated data} MaxGSim, GSim: extended; NumRunsSim, cumPoly, cumGCountOld, cumGCountNew: longint; KS, KSSim, maxAverageGSim, averageGSim: extended; StartIndex: longint; A, B, C, D: longint; windowSize: integer; begin {Write some stuff to the output file.} writeln(myOutput); writeln(myOutput, 'Number of sequences from ', NameSpeciesA, ': ', NumSequences : 4); writeln(myOutput, 'Outgroup species: ', NameSpeciesB); writeln(myOutput, 'Length of ', GeneName, ': ', NumSites : 5); writeln(myOutput, 'Number of polymorphisms: ', NumPoly : 4); writeln(myOutput, 'Number of fixed differences: ', NumFixed : 4); writeln(myOutput, 'Number of replicate simulations: ', NumReps : 5); writeln(myOutput); writeln(myOutput, 'Recomb. max G >= runs <= K-S >= avg. G >='); if minWindowSize < maxWindowSize then writeln(myOutput, 'parameter ', MaxGReal : 8 : 4, numRunsReal : 8, ' ', KSReal : 13 : 6, maxAverageGReal : 11 : 4) else writeln(myOutput, 'parameter ', ' ------ ', numRunsReal : 8, KSReal : 13 : 6, ' ------- '); {Some parameters are estimated from the input data, using equation 5 of Hudson 1990, Oxford Surv.} {Evol. Biol. 7:1-44. The mean time for J alleles to coalesce into J-1 alleles is 1 over the number} {of combinations of J items taken two at a time, or 1/((J*(J-1))/2). Of course,} {time is measured in units of 2N generations, where N is the number of diploid individuals.} TimePoly := 0; TimeCoal := 0; NumVarSites := NumFixed + NumPoly; for AlleleIndex := 2 to NumSequences do begin TimePoly := TimePoly + (2 / (AlleleIndex - 1)); TimeCoal := TimeCoal + (2 / (AlleleIndex * (AlleleIndex - 1))); end; {Assuming the mutation rate hasn't changed, and that the species are close enough that very few} {multiple hits have occurred, the ratio of the fixation time to the polymorphism time} {can be estimated from the ratio of the number of fixed differences to the number of polymorphisms.} TimeFixed := TimePoly * (NumFixed / NumPoly); {Assuming that the population size in the ancestral species was the same as in the two descendant} {species, the average time for an allele from species A to coalesce with an allele from species B,} {once the two species have coalesced into one species, is 2N generations, or 1 time unit.} {Therefore the time back to when species A and B coalesced is estimated as the time when alleles} {from A coalesced with alleles from B, minus 1.} TimeSplit := ((TimeFixed + TimeCoal) / 2) - 1; {Initialize a couple of things. } SitePolyTime[0] := 0; SiteFixedTime[0] := 0; {Do a set of replicate simulations for each recombination parameter chosen.} RecombIndex := 1; writeln; writeln(' Progress Bar'); {set up a progress bar on the screen} writeln('replicate 1 ',(numReps div 4):7,' ',(numReps div 2):7, ' ', 3*(numreps div 4):7, ' ',numReps:7); repeat begin Recomb := RecombArray[RecombIndex]; writeln; write('recomb #', recombIndex:2, ' '); {Initialize variables used in counting the number of simulations with each number of runs.} SigGCount[recombIndex] := 0; SigRunsCount[recombIndex] := 0; SigKSCount[recombIndex] := 0; sigAverageGCount[recombIndex] := 0; for windowSize := minWindowSize to maxWindowSize do begin sigGCountArray[recombIndex, windowSize] := 0; for GIndex:=0 to 1000 do begin maxGCountArray[recombIndex, GIndex]:=0; end; end; {Here is where each replicate simulation starts.} RepIndex := 0; repeat RepIndex := RepIndex + 1; if numReps>49 then {write a progress bar} begin if round(50.0*repIndex/numReps)>round(50.0*(repIndex-1)/numReps) then write('|'); end; {More initializing. } LineagePolyTime := 0; AlleleMaxA := NumSequences; AlleleMinA := 1; MaxFrag := NumSequences; AlleleCountA := NumSequences; TotalAlleleLengthA := (NumSites - 1) * NumSequences; {no recomb. at end of last site,} {thus Numsites-1. } TotalFragLengthA := NumSites * NumSequences; {Initialize values for species A.} for FragIndex := 1 to NumSequences do begin FragAllele[FragIndex] := FragIndex; FragStart[FragIndex] := 0; FragEnd[FragIndex] := NumSites; AllelePresence[FragIndex] := 1; AlleleStart[FragIndex] := 0; AlleleEnd[FragIndex] := NumSites; end; for SiteIndex := 1 to NumSites do begin SitePolyTime[SiteIndex] := 0; SiteFixedTime[SiteIndex] := 0; SiteCoalesceTime[SiteIndex] := 0; end; {Initialize the rest of the arrays with zeros.} for FragIndex := (NumSequences + 1) to MaxNumFrags do begin FragAllele[FragIndex] := 0; FragStart[FragIndex] := 0; FragEnd[FragIndex] := 0; end; for FragIndex := (NumSequences + 1) to MaxAlleleName do begin AllelePresence[FragIndex] := 0; AlleleStart[FragIndex] := 0; AlleleEnd[FragIndex] := 0; end; for FragIndex := MinAlleleName to 0 do begin AllelePresence[FragIndex] := 0; AlleleStart[FragIndex] := 0; AlleleEnd[FragIndex] := 0; end; {This part of the program coalesces the alleles within species A, back until the species split.} while LineagePolyTime < TimeSplit do begin if (Recomb < 0.01) and (AlleleCountA = 1) then {If there's no recombination and } LineagePolyTime := TimeSplit * 100; {all the alleles within species A} {are coalesced, go on to coalesce} {with species B.} if (Recomb > 0.01) or (AlleleCountA > 1) then begin {The next equation is modified from Hudson 1983, p. 197 (Theor. Pop. Biol 23:183-201).} {See also Hudson 1990, p. 42. It determines the time back to the next event, either a recombination} {or coalescence event. The recombination parameter is the parameter for one allele that is} {NumSites long, and therefore has (NumSites-1) possible recombination sites. } EventTime := (-1 * ln(random)) / ((Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) + (AlleleCountA * (AlleleCountA - 1))); LineagePolyTime := LineagePolyTime + EventTime; end; if LineagePolyTime < TimeSplit then {The two species are still separate.} begin {The next equation is modified from Hudson 1983, p. 197.} RecombProb := (Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) / ((Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) + (AlleleCountA * (AlleleCountA - 1))); {Use random number to decide whether event is recombination or coalescence. } RecombYes := 'n'; if random < RecombProb then RecombYes := 'y'; {Here is where a coalescence event is simulated.} if RecombYes = 'n' then begin {The two alleles to coalesce are chosen at random.} FirstAllele := 0; while FirstAllele = 0 do begin TempAllele := round(AlleleMinA + ((AlleleMaxA - (AlleleMinA - 1)) * random)); if AllelePresence[TempAllele] > 0.5 then FirstAllele := TempAllele; end; SecondAllele := 0; while SecondAllele = 0 do begin TempAllele := round(AlleleMinA + ((AlleleMaxA - (AlleleMinA - 1)) * random)); if (AllelePresence[TempAllele] > 0.5) and (TempAllele <> FirstAllele) then SecondAllele := TempAllele; end; {Each fragment is compared with every other fragment. If a fragment consists of allele two, it is} {renamed allele one. If an allele two fragment overlaps with an allele one fragment, the overlapping} {part of allele two disappears, the overlapping part of allele one becomes a new fragment, and a} {new fragment is made from the non-overlapping part of each fragment. The little comment diagrams} {that illustrate this process will look best in a fixed-width font such as Geneva or Courier.} FragIndex := 0; TempMaxFrag := MaxFrag; while FragIndex <= MaxFrag do begin FragIndex := FragIndex + 1; if FragAllele[FragIndex] = FirstAllele then begin for FragIndex2 := 1 to MaxFrag do begin if FragAllele[FragIndex2] = SecondAllele then {One fragment has} {allele one, one fragment has allele two.} begin PositionOne := FragStart[FragIndex]; PositionTwo := FragEnd[FragIndex]; PositionThree := FragStart[FragIndex2]; PositionFour := FragEnd[FragIndex2]; if (PositionTwo > PositionThree) and (PositionFour > PositionOne) then {There is overlap between the two fragments.} begin for SiteIndex := (1 + greaterinteger(PositionOne,PositionThree)) to lesserinteger(PositionTwo, PositionFour) do {Add appropriate times to site array. } begin SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex] + LineagePolyTime; SiteCoalesceTime[SiteIndex] := LineagePolyTime; end; FragAllele[FragIndex2] := 0; NewFragOne := MaxFrag + 1; NewFragTwo := MaxFrag + 2; if (PositionOne < PositionThree) and (PositionTwo < PositionFour) then begin FragStart[FragIndex] := PositionThree; {fragment allele } FragAllele[NewFragOne] := FirstAllele; { 1 ---------- 1 } FragStart[NewFragOne] := PositionOne; { 2 ----------- 2 } FragEnd[NewFragOne] := PositionThree; { V } FragAllele[NewFragTwo] := SecondAllele; { 1 ------- 1 } FragStart[NewFragTwo] := PositionTwo; { 3 --- 1 } FragEnd[NewFragTwo] := PositionFour; { 4 ---- 2 } TempMaxFrag := NewFragTwo; end; if (PositionOne < PositionThree) and (PositionTwo = PositionFour) then begin FragStart[FragIndex] := PositionThree; { 1 ---------- 1 } FragAllele[NewFragOne] := FirstAllele; { 2 ------- 2 } FragStart[NewFragOne] := PositionOne; { V } FragEnd[NewFragOne] := PositionThree; { 1 ------- 1 } TempMaxFrag := NewFragOne; { 3 --- 1 } end; if (PositionOne < PositionThree) and (PositionTwo > PositionFour) then begin FragStart[FragIndex] := PositionThree; { 1 ------------- 1 } FragEnd[FragIndex] := PositionFour; { 2 ------- 2 } FragAllele[NewFragOne] := FirstAllele; { V } FragStart[NewFragOne] := PositionOne; { 1 ------- 1 } FragEnd[NewFragOne] := PositionThree; { 3 --- 1 } FragAllele[NewFragTwo] := FirstAllele; { 4 --- 1 } FragStart[NewFragTwo] := PositionFour; FragEnd[NewFragTwo] := PositionTwo; TempMaxFrag := NewFragTwo; end; if (PositionOne = PositionThree) and (PositionTwo < PositionFour) then begin FragAllele[NewFragOne] := SecondAllele; { 1 ---------- 1 } FragStart[NewFragOne] := PositionTwo; { 2 ------------- 2 } FragEnd[NewFragOne] := PositionFour; { V } TempMaxFrag := NewFragOne; { 1 ---------- 1 } end; { 3 --- 2 } if (PositionOne = PositionThree) and (PositionTwo > PositionFour) then begin FragEnd[FragIndex] := PositionFour; { 1 ------------- 1 } FragAllele[NewFragOne] := FirstAllele; { 2 ---------- 2 } FragStart[NewFragOne] := PositionFour; { V } FragEnd[NewFragOne] := PositionTwo; { 1 ---------- 1 } TempMaxFrag := NewFragOne; { 3 --- 1 } end; if (PositionOne > PositionThree) and (PositionTwo < PositionFour) then begin FragAllele[NewFragOne] := SecondAllele; { 1 ------- 1 } FragStart[NewFragOne] := PositionThree; { 2 ------------- 2 } FragEnd[NewFragOne] := PositionOne; { V } FragAllele[NewFragTwo] := SecondAllele; { 1 ------- 1 } FragStart[NewFragTwo] := PositionTwo; { 3 --- 2 } FragEnd[NewFragTwo] := PositionFour; { 4 --- 2 } TempMaxFrag := NewFragTwo; end; if (PositionOne > PositionThree) and (PositionTwo = PositionFour) then begin FragAllele[NewFragOne] := SecondAllele; { 1 ---------- 1 } FragStart[NewFragOne] := PositionThree; { 2 ------------- 2 } FragEnd[NewFragOne] := PositionOne; { V } TempMaxFrag := NewFragOne; { 1 ---------- 1 } end; { 3 --- 2 } if (PositionOne > PositionThree) and (PositionTwo > PositionFour) then begin FragEnd[FragIndex] := PositionFour; { 1 ----------- 1 } FragAllele[NewFragOne] := FirstAllele; { 2 ----------- 2 } FragStart[NewFragOne] := PositionFour; { V } FragEnd[NewFragOne] := PositionTwo; { 1 -------- 1 } FragAllele[NewFragTwo] := SecondAllele; { 3 --- 1 } FragStart[NewFragTwo] := PositionThree; { 4 --- 2 } FragEnd[NewFragTwo] := PositionOne; TempMaxFrag := NewFragTwo; end; end; {Note that no new fragments need to be made if} {both fragments start and end at the same position.} end; end; MaxFrag := TempMaxFrag; end; end; for FragIndex := 1 to MaxFrag do {Rename all fragments from allele two} {to allele one.} begin if FragAllele[FragIndex] = SecondAllele then FragAllele[FragIndex] := FirstAllele; end; end; {Here is where a recombination event is simulated. The first step is to pick which allele to} {recombine within. The probability that an allele will be the site of a recombination event} {is proportional to the size of that allele. Note that the size of an allele is one less than the} {number of sites in the allele, because recombination must occur within an allele, not at its end.} if RecombYes = 'y' then begin RecombSite := random * TotalAlleleLengthA; FirstAllele := 0; AlleleIndex := AlleleMinA - 1; TotalAlleleLengthA := 0; while FirstAllele = 0 do begin AlleleIndex := AlleleIndex + 1; if AllelePresence[AlleleIndex] = 1 then TotalAlleleLengthA := TotalAlleleLengthA + (AlleleEnd[AlleleIndex] - (AlleleStart[AlleleIndex] + 1)); if TotalAlleleLengthA >= RecombSite then FirstAllele := AlleleIndex; end; {Pick a new name for the recombined part of the allele. } AlleleIndex := 0; SecondAllele := 0; while (AlleleIndex < MaxAlleleName) and (SecondAllele < 1) do begin AlleleIndex := AlleleIndex + 1; if AllelePresence[AlleleIndex] = 0 then begin SecondAllele := AlleleIndex; if SecondAllele > AlleleMaxA then AlleleMaxA := SecondAllele; end; end; {Choose a recombination position at random, then rename every fragment past that point from allele} {one to allele two. If the recombination position is within a fragment, a new fragment is created} {for the part of the fragment past the recombination position. } RecombPosition := AlleleStart[FirstAllele] + (1 + round((AlleleEnd[FirstAllele] - AlleleStart[FirstAllele] - 1) * random)); NewFragOne := MaxFrag; for FragIndex := 1 to MaxFrag do begin if FragAllele[FragIndex] = FirstAllele then begin if FragStart[FragIndex] >= RecombPosition then {Entire fragment is} {past recombination position.} begin FragAllele[FragIndex] := SecondAllele; end; if (FragStart[FragIndex] < RecombPosition) and (FragEnd[FragIndex] > RecombPosition) then {Recombination position is within fragment.} begin NewFragOne := NewFragOne + 1; FragAllele[NewFragOne] := SecondAllele; FragStart[NewFragOne] := RecombPosition; FragEnd[NewFragOne] := FragEnd[FragIndex]; FragEnd[FragIndex] := RecombPosition; end; end; end; MaxFrag := NewFragOne; end; {Having completed the coalescence or recombination event, it's time to see which alleles are present.} {The first step is to use up any empty fragment names, which speeds up the running of the program.} FragIndex := 1; while FragIndex < MaxFrag do begin if FragAllele[FragIndex] = 0 then begin MaxFrag := MaxFrag - 1; for FragIndex2 := FragIndex to MaxFrag do begin FragAllele[FragIndex2] := FragAllele[FragIndex2 + 1]; FragStart[FragIndex2] := FragStart[FragIndex2 + 1]; FragEnd[FragIndex2] := FragEnd[FragIndex2 + 1]; end; FragAllele[MaxFrag + 1] := 0; end; FragIndex := FragIndex + 1; end; {Next some stuff is initialized. } for AlleleIndex := AlleleMinA to AlleleMaxA do begin AllelePresence[AlleleIndex] := 0; AlleleStart[AlleleIndex] := 0; AlleleEnd[AlleleIndex] := 0; end; AlleleCountA := 0; AlleleMinA := MaxAlleleName; AlleleMaxA := MinAlleleName; TotalFragLengthA := 0; {Each fragment is compared with every other fragment. If a fragment consists of allele two, it} {is renamed allele one. If an allele two fragment overlaps with an allele one fragment, the} {overlapping part of allele two disappears, the overlapping part of allele one becomes a new} {fragment, and a new fragment is made from the non-overlapping part of each fragment.} for FragIndex := 1 to MaxFrag do begin if (FragAllele[FragIndex]) > 0.5 then begin FragName := FragAllele[FragIndex]; TotalFragLengthA := TotalFragLengthA + (FragEnd[FragIndex] - FragStart[FragIndex]); AlleleMaxA := greaterinteger(FragName, AlleleMaxA); AlleleMinA := lesserinteger(FragName, AlleleMinA); if AllelePresence[FragName] > 0.5 then {Not the first fragment with that allele name.} begin AlleleStart[FragName] := LesserInteger(FragStart[FragIndex], AlleleStart[FragName]); AlleleEnd[FragName] := GreaterInteger(FragEnd[FragIndex], AlleleEnd[FragName]); end; if AllelePresence[FragName] < 0.5 then {The first fragment with that allele name.} begin AllelePresence[FragName] := 1; AlleleStart[FragName] := FragStart[FragIndex]; AlleleEnd[FragName] := FragEnd[FragIndex]; AlleleCountA := AlleleCountA + 1; end; end; end; {The cumulative size of the alleles is measured, for use in choosing which allele to recombine.} TotalAlleleLengthA := 0; for AlleleIndex := AlleleMinA to AlleleMaxA do begin if AllelePresence[AlleleIndex] > 0.5 then TotalAlleleLengthA := TotalAlleleLengthA + (AlleleEnd[AlleleIndex] - (AlleleStart[AlleleIndex] + 1)); end; end; end; {Now it's time to simulate recombination and coalescence in species B going back to} {the species split. First, initialize values for species B.} LineagePolyTime := 0; AlleleCountB := 1; TotalAlleleLengthB := NumSites - 1; TotalFragLengthB := NumSites; AlleleMinB := -1; AlleleMaxB := -1; MinFrag := MaxFrag + 1; MaxFrag := MinFrag; FragAllele[MinFrag] := -1; FragStart[MinFrag] := 0; FragEnd[MinFrag] := NumSites; AllelePresence[-1] := 1; AlleleStart[-1] := 0; AlleleEnd[-1] := NumSites; for SiteIndex := 1 to NumSites do SiteSp2Allele[SiteIndex] := -1; while LineagePolyTime < TimeSplit do begin if Recomb < 0.01 then LineagePolyTime := TimeSplit * 100; if (Recomb > 0.01) then begin EventTime := (-1 * ln(random)) / ((Recomb * (TotalAlleleLengthB / (NumSites - 1.0))) + (AlleleCountB * (AlleleCountB - 1))); LineagePolyTime := LineagePolyTime + EventTime; end; if LineagePolyTime < TimeSplit then begin RecombProb := (Recomb * (TotalAlleleLengthB / (NumSites - 1.0))) / ((Recomb * (TotalAlleleLengthB / (NumSites - 1.0))) + (AlleleCountB * (AlleleCountB - 1))); RecombYes := 'n'; if random < RecombProb then RecombYes := 'y'; {Here is where a coalescence event is simulated within species B. } if RecombYes = 'n' then begin {Next the two alleles to coalesce are chosen at random. } FirstAllele := 0; while FirstAllele = 0 do begin TempAllele := round((AlleleMinB) - 0.5 + ((AlleleMaxB - (AlleleMinB - 1)) * random)); if AllelePresence[TempAllele] > 0.5 then FirstAllele := TempAllele; end; SecondAllele := 0; while SecondAllele = 0 do begin TempAllele := round((AlleleMinB) - 0.5 + ((AlleleMaxB - (AlleleMinB - 1)) * random)); if (AllelePresence[TempAllele] > 0.5) and (TempAllele <> FirstAllele) then SecondAllele := TempAllele; end; {Each fragment is compared with every other fragment. If a fragment consists of allele two,} {it is renamed allele one.} for FragIndex := MinFrag to MaxFrag do begin if FragAllele[FragIndex] = SecondAllele then FragAllele[FragIndex] := FirstAllele; end; end; {Here is where a recombination event is simulated in species B.} if RecombYes = 'y' then begin RecombSite := random * TotalAlleleLengthB; FirstAllele := 0; AlleleIndex := AlleleMinB - 1; TotalAlleleLengthB := 0; while FirstAllele = 0 do begin AlleleIndex := AlleleIndex + 1; if AllelePresence[AlleleIndex] = 1 then TotalAlleleLengthB := TotalAlleleLengthB + (AlleleEnd[AlleleIndex] - (AlleleStart[AlleleIndex] + 1)); if TotalAlleleLengthB >= RecombSite then FirstAllele := AlleleIndex; end; AlleleIndex := 0; SecondAllele := 0; while (AlleleIndex > MinAlleleName) and (SecondAllele > -1) do begin AlleleIndex := AlleleIndex - 1; if AllelePresence[AlleleIndex] = 0 then begin SecondAllele := AlleleIndex; if SecondAllele < AlleleMinB then AlleleMinB := SecondAllele; end; end; RecombPosition := AlleleStart[FirstAllele] + (1 + round((AlleleEnd[FirstAllele] - AlleleStart[FirstAllele] - 1) * random)); NewFragOne := MaxFrag; for FragIndex := MinFrag to MaxFrag do begin if FragAllele[FragIndex] = FirstAllele then begin if FragStart[FragIndex] >= RecombPosition then begin FragAllele[FragIndex] := SecondAllele; end; if (FragStart[FragIndex] < RecombPosition) and (FragEnd[FragIndex] > RecombPosition) then begin NewFragOne := NewFragOne + 1; FragAllele[NewFragOne] := SecondAllele; FragStart[NewFragOne] := RecombPosition; FragEnd[NewFragOne] := FragEnd[FragIndex]; FragEnd[FragIndex] := RecombPosition; end; end; end; MaxFrag := NewFragOne; end; for AlleleIndex := AlleleMinB to AlleleMaxB do begin AllelePresence[AlleleIndex] := 0; AlleleStart[AlleleIndex] := 0; AlleleEnd[AlleleIndex] := 0; end; AlleleCountB := 0; AlleleMinB := MaxAlleleName; AlleleMaxB := MinAlleleName; TotalFragLengthB := 0; {The fragments are checked and the allele information is recorded. } for FragIndex := MinFrag to MaxFrag do begin if ((FragAllele[FragIndex]) < -0.5) then begin FragName := FragAllele[FragIndex]; TotalFragLengthB := TotalFragLengthB + (FragEnd[FragIndex] - FragStart[FragIndex]); AlleleMaxB := greaterinteger(FragName, AlleleMaxB); AlleleMinB := lesserinteger(FragName, AlleleMinB); for SiteIndex := (FragStart[FragIndex] + 1) to FragEnd[FragIndex] do SiteSp2Allele[SiteIndex] := FragName; if AllelePresence[FragName] > 0.5 then begin AlleleStart[FragName] := lesserinteger(FragStart[FragIndex], AlleleStart[FragName]); AlleleEnd[FragName] := greaterinteger(FragEnd[FragIndex], AlleleEnd[FragName]); end; if AllelePresence[FragName] < 0.5 then begin AllelePresence[FragName] := 1; AlleleStart[FragName] := FragStart[FragIndex]; AlleleEnd[FragName] := FragEnd[FragIndex]; AlleleCountB := AlleleCountB + 1; end; end; end; {The cumulative size of the alleles is measured, for use in choosing which allele to recombine. } TotalAlleleLengthB := 0; for AlleleIndex := AlleleMinB to AlleleMaxB do begin if AllelePresence[AlleleIndex] > 0.5 then TotalAlleleLengthB := TotalAlleleLengthB + (AlleleEnd[AlleleIndex] - (AlleleStart[AlleleIndex] + 1)); end; end; end; {Now the recombination and coalescence are simulated within the ancestral species.} LineagePolyTime := TimeSplit; AlleleMinA := AlleleMinB; AlleleCountA := AlleleCountA + AlleleCountB; TotalAlleleLengthA := TotalAlleleLengthA + TotalAlleleLengthB; TotalFragLengthA := TotalFragLengthA + TotalFragLengthB; {This part of the program repeats until all of the alleles are coalesced.} while TotalFragLengthA > NumSites do begin EventTime := (-1 * ln(random)) / ((Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) + (AlleleCountA * (AlleleCountA - 1))); LineagePolyTime := LineagePolyTime + EventTime; RecombProb := (Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) / ((Recomb * (TotalAlleleLengthA / (NumSites - 1.0))) + (AlleleCountA * (AlleleCountA - 1))); RecombYes := 'n'; if random < RecombProb then RecombYes := 'y'; {Here is where a coalescence event is simulated. } if RecombYes = 'n' then begin FirstAllele := 0; while FirstAllele = 0 do begin TempAllele := round((AlleleMinA - 0.5) + ((AlleleMaxA - (AlleleMinA - 1)) * random)); if AllelePresence[TempAllele] > 0.5 then FirstAllele := TempAllele; end; SecondAllele := 0; while SecondAllele = 0 do begin TempAllele := round((AlleleMinA - 0.5) + ((AlleleMaxA - (AlleleMinA - 1)) * random)); if (AllelePresence[TempAllele] > 0.5) and (TempAllele <> FirstAllele) then SecondAllele := TempAllele; end; {Each fragment is compared with every other fragment. If a fragment consists of allele two, it} {is renamed allele one. If an allele two fragment overlaps with an allele one fragment, the} {overlapping part of allele two disappears, the overlapping part of allele one becomes a new} {fragment, and a new fragment is made from the non-overlapping part of each fragment.} FragIndex := 0; TempMaxFrag := MaxFrag; while FragIndex <= MaxFrag do begin FragIndex := FragIndex + 1; if FragAllele[FragIndex] = FirstAllele then begin for FragIndex2 := 1 to MaxFrag do begin if FragAllele[FragIndex2] = SecondAllele then begin PositionOne := FragStart[FragIndex]; PositionTwo := FragEnd[FragIndex]; PositionThree := FragStart[FragIndex2]; PositionFour := FragEnd[FragIndex2]; if (PositionTwo > PositionThree) and (PositionFour > PositionOne) then begin for SiteIndex := (1 + greaterinteger(PositionOne, PositionThree)) to lesserinteger(PositionTwo, PositionFour) do {Add appropriate times to site array. } begin if ((SiteSp2Allele[SiteIndex] <> FirstAllele) and (SiteSp2Allele[SiteIndex] <> SecondAllele)) and (SiteSp2Allele[SiteIndex] <> 0) then {coalescence within species} begin SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex] + LineagePolyTime; SiteCoalesceTime[SiteIndex] := LineagePolyTime; end; if (SiteSp2Allele[SiteIndex] = FirstAllele) or (SiteSp2Allele[SiteIndex] = SecondAllele) then {Coalescence between species. } begin SiteFixedTime[SiteIndex] := LineagePolyTime; SiteSp2Allele[SiteIndex] := 0; end; end; FragAllele[FragIndex2] := 0; NewFragOne := MaxFrag + 1; NewFragTwo := MaxFrag + 2; if (PositionOne < PositionThree) and (PositionTwo < PositionFour) then begin FragStart[FragIndex] := PositionThree; FragAllele[NewFragOne] := FirstAllele; FragStart[NewFragOne] := PositionOne; FragEnd[NewFragOne] := PositionThree; FragAllele[NewFragTwo] := SecondAllele; FragStart[NewFragTwo] := PositionTwo; FragEnd[NewFragTwo] := PositionFour; TempMaxFrag := NewFragTwo; end; if (PositionOne < PositionThree) and (PositionTwo = PositionFour) then begin FragStart[FragIndex] := PositionThree; FragAllele[NewFragOne] := FirstAllele; FragStart[NewFragOne] := PositionOne; FragEnd[NewFragOne] := PositionThree; TempMaxFrag := NewFragOne; end; if (PositionOne < PositionThree) and (PositionTwo > PositionFour) then begin FragStart[FragIndex] := PositionThree; FragEnd[FragIndex] := PositionFour; FragAllele[NewFragOne] := FirstAllele; FragStart[NewFragOne] := PositionOne; FragEnd[NewFragOne] := PositionThree; FragAllele[NewFragTwo] := FirstAllele; FragStart[NewFragTwo] := PositionFour; FragEnd[NewFragTwo] := PositionTwo; TempMaxFrag := NewFragTwo; end; if (PositionOne = PositionThree) and (PositionTwo < PositionFour) then begin FragAllele[NewFragOne] := SecondAllele; FragStart[NewFragOne] := PositionTwo; FragEnd[NewFragOne] := PositionFour; TempMaxFrag := NewFragOne; end; if (PositionOne = PositionThree) and (PositionTwo > PositionFour) then begin FragEnd[FragIndex] := PositionFour; FragAllele[NewFragOne] := FirstAllele; FragStart[NewFragOne] := PositionFour; FragEnd[NewFragOne] := PositionTwo; TempMaxFrag := NewFragOne; end; if (PositionOne > PositionThree) and (PositionTwo < PositionFour) then begin FragAllele[NewFragOne] := SecondAllele; FragStart[NewFragOne] := PositionThree; FragEnd[NewFragOne] := PositionOne; FragAllele[NewFragTwo] := SecondAllele; FragStart[NewFragTwo] := PositionTwo; FragEnd[NewFragTwo] := PositionFour; TempMaxFrag := NewFragTwo; end; if (PositionOne > PositionThree) and (PositionTwo = PositionFour) then begin FragAllele[NewFragOne] := SecondAllele; FragStart[NewFragOne] := PositionThree; FragEnd[NewFragOne] := PositionOne; TempMaxFrag := NewFragOne; end; if (PositionOne > PositionThree) and (PositionTwo > PositionFour) then begin FragEnd[FragIndex] := PositionFour; FragAllele[NewFragOne] := FirstAllele; FragStart[NewFragOne] := PositionFour; FragEnd[NewFragOne] := PositionTwo; FragAllele[NewFragTwo] := SecondAllele; FragStart[NewFragTwo] := PositionThree; FragEnd[NewFragTwo] := PositionOne; TempMaxFrag := NewFragTwo; end; end; end; end; MaxFrag := TempMaxFrag; end; end; for FragIndex := 1 to MaxFrag do {Rename all fragments from allele two} {to allele one. } begin if FragAllele[FragIndex] = SecondAllele then FragAllele[FragIndex] := FirstAllele; end; for SiteIndex := 1 to NumSites do {Rename all alleles from species B} {from allele two to allele one.} begin if SiteSp2Allele[SiteIndex] = SecondAllele then SiteSp2Allele[SiteIndex] := FirstAllele; end; end; {Here is where a recombination event is simulated in the ancestral species.} if RecombYes = 'y' then begin RecombSite := random * TotalAlleleLengthA; FirstAllele := 0; AlleleIndex := AlleleMinA - 1; TotalAlleleLengthA := 0; while FirstAllele = 0 do begin AlleleIndex := AlleleIndex + 1; if AllelePresence[AlleleIndex] = 1 then TotalAlleleLengthA := TotalAlleleLengthA + (AlleleEnd[AlleleIndex] - (AlleleStart[AlleleIndex] + 1)); if TotalAlleleLengthA >= RecombSite then FirstAllele := AlleleIndex; end; {Pick a new name for the recombined part of the allele, if it is from species A} {(has a positive allele name). } if FirstAllele > 0 then begin AlleleIndex := 0; SecondAllele := 0; while (AlleleIndex < MaxAlleleName) and (SecondAllele < 1) do begin AlleleIndex := AlleleIndex + 1; if AllelePresence[AlleleIndex] = 0 then begin SecondAllele := AlleleIndex; if SecondAllele > AlleleMaxA then AlleleMaxA := SecondAllele; end; end; end; {Pick a new name for the recombined part of the allele, if it is from species B } {(has a negative allele name). } if FirstAllele < 0 then begin AlleleIndex := 0; SecondAllele := 0; while (AlleleIndex > (MaxAlleleName * (-1))) and (SecondAllele > -1) do begin AlleleIndex := AlleleIndex - 1; if AllelePresence[AlleleIndex] = 0 then begin SecondAllele := AlleleIndex; if SecondAllele < AlleleMinA then AlleleMinA := SecondAllele; end; end; end; RecombPosition := AlleleStart[FirstAllele] + (1 + round((AlleleEnd[FirstAllele] - AlleleStart[FirstAllele] - 1) * random)); NewFragOne := MaxFrag; for FragIndex := 1 to MaxFrag do begin if FragAllele[FragIndex] = FirstAllele then begin if FragStart[FragIndex] >= RecombPosition then begin FragAllele[FragIndex] := SecondAllele; end; if (FragStart[FragIndex] < RecombPosition) and (FragEnd[FragIndex] > RecombPosition) then begin NewFragOne := NewFragOne + 1; FragAllele[NewFragOne] := SecondAllele; FragStart[NewFragOne] := RecombPosition; FragEnd[NewFragOne] := FragEnd[FragIndex]; FragEnd[FragIndex] := RecombPosition; end; end; end; MaxFrag := NewFragOne; for SiteIndex := (RecombPosition + 1) to NumSites do begin if SiteSp2Allele[SiteIndex] = FirstAllele then SiteSp2Allele[SiteIndex] := SecondAllele; end; end; FragIndex := 1; while FragIndex < MaxFrag do begin if FragAllele[FragIndex] = 0 then begin MaxFrag := MaxFrag - 1; for FragIndex2 := FragIndex to MaxFrag do begin FragAllele[FragIndex2] := FragAllele[FragIndex2 + 1]; FragStart[FragIndex2] := FragStart[FragIndex2 + 1]; FragEnd[FragIndex2] := FragEnd[FragIndex2 + 1]; end; FragAllele[MaxFrag + 1] := 0; end; FragIndex := FragIndex + 1; end; {Next some stuff is initialized. } for AlleleIndex := AlleleMinA to AlleleMaxA do begin AllelePresence[AlleleIndex] := 0; AlleleStart[AlleleIndex] := 0; AlleleEnd[AlleleIndex] := 0; end; AlleleCountA := 0; AlleleCountB := 0; AlleleMinA := MaxAlleleName; AlleleMaxA := MinAlleleName; TotalFragLengthA := 0; {The fragments are checked and the allele information is recorded. } for FragIndex := 1 to MaxFrag do begin if ((FragAllele[FragIndex]) > 0.5) or ((FragAllele[FragIndex]) < -0.5) then begin FragName := FragAllele[FragIndex]; TotalFragLengthA := TotalFragLengthA + (FragEnd[FragIndex] - FragStart[FragIndex]); AlleleMaxA := greaterinteger(FragName, AlleleMaxA); AlleleMinA := lesserinteger(FragName, AlleleMinA); if AllelePresence[FragName] > 0.5 then begin AlleleStart[FragName] := LesserInteger(FragStart[FragIndex], AlleleStart[FragName]); AlleleEnd[FragName] := GreaterInteger(FragEnd[FragIndex], AlleleEnd[FragName]); end; if AllelePresence[FragName] < 0.5 then begin AllelePresence[FragName] := 1; AlleleStart[FragName] := FragStart[FragIndex]; AlleleEnd[FragName] := FragEnd[FragIndex]; AlleleCountA := AlleleCountA + 1; end; end; end; {The cumulative size of the alleles is measured, for use in choosing which allele to recombine. } TotalAlleleLengthA := 0; for AlleleIndex := AlleleMinA to AlleleMaxA do begin if AllelePresence[AlleleIndex] > 0.5 then TotalAlleleLengthA := TotalAlleleLengthA + (AlleleEnd[AlleleIndex] - (AlleleStart[AlleleIndex] + 1)); end; end; {The coalescence process is now done.} {Having finished with the coalescence process, the times associated with the remaining fragments} {need to be added to the site arrays.} for FragIndex := 1 to MaxFrag do begin if FragAllele[FragIndex] <> 0 then begin for SiteIndex := (FragStart[FragIndex] + 1) to FragEnd[FragIndex] do begin SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex] + SiteCoalesceTime[SiteIndex]; if SiteFixedTime[SiteIndex] > SiteCoalesceTime[SiteIndex] then SiteFixedTime[SiteIndex] := SiteFixedTime[SiteIndex] + (SiteFixedTime[SiteIndex] - SiteCoalesceTime[SiteIndex]); end; end; end; {Make the times in the two site arrays into cumulative times, to aid in the distribution of } {polymorphic and fixed sites. } for SiteIndex := 1 to NumSites do begin SitePolyTime[SiteIndex] := SitePolyTime[SiteIndex - 1] + SitePolyTime[SiteIndex]; SiteFixedTime[SiteIndex] := SiteFixedTime[SiteIndex - 1] + SiteFixedTime[SiteIndex]; end; {Sprinkle polymorphisms, with the probability of a site getting a polymorphism equalling the} {proportion of the total polymorphism time that is at that site. } for SiteIndex := 1 to NumPoly do begin SiteRandom := random * SitePolyTime[NumSites]; for SiteIndex2 := 1 to NumSites do begin if (SiteRandom >= SitePolyTime[SiteIndex2 - 1]) and (SiteRandom < SitePolyTime[SiteIndex2]) then begin SitePosition[SiteIndex] := random + SiteIndex2; SitePoly[SiteIndex] := 1; end; end; end; {Sprinkle fixed differences, with the probability of a site getting a fixed difference equalling} {the proportion of the total fixed time that is at that site. } for SiteIndex := (NumPoly + 1) to NumVarSites do begin SiteRandom := random * SiteFixedTime[NumSites]; for SiteIndex2 := 1 to NumSites do begin if (SiteRandom >= SiteFixedTime[SiteIndex2 - 1]) and (SiteRandom < SiteFixedTime[SiteIndex2]) then begin SitePosition[SiteIndex] := random + SiteIndex2; SitePoly[SiteIndex] := 0; end; end; end; {Sort the simulated site positions based on their random numbers.} for SiteIndex := 1 to (NumVarSites - 1) do begin SortTest := 2 * NumSites; for SortIndex := SiteIndex to NumVarSites do begin if (SitePosition[SortIndex] < SortTest) then begin SortTest := SitePosition[SortIndex]; SmallPosition := SortIndex; end; end; TempPoly := SitePoly[SmallPosition]; SitePosition[SmallPosition] := SitePosition[SiteIndex]; SitePosition[SiteIndex] := SortTest; SitePoly[SmallPosition] := SitePoly[SiteIndex]; SitePoly[SiteIndex] := TempPoly; end; {An array has now been created with polymorphic and fixed sites on it.} {The next step is to calculate the four statistics for this simulated data set.} {Calculate the mean and maximum sliding G for the simulated data. } MaxGSim := 0; maxAverageGSim := 0; for WindowSize := MinWindowSize to maxWindowSize do begin averageGSim := 0; for StartIndex := 1 to (1 + NumVarSites - WindowSize) do begin A := 0; for SiteIndex := StartIndex to (StartIndex + WindowSize - 1) do begin A := A + SitePoly[SiteIndex]; end; B := WindowSize - A; C := NumPoly - A; D := NumFixed - B; GSim := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) - flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D)); averageGSim := averageGSim + (GSim / (1 + NumVarSites - WindowSize)); if GSim > MaxGSim then MaxGSim := GSim; end; if AverageGSim > maxAverageGSim then maxAverageGSim := averageGSim; end; if MaxGSim > (MaxGReal * 0.999999) then {the .9999999 makes it count values that are } {greater than or equal to the observed} SigGCount[recombIndex] := SigGCount[recombIndex] + 1; if maxAverageGSim > (maxAverageGReal * 0.999999) then SigAverageGCount[recombIndex] := SigAverageGCount[recombIndex] + 1; for windowSize := minWindowSize to maxWindowSize do begin if MaxGSim >= maxGWindowArray[windowSize] then sigGCountArray[recombIndex, windowSize] := sigGCountArray[recombIndex, windowSize]+1; end; if maxGSim>100 then maxGSim:=100; maxGCountArray[recombIndex, round(10*maxGSim)]:=maxGCountArray[recombIndex, round(10*maxGSim)]+1; {Count the number of runs for the simulated data. } numRunsSim := 1; for SiteIndex := 1 to (NumVarSites - 1) do begin if (SitePoly[SiteIndex] <> SitePoly[SiteIndex + 1]) then numRunsSim := numRunsSim + 1; end; if numRunsSim <= numRunsReal then SigRunsCount[recombIndex] := SigRunsCount[recombIndex] + 1; {Compute the Kolmorgov-Smirnov statistic for the simulated data. } KSSim := 0; cumPoly := 0; for SiteIndex := 1 to NumVarSites do begin cumPoly := cumPoly + sitePoly[siteIndex]; KS := (abs(cumPoly - (SiteIndex * (numPoly / NumVarSites)))) / NumVarSites; if KS > KSSim then KSSim := KS; end; if KSSim > (KSReal * 0.999999) then SigKSCount[recombIndex] := SigKSCount[recombIndex] + 1; until (RepIndex = NumReps); RecombIndex := RecombIndex + 1; end; until (RecombIndex > NumRecomb); {Write some stuff to the console.} writeln; writeln; writeln('Number of sequences from ', NameSpeciesA, ': ', NumSequences : 4); writeln('Outgroup species: ', NameSpeciesB); writeln('Length of ', GeneName, ': ', NumSites : 5); writeln('Number of polymorphisms: ', NumPoly : 4); writeln('Number of fixed differences: ', NumFixed : 4); writeln('Number of replicate simulations: ', NumReps : 5); writeln; writeln('Recomb. max G >= runs <= K-S >= avg. G >='); if minWindowSize < maxWindowSize then writeln('parameter ', MaxGReal : 8 : 4, numRunsReal : 8,' ', KSReal : 13 : 6, maxAverageGReal : 11 : 4) else writeln('parameter ', ' ------ ', numRunsReal : 8, KSReal : 13 : 6, ' ------- '); for recombIndex:=1 to numRecomb do begin if minWindowSize < maxWindowSize then {G tests could be done} begin writeln(myOutput, RecombArray[recombIndex] : 6 : 2, (SigGCount[recombIndex] / RepIndex) : 12 : 4, (sigRunsCount[recombIndex] / RepIndex) : 12 : 4, (sigKSCount[recombIndex] / RepIndex) : 12 : 4, (sigAverageGCount[recombIndex] / RepIndex) : 12 : 4); writeln(RecombArray[recombIndex] : 6 : 2, (SigGCount[recombIndex] / RepIndex) : 12 : 4, (sigRunsCount[recombIndex] / RepIndex) : 12 : 4, (sigKSCount[recombIndex] / RepIndex) : 12 : 4, (sigAverageGCount[recombIndex] / RepIndex) : 12 : 4); end else {G tests couldn't be done} begin writeln(myOutput, RecombArray[recombIndex] : 6 : 2, ' ------ ', (sigRunsCount[recombIndex] / RepIndex) : 12 : 4, (sigKSCount[recombIndex] / RepIndex) : 12 : 4, ' ------ '); writeln(RecombArray[recombIndex] : 6 : 2, ' ------ ', (sigRunsCount[recombIndex] / RepIndex) : 12 : 4, (sigKSCount[recombIndex] / RepIndex) : 12 : 4, ' ------ '); end; end; writeln; writeln(myOutput); writeln('Recomb. Maximum G critical values'); write('parameter P<0.05 P<0.01'); writeln(myOutput, 'Recomb. Maximum G critical values'); write(myOutput, 'parameter P<0.05 P<0.01'); if minWindowSize0.95) then begin write(' ', (GIndex/10):6:1); write(myOutput, ' ', (GIndex/10):6:1); if GIndex>maxGCritRecomb then maxGCritRecomb:=GIndex; end; if (cumGCountOld/NumReps<=0.99) and (cumGCountNew/NumReps>0.99) then begin write(' ', (GIndex/10):6:1); write(myOutput, ' ', (GIndex/10):6:1); end; cumGCountOld:=cumGCountNew; end; if maxGCritRecomb>maxGCritOverall then begin maxGCritOverall:=maxGCritRecomb; maxGRecombIndex:=recombIndex; end; end; end; writeln; writeln(myOutput); writeln(myOutput); writeln(myOutput); maxGCritOverall:=maxGCritOverall/10; end; {====================================================================================================} {This procedure draws a crude console graph of the polymorphism/divergence ratio.} {It also saves the numbers necessary to draw a better graph using a graphing program.} procedure DoGraph; var A, B, C, D, siteIndex, startIndex, midPoint, horizIndex, vertIndex, yIndex, newV, upperCritV, lowerCritV, cumGCount: longint; PgraphArray: array[0..horizSize, 0..vertSize] of char; makeGraphTable: char; GGraph: extended; firstLine: boolean; begin upperCritV:=-1; lowerCritV:=-1; write('Would you like to save the numbers used in the graph? Enter "y" or "n": '); readln(makeGraphTable); if makeGraphTable='y' then begin writeln(myOutput); writeln(myOutput); writeln(myOutput, '===========Sliding window of ', graphWindowSize:3,' variable sites.========'); writeln(myOutput); if graphWindowSize>=minWindowSize then writeln(myOutput, ' Start Midpoint End prop. poly. G-value P-value'); if graphWindowSize=minWindowSize then begin {calculate the upper polymorphism value corresponding to the P<0.05 critical values} firstLine:=TRUE; for startIndex:=round(GraphWindowSize*NumPoly/NumVarSites) to GraphWindowSize do begin A:=startIndex; B := GraphWindowSize - A; C := NumPoly - A; D := NumFixed - B; GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) - flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D)); if GGraph>=maxGCritOverall then begin newV := round(vertSize*A/GraphWindowSize); if firstLine=TRUE then begin upperCritV:=newV; for horizIndex:=1 to (horizSize-1) do begin PgraphArray[horizIndex, newV]:='.'; end; firstLine:=FALSE; end; end; end; {calculate the lower polymorphism value corresponding to the P<0.05 critical values} firstLine:=TRUE; for startIndex:=round(GraphWindowSize*NumPoly/NumVarSites) downto 0 do begin A:=startIndex; B := GraphWindowSize - A; C := NumPoly - A; D := NumFixed - B; GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) - flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D)); if GGraph>=maxGCritOverall then begin newV := round(vertSize*A/GraphWindowSize); if firstLine=TRUE then begin lowerCritV:=newV; for horizIndex:=1 to (horizSize-1) do begin PgraphArray[horizIndex, newV]:='.'; end; firstLine:=FALSE; end; end; end; end; {calculate proportion of varying sites that are polymorphic for sliding windows} {for the console graph, put an 'X' in PgraphArray at the appropriate place.} for StartIndex := 1 to (1 + NumVarSites - GraphWindowSize) do begin A := 0; for SiteIndex := StartIndex to (StartIndex + GraphWindowSize - 1) do begin if (myRealPoly[SiteIndex] = 'P') then A := A + 1; end; B := GraphWindowSize - A; C := NumPoly - A; D := NumFixed - B; GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) - flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D)); midPoint := round(horizSize*(((mySiteNumber[StartIndex] + mySiteNumber[StartIndex + graphWindowSize - 1]) / 2.0)/mySiteNumber[NumVarSites])); newV := round(vertSize*A/GraphWindowSize); PgraphArray[midPoint, newV]:='X'; cumGCount:=0; for GIndex:=0 to round(GGraph*10) do {calculate the cum. prob. for this poly. value} begin cumGCount:=cumGCount+maxGCountArray[maxGRecombIndex, GIndex]; end; if (makeGraphTable='y') and (graphWindowSize>=minWindowSize) then writeln(myOutput, startIndex : 4, mySiteNumber[StartIndex] : 9, ((mySiteNumber[StartIndex] + mySiteNumber[StartIndex + GraphWindowSize - 1]) / 2.0) : 10 : 1, mySiteNumber[StartIndex + GraphWindowSize - 1] : 8, ((A * 1.0) / GraphWindowSize) : 9 : 3, GGraph:12:3, 1-cumGCount/numReps:10:6); if (makeGraphTable='y') and (graphWindowSize=minWindowSize) then begin writeln(myOutput); writeln(myOutput); writeln(myOutput,'All possible G-values and P-values for a window of ', graphWindowSize:3, ' variable sites.'); writeln(myOutput); writeln(myOutput, 'prop. poly. G-value P-value'); for A:=0 to GraphWindowSize do begin B := GraphWindowSize - A; C := NumPoly - A; D := NumFixed - B; GGraph := 2 * (flnf(A) + flnf(B) + flnf(C) + flnf(D) + flnf(A + B + C + D) - flnf(A + B) - flnf(C + D) - flnf(A + C) - flnf(B + D)); cumGCount:=0; for GIndex:=0 to round(GGraph*10) do {calculate the cum. prob. for this poly. value} begin cumGCount:=cumGCount+maxGCountArray[maxGRecombIndex, GIndex]; end; writeln(myOutput, A/(GraphWindowSize):7:3,' ', GGraph:10:3, ' ', 1-cumGCount/numReps:10:6); end; end; {draw the polymorphism graph} writeln; writeln; for yIndex:=0 to vertSize do begin vertIndex:=vertSize-yIndex; if vertIndex mod 4=0 then write(' ', (vertIndex/vertSize):4:2,' |'); if vertIndex=((vertSize div 2)+1) then write('poly |'); if (vertIndex mod 4>0) and (vertIndex<>((vertSize div 2)+1)) then write(' |'); for horizIndex:=1 to (horizSize-1) do write(PgraphArray[horizIndex, vertIndex]); if (vertIndex=upperCritV) or (vertIndex=lowerCritV) then writeln('| P<0.05'); if (vertIndex<>upperCritV) and (vertIndex<>lowerCritV) then writeln('|'); end; write(' '); for horizIndex:=-3 to horizSize-3 do begin if (horizIndex+3) mod 10=0 then write(round(mySiteNumber[1]+(mySiteNumber[NumVarSites]-mySiteNumber[1])* ((horizIndex+3)/horizSize)):6,' '); end; writeln; writeln(' Midpoint of sliding window'); writeln; end; {====================================================================================================} begin randomize; {randomize the seed for the random number function} writeln('================================================================='); writeln; writeln(' Welcome to DNA Slider.'); writeln; writeln('For help on formatting the input file and other documentation, '); writeln(' or a version of this program for a different operating system, '); writeln(' see http://udel.edu/~mcdonald/aboutdnaslider.html.'); writeln; write('Enter the name of your outgroup species: '); readln(NameSpeciesB); writeln; write('Enter the name of your ingroup species: '); readln(NameSpeciesA); writeln; rangeBad:=TRUE; while rangeBad do begin write('Number of sequences in the ingroup species: '); readln(NumSequences); if (NumSequences>1) and (NumSequences<=maxNumSequences) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 2 to ', maxNumSequences:4); end; rangeBad:=TRUE; while rangeBad do begin write('Length of the sequence, in nucleotides: '); readln(NumSites); if (NumSites>1) and (NumSites<=maxNumSites) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 10 to ', maxNumSites:5); end; rangeBad:=TRUE; while rangeBad do begin write('Number of recombination parameters to try (1 to ', maxNumRecomb:3,'): '); readln(NumRecomb); if (NumRecomb>0) and (NumRecomb<=maxNumRecomb) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 1 to ', maxNumRecomb:3); end; for recombIndex:=1 to NumRecomb do begin rangeBad:=TRUE; while rangeBad do begin write('Recombination parameter ', recombIndex:3, ': '); readln(RecombArray[recombIndex]); if (RecombArray[recombIndex]>-0.00001) and (RecombArray[recombIndex]<=maxRecomb) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 0 to ', maxRecomb:5); end; end; rangeBad:=TRUE; while rangeBad do begin write('Number of replicate simulations to run: '); readln(NumReps); if (NumReps>0) and (NumReps<=maxNumReps) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 1 to ', maxNumReps:7); end; writeln; rangeBad:=TRUE; while rangeBad do begin writeln('The input file must be a plain text file (with the extension ".txt")'); writeln(' in the same folder as this program.'); writeln; write('Enter the name of the input file: '); readln(InputFileString); if InputFileString[length(InputFileString)-3]<>'.' then InputFileString:=InputFileString+'.txt'; {The next few lines get the full path to the program, remove the name of the program, then add the name of the input file. The result is that "inputFileString" is converted from "input.txt" to "/Users/username/sliderfolder/input.txt" (on a Mac) or "C:\sliderfolder\input.txt" (on Windows).} inputPath:=paramStr(0); lastSlashPos:=0; for stringIndex:=1 to length(inputPath) do begin if (inputPath[stringIndex]='/') or (inputPath[stringIndex]='\') then lastSlashPos:=stringIndex; end; delete(inputPath, lastSlashPos+1, length(inputPath)-lastSlashPos); inputFileFullString:=inputPath+inputFileString; assign(myInput, inputfileFullstring); {$I-} reset(myInput); {$I+} if IOresult=0 then {IOresult=0 means the file to be reset exists} rangeBad:=FALSE; if rangeBad=TRUE then begin writeln; writeln('I can''t find the file ', inputFileString, ', try again!'); end; end; rangeBad:=TRUE; while rangeBad do begin writeln; write('Enter the name of the output file: '); readln(OutputFileString); if OutputFileString[length(OutputFileString)-3]<>'.' then OutputFileString:=OutputFileString+'.txt'; OutputFileFullString:=inputPath+OutputFileString; assign(myOutput, OutputFileFullString); {$I-} reset(myOutput); {$I+} rangeBad:=FALSE; if IOresult=0 then {IOresult=0 means the file to be reset exists} begin writeln('File ', OutputFileString, ' already exists. Do you want to overwrite it?'); write('Enter "y" or "n": '); readln(overwriteYes); if overwriteYes='Y' then overwriteYes:='y'; if overwriteYes<>'y' then rangeBad:=TRUE; end; end; rewrite(myOutput); DoAnalyzeInput; DoRun; writeln; writeln('Now what would you like to do?'); write('Enter "a" to analyze the data again, "g" to draw a graph, "q" to quit: '); readln(ActionChar); while ActionChar<>'q' do begin if ActionChar='a' then begin writeln; rangeBad:=TRUE; while rangeBad do begin write('Number of recombination parameters to try (1 to ', maxNumRecomb:3,'): '); readln(NumRecomb); if (NumRecomb>0) and (NumRecomb<=maxNumRecomb) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 1 to ', maxNumRecomb:3); end; for recombIndex:=1 to NumRecomb do begin rangeBad:=TRUE; while rangeBad do begin write('Recombination parameter ', recombIndex:3, ': '); readln(RecombArray[recombIndex]); if (RecombArray[recombIndex]>-0.000001) and (RecombArray[recombIndex]<=maxRecomb) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 0 to ', maxRecomb:5); end; end; rangeBad:=TRUE; while rangeBad do begin write('Number of replicate simulations to run: '); readln(NumReps); if (NumReps>0) and (NumReps<=maxNumReps) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 1 to ', maxNumReps:7); end; DoRun; end; if ActionChar='g' then begin writeln; rangeBad:=TRUE; while rangeBad do begin write('What window size (in number of variable sites) would you like? '); readln(graphWindowSize); if (graphWindowSize>0) and (graphWindowSize<=(NumVarSites)) then rangeBad:=FALSE; if rangeBad then writeln('Enter a number from 1 to ', (NumVarSites):6); end; DoGraph; end; writeln; writeln('Now what would you like to do?'); write('Enter "a" to analyze the data again, "g" to draw a graph, "q" to quit: '); readln(ActionChar); end; close(myInput); close(myOutput); end.