In this assignment you will be examining how different population parameters affect coalescence, as discussed in chapter 9 of the textbook.

For each question you will load and run code to produce plots and then answer some follow-up questions.
Make sure to read all the way to the end of each question to answer the follow-up questions.

1. Size in static populations

First you will need to install the R package ape (Analysis of Phylogenetics and Evolution). Either use the install button in the GUI or run the following:

install.packages("ape")

Now run the code below to get the necessary functions into your environment.

plot.tree = function (phy,mut.rate=0,x.offset=0,muts=T)
{
N <- length(phy$tip.label)
x.pos = c(1:N,rep(NA,N-2))+x.offset
y.pos = rep(0,2*N-1)

for(i in seq(2*N-1,N+1,-1)){
parent = which(phy$edge[,1]==i)
point=phy$edge[parent,2]
x.pos[i]=mean(x.pos[point])

parent = parent[1]
child=phy$edge[parent,2]
height=phy$edge.length[parent]
y.pos[i]=y.pos[i]+height

while(child>N){
parent = which(phy$edge[,1]==child)[1]
child=phy$edge[parent,2]
height=phy$edge.length[parent]
y.pos[i]=y.pos[i]+height
}
}

nodes=cbind(x.pos,y.pos)
max.depth=max(nodes[,2])*1.2
mut.rate=15/max.depth

top=phy$edge[1,1]
lines(rep(nodes[top,1],2),c(10^10,nodes[top,2]))
sto.muts=0
for(i in 1:(2*N-2)){
top=phy$edge[i,1]
bot=phy$edge[i,2]
lines(c(nodes[top,1],nodes[bot,1],nodes[bot,1]),
c(nodes[top,2],nodes[top,2],nodes[bot,2]))
if(muts==T){
n.mut=rpois(1,phy$edge.length[i]*mut.rate)
sto.muts[i]=n.mut
if(n.mut>0){
y.top=nodes[top,2]
y.bot=nodes[bot,2]
nudge=(y.top-y.bot)/7
y=runif(n.mut,y.bot+nudge,y.top-nudge)
points(rep(nodes[bot,1],n.mut),y,pch=20)
}

}
}
if(muts==F) return(NULL)

ancestors=mrca(phy)[1:36,1:36]
freqs=rep(0,60)
for(i in 1:35){
for(j in (i+1):36){
common=ancestors[i,j]

diffs=0

diffs = diffs + sto.muts[which(phy$edge[,2]==i)]
parent.L = phy$edge[which(phy$edge[,2]==i),1]
while(parent.L!=common) {
diffs = diffs + sto.muts[which(phy$edge[,2]==parent.L)]
parent.L = phy$edge[which(phy$edge[,2]==parent.L),1]
}

diffs = diffs + sto.muts[which(phy$edge[,2]==j)]

parent.R = phy$edge[which(phy$edge[,2]==j),1]
while(parent.R!=common) {
diffs = diffs + sto.muts[which(phy$edge[,2]==parent.R)]
parent.R = phy$edge[which(phy$edge[,2]==parent.R),1]
}
freqs[diffs+1]=freqs[diffs+1]+1
}
}
if(.Platform$OS.type=='windows') quartz=function() windows()
quartz()
dev.next()
tot=sum(freqs)
running=cumsum(freqs)
top=min(which(running==tot))
y=freqs[1:top]/sum(freqs)*100
my.hist(y)
}

my.rcoal2 = function (n=20, curr.popsize=1000,old.popsize=1,when=10)
{
tip.label = NULL
nbr <- 2 * n - 2
edge <- matrix(NA, nbr, 2)
x=0
for (i in seq(n,2,-1)){
x[(n+1)-i]=-2*curr.popsize*log(1-runif(1))/(i*(i-1))
}

rescale=which(cumsum(x)>when)
x[rescale]=x[rescale]*old.popsize/curr.popsize


edge.length <- numeric(nbr)
h <- numeric(2 * n - 1)
node.height <- cumsum(x)
pool <- 1:n
nextnode <- 2L * n - 1L
for (i in 1:(n - 1)) {
y <- sample(pool, size = 2)
ind <- (i - 1) * 2 + 1:2
edge[ind, 2] <- y
edge[ind, 1] <- nextnode
edge.length[ind] <- node.height[i] - h[y]
h[nextnode] <- node.height[i]
pool <- c(pool[!pool %in% y], nextnode)
nextnode <- nextnode - 1L
}

phy <- list(edge = edge, edge.length = edge.length)
if (is.null(tip.label))
tip.label <- paste("t", 1:n, sep = "")
phy$tip.label <- sample(tip.label)
phy$Nnode <- n - 1L
class(phy) <- "phylo"
phy <- reorder(phy)
phy$edge[phy$edge[, 2] <= n, 2] <- 1:n
phy
}

get.tree.depth = function (tree)
{
depth=0
N=NROW(tree$edge)
for(i in 1:N){
ichild=tree$edge[i,2]
depth=depth+tree$edge.length[i]
if(ichild<=20) return(depth)
}
return(depth)
}

Fig9.4and5=function (popsize=100)
{
library(ape)
tree=my.rcoal2(n=20,popsize,popsize,0)
depth = get.tree.depth(tree)
plot(c(0,20,0,20),c(depth,0,depth,0),type='n',axes=F,xlab='',ylab='')
plot.tree(tree,muts=F)
axis(2)
}

Fig9.6 = function (popsize1=100,popsize2=1000)
{
library(ape)
tree1=my.rcoal2(n=20,popsize1,popsize1,0)
depth1 = get.tree.depth(tree1)
tree2=my.rcoal2(n=20,popsize2,popsize2,0)
depth2 = get.tree.depth(tree2)
max.depth=max(c(depth1,depth2))
plot(c(0,46,0,46),c(max.depth,0,max.depth,0),type='n',axes=F,xlab='',ylab='')
plot.tree(tree1,muts=F)
plot.tree(tree2,x.offset=25,muts=F)
axis(2)
}

You should now see Fig9.4and5, Fig9.6, get.tree.depth, my.rcoal2, and plot.tree listed under functions in your global environment.

Part A

Run Fig9.6 twice and copy the resulting plots to your document.

Part B

If you didn’t know which population was larger (right or left side plot), how could you figure this out from the plots that you just made?

Part C

If you changed the size of the second population to 10000 instead of 1000, what would you expect to happen to the plot? hint: if you are uncertain, run Fig9.6(100,10000)

2. Mismatch distributions

Load the following scripts for Fig9.7.

my.hist = function (y)
{
n.rects=length(y)
plot(c(-.5,n.rects),c(0,max(y)),type='n',axes=F,xlab='No. of Differences',ylab='Percent')
for(i in 1:n.rects){
rect(i-1.5,0,i-.5,y[i],col='gray')
}
axis(1,at=0:100,pos=0)
axis(2)
}

Fig9.7 = function ()
{
library(ape)
tree=my.rcoal2(n=36,100,100,0)
depth=get.tree.depth(tree)
plot(c(0,37,0,37),c(depth,0,depth,0),type='n',axes=F,xlab='',ylab='')
plot.tree(tree,muts=T)
}

This script simulates coalescence under a constant population size, so it should give you a ragged mismatch distribution (though by chance, this may not always happen).

Part A

Run the Fig9.7 script until you get a nice ragged mismatch distribution, and copy the tree and the mismatch distribution to your document.  Be sure that you copy both the tree and the mismatch distribution to your homework.

Warning: if you are using RStudio, the mismatch distribution will appear in a separate window and the tree will be in the lower right panel.  If you run multiple times, be sure to close the tree window before running again by clicking on the red "x" above the plot in the lower right pane (otherwise you will get your plots confused). If you are using R, both plots will appear in a separate window with the mismatch distribution blocking the tree. Just drag the histogram window out of the way.  If you are running multiple times, close both the mismatch distribution and the tree plot so that you don't get your plots confused.


Part B

What is being compared to produce the mismatch distribution?

Part C

What aspect of the tree causes the mismatch distribution to appear “ragged”?

3. Population expansion

Load the script for Fig9.12.

Fig9.12 = function (old.popsize=2,when=10)
{
library(ape)
tree<-my.rcoal2(n=36,1000,old.popsize,when)
depth=get.tree.depth(tree)
plot(c(0,37,0,37),c(depth,0,depth,0),type='n',axes=F,xlab='',ylab='',
main=paste('Old size = ',old.popsize, 'Expansion at ',when))
mut.rate=(old.popsize-2)*(0.01-.25)/998+.25
plot.tree(tree)
}

This script is more general than Fig9.7 in that it can do simulations of constant population size but it can also simulate population expansion.

Part A

Run the script for a static (non-increasing) population: Fig9.12(1000,0) and for an expanding population: Fig9.12() and copy the trees and mismatch distributions to your document.. Your run with the defaults (i.e., Fig9.12()) is doing an expanding population.

The first number is the founding population size, while the second number is the timing for the population expansion. So 1000,0 simulates a population that has always been at a size of 1,000 (that’s the size for the current population), while 2,10 (the default) simulates a population that started with only two individuals 10 “mutational time units” ago and then underwent expansion.

Note: as with the previous simulations, they are stochastic and so you may not get what we are looking for on the first or even the second run.  If so, run again, but be sure to close the graphic windows before rerunning so that you do not get your  graphs mixed up.

Part B

Explain why the mismatch distribution for the expanding population no longer appears ragged.

Part C

Explain how you could use the shape of the mismatch distribution to figure out if a population has experienced an expansion event in the past.