American Journal of Physical Anthropology

This page gives the "R" code for analyses reported  in Konigsberg, Frankenberg, and Liversidge (2016).

If you do not already have "R" you can get it from:

http://cran.r-project.org/

You will also need to download/install some libraries. 

First things first, anything shown in fixed-width green text is intended to be copied and pasted into your R "Gui" (graphical user interface).


 
Figure 1

Fig. 1. Plot of percentages of 2,232 boys in yearly age classes who have the mandibular canine erupted. Percentages are shown in the “normal probability” scale. The fitted line is from a weighted regression, while the dashed lines represent the estimated median eruption age and plus and minus one standard deviation. Data are from Klein et al. (1937). The dashed/dotted lines represent the reported statistics from Klein et al. based on fitting a regression line by eye.

Here's the code for Figure 1 (note: you will need to download the two libraries listed at the beginning of the code):

Fig.01 = function ()
{
library(shape)
library(pBrackets)

weight=function (r=0,n=50)
{
    p = r/n
    if(p==0) p = 1/(2*n)
    if(p==1) p = 1 - 1/(2*n)
    return((dnorm(qnorm(p))^2)/(p*(1-p)))
}

p.vals = c(.01,.05,.1,.2,.5,1,2,5,seq(10,90,10),95,98,99,99.8,99.9,99.99)/100
age = c(6.75,seq(7.5,15.5,1))

n = c(171,197,231,253,270,262,299,267,199,83)
n.1 = round(n*c(0.4,0.5,1.7,12.1,45,76.5,91,98.5,98.7,100)/100,0)
percent=qnorm(n.1/n)
print(100*(1-1/(2*n[1])))

percent[10] = qnorm(1-1/(2*n[1]))

plot(c(6,16),qnorm(c(0.0001,0.9999)),axes=F,xlab='Age',
   ylab=expression('Percent of individuals with erupted C'[x]),type='n')
points(age[1:9],percent[1:9],pch=19)
points(age[10],percent[10],pch=17)
box()
par(cex=0.75)
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
par(cex=1)
axis(1)
axis(4,las=1,at=(-4):4)


w = 1
for(i in 1:10){
  w[i] = weight(n.1[i],n[i])
}

sto = as.vector(coef(lm(percent~age,weights=w*n)))
abline(sto)
print(round(sto,3))
text(6,qnorm(0.9998),expression(paste('z = ', -8.107 + 0.753%*%'Age')),adj=0)
rect(0,qnorm(99.92/100),9.75,5)

med.age = -sto[1]/sto[2]
lines(c(0,med.age),rep(0,2),lty=2)
lo.age = (-1-sto[1])/sto[2]
lines(c(0,lo.age),rep(-1,2),lty=2)
hi.age = (1-sto[1])/sto[2]
lines(c(0,hi.age),rep(1,2),lty=2)

Arrows(c(lo.age,med.age,hi.age),
       c(-1,0,1),
       c(lo.age,med.age,hi.age),
       rep(qnorm(0.1/100),3),
       arr.adj=0,lty=2,arr.type='triangle',arr.width=.25,arr.length=0.25)

K38.mu = 10.7
K38.sd = 1.15
lines(rep(K38.mu,2),c(-6,qnorm(0.05/100)),lty=4,lwd=2)
lines(rep(K38.mu-K38.sd,2),c(-6,qnorm(0.05/100)),lty=4,lwd=2)
lines(rep(K38.mu+K38.sd,2),c(-6,qnorm(0.05/100)),lty=4,lwd=2)

text(6,1,expression(paste(84^th,' percentile')),adj=c(0,0))
text(6,0,expression(paste(50^th,' percentile')),adj=c(0,0))
text(6,-1,expression(paste(16^th,' percentile')),adj=c(0,0))

text((lo.age+med.age)/2,qnorm(0.03),expression(-sigma))
text((hi.age+med.age)/2,qnorm(0.03),expression(+sigma))
brackets(lo.age,qnorm(0.01),hi.age,qnorm(0.01))

}

Fig.01()


Figure 2

Fig. 2. A redrawing of Fig. 1 using a logarithmic scale for age and adding information from Moorrees et al. (1963) on the timing of completion of the canine crown. The open points are the digitized values for the mean and plus and minus one and two standard deviations. These values are from Shackelford et al. (2012). The dashed lines are what were used to infer the probit regression line. See Shackelford et al. (2012) for further details on how the probit regression line was obtained.

Fig.02 = function ()
{

weight=function (r=0,n=50)
{
    p = r/n
    if(p==0) p = 1/(2*n)
    if(p==1) p = 1 - 1/(2*n)
    return((dnorm(qnorm(p))^2)/(p*(1-p)))
}

p.vals = c(.01,.05,.1,.2,.5,1,2,5,seq(10,90,10),95,98,99,99.8,99.9,99.99)/100
age = c(6.75,seq(7.5,15.5,1))

n = c(171,197,231,253,270,262,299,267,199,83)
n.1 = round(n*c(0.4,0.5,1.7,12.1,45,76.5,91,98.5,98.7,100)/100,0)
percent=qnorm(n.1/n)
percent[10] = qnorm(1-1/(2*n[1]))

plot(log(c(3,16)),qnorm(c(0.0001,0.9999)),axes=F,xlab='Age',ylab='Percent',type='n')
points(log(age),percent,pch=19)
box()
par(cex=0.75)
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
par(cex=1)
axis(1,at=log(3:16),labels=3:16)

n = c(171,197,231,253,270,262,299,267,199,83)
n.1 = round(n*c(0.4,0.5,1.7,12.1,45,76.5,91,98.5,98.7,100)/100,0)
n.0 = n - n.1

w = 1
for(i in 1:10){
  w[i] = weight(n.1[i],n[i])
}

sto = as.vector(coef(lm(percent~log(age),weights=w*n)))
abline(sto)

MFH.sd = 0.042*log(10)
MFH.mu = 1.562387

lo.age2 = log(exp(MFH.mu-2*MFH.sd)-0.75)
lo.age = log(exp(MFH.mu-MFH.sd)-0.75)
hi.age = log(exp(MFH.mu+MFH.sd)-0.75)
hi.age2 = log(exp(MFH.mu+2*MFH.sd)-0.75)
b = 4/(hi.age2-lo.age2)
a = -log(exp(MFH.mu)-0.75)*b
med.age = -a/b
print(round(exp(c(lo.age2,lo.age,med.age,hi.age,hi.age2)),2))
abline(a,b)

lines(c(0,lo.age2),rep(-2,2),lty=2)
lines(c(0,lo.age),rep(-1,2),lty=2)
lines(c(0,med.age),rep(0,2),lty=2)
lines(c(0,hi.age),rep(1,2),lty=2)
lines(c(0,hi.age2),rep(2,2),lty=2)

lines(rep(lo.age2,2),c(-3.5,-2),lty=2)
lines(rep(lo.age,2),c(-3.5,-1),lty=2)
lines(rep(med.age,2),c(-3.5,0),lty=2)
lines(rep(hi.age,2),c(-3.5,1),lty=2)
lines(rep(hi.age2,2),c(-3.5,2),lty=2)

lines(rep(lo.age2,2),c(-6,-3.5))
lines(rep(lo.age,2),c(-6,-3.5))
lines(rep(med.age,2),c(-6,-3.5))
lines(rep(hi.age,2),c(-6,-3.5))
lines(rep(hi.age2,2),c(-6,-3.5))

MFH = log(c(3.18,3.58,4.03,4.5,5.03))
par(cex=1.25)
points(MFH,rep(qnorm(0.0001),5))
par(cex=1)
legend(2.3,-2.5,pch=19,legend='Klein et al. (1937)',bty='n')
legend(1.07,3.18,pch=1,legend='Moorrees et al. (1963)',bty='n')
}

Fig.02()


Figure 3

Fig 3. Empirical cumulative density plots (with percentages shown on a normal probability scale) for M1 and M3 exact ages of eruption in 34 rhesus macaques. Data are from Hurme and Van Wagenen (1961). P-values under the null of normal distributions for both teeth’s eruption ages are shown using the Jarque-Bera test (Jarque and Bera 1987).

The next Figure requires a small data set on rhesus macaque dental eruption.   Click on the blue "rhesus.csv" below and save to your working directory.  Then paste the green text.

rhesus.csv

rhesus = read.csv('rhesus.csv')

Fig.03 = function ()
{
library(normtest)
set.seed(123456)
dat = rhesus[,c(2,16)]
dat = dat[complete.cases(dat),]
p.vals = c(.01,.05,.1,.2,.5,1,2,5,seq(10,90,10),95,98,99,99.8,99.9,99.99)/100
x = sort(dat[,1])
p = round(ajb.norm.test(x,n=10000)$p,4)
y = qnorm(c((1:33)/34,1-1/(2*34)))

plot(c(1,8),qnorm(c(0.01,0.99)),axes=F,xlab='Age',
   ylab=expression('Percent of individuals with erupted tooth'),type='n')
points(x,y)
text(1.608,-1.43,paste('J-B test, p = ',p), adj=0)

sdev=sd(x)
mux=mean(x)
lo = qnorm(pnorm(1,mux,sdev))
hi = qnorm(pnorm(2,mux,sdev))
lines(c(1,2),c(lo,hi),lty=2,lwd=2)


x2 = sort(dat[,2])
p = round(ajb.norm.test(x2,n=10000)$p,4)
points(x2,y,pch=19)

sdev=sd(x2)
mux=mean(x2)
lo = qnorm(pnorm(4,mux,sdev))
hi = qnorm(pnorm(9,mux,sdev))
lines(c(4,9),c(lo,hi),lty=2,lwd=2)


box()
# par(cex=0.75)
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
par(cex=1)
axis(1)
par(cex=1.25)
legend(3.4,2.1,c(expression(M[1]),expression(M[3])),lty=rep(2,2),pch=c(1,19),bty='n')
par(cex=1)
text(6,-.567,paste('J-B test, p = ',format(p,nsmall=4)), adj=0)
}

Fig.03()


Figure 4

Fig. 4 Graphic representations of distributions (sold lines) that depart from the normal distribution (dashed lines). A. Pearson type-II distribution (symmetric B eta), B. Pearson type-VII distribution (student-t), C. and D. Pearson type-III distributions.

Fig.04 = function()
{
c0 = 1
f = function(t) {
# c1 = 0, c2 = -1
exp(log(1-t^2)/2)
}
f2 = function(t) {
# c1 = 0, c2 = 1
exp(-log(1+t^2)/2)
}
f3 = function(t) {
# c1 = -1, c2 = 0
exp(-t)
}
f4 = function(t) {
# c1 = 1, c2 = 0
exp(t)
}

par(mfrow=c(2,2))
x=seq(-1,1,.01)

denom = integrate(f,lower=-1,upper=1)$val
plot(x,f(x)/denom,type='l',xlim=c(-6,6),xlab='Deviates',ylab='Density',
  main=expression(paste(c[1],' = 0, ',c[2],' = -1')),lwd=2)
x=seq(-6,6,.01)
lines(x,dnorm(x),lty=2)
text(5,.6,'A',cex=1.5)

denom = integrate(f2,lower=-6,upper=6)$val
plot(x,dnorm(x),type='l',xlim=c(-6,6),xlab='Deviates',ylab='Density',
  main=expression(paste(c[1],' = 0, ',c[2],' = 1')),lty=2)
lines(x,f2(x)/denom,lwd=2)
text(-5,.38,'B',cex=1.5)

denom = integrate(f3,lower=-6,upper=6)$val
plot(x,dnorm(x),type='l',xlim=c(-6,6),xlab='Deviates',ylab='Density',
  main=expression(paste(c[1],' = -1, ',c[2],' = 0')),lty=2)
lines(x,f3(x)/denom,lwd=2)


denom = integrate(f4,lower=-6,upper=6)$val
plot(x,dnorm(x),type='l',xlim=c(-6,6),xlab='Deviates',ylab='Density',
  main=expression(paste(c[1],' = 1, ',c[2],' = 0')),lty=2)
lines(x,f4(x)/denom,lwd=2)
text(-5,.38,'D',cex=1.5)

}

Fig.04()


Figure 5

Fig. 5. A redrawing of Fig. 3 where the M1 and M3 eruption ages have been centered on their respective means.

Fig.05 = function ()
{
par(mfrow=c(1,1))
dat = rhesus[,c(2,16)]
dat = dat[complete.cases(dat),]
p.vals = c(.01,.05,.1,.2,.5,1,2,5,seq(10,90,10),95,98,99,99.8,99.9,99.99)/100
x = sort(dat[,1])
x = x - mean(x)
y = qnorm(c((1:33)/34,1-1/(2*34)))

plot(c(-1.5,2),qnorm(c(0.01,0.99)),axes=F,xlab='Age centered on mean',
   ylab=expression('Percent of individuals with erupted tooth'),type='n')
lines(x,y)
points(x,y)
x2 = sort(dat[,2])
x2 = x2 - mean(x2)
lines(x2,y)
points(x2,y,pch=19)
box()
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
axis(1)

par(cex=1.25)
legend(-1.2,1.9,c(expression(M[1]),expression(M[3])),lty=rep(1,2),pch=c(1,19),bty='n')
par(cex=1)
}

Fig.05()



Figure 6

Fig. 6. A redrawing of Fig. 5 with the M1 and M3 eruption ages combined into one sorted list of 68 observations (after centering on their respective means).

Fig.06 = function ()
{
library(moments)
library(normtest)
set.seed(123456)
dat = rhesus[,c(2,16)]
dat = dat[complete.cases(dat),]
p.vals = c(.01,.05,.1,.2,.5,1,2,5,seq(10,90,10),95,98,99,99.8,99.9,99.99)/100
x = dat[,1]
x = x - mean(x)
x2 = dat[,2]
x2 = x2 - mean(x2)
x = sort(c(x,x2))
y = qnorm(c((1:67)/68,1-1/(2*68)))

plot(c(-1.5,2),qnorm(c(0.01,0.995)),axes=F,xlab='Age centered on mean',
   ylab=expression('Percent of individuals with erupted tooth'),type='n')
box()
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
points(x,y)
par(cex=1)
axis(1)
sdev=sd(x)
lo = qnorm(pnorm(-1.5,0,sdev))
hi = qnorm(pnorm(2,0,sdev))
lines(c(-1.5,2),c(lo,hi),lty=2,lwd=2)
p = ajb.norm.test(x,n=10000)$p
text(-1.1,2.15,paste('J-B test, p = ',format(p,nsmall=4)), adj=0,cex=1.25)

print(shapiro.test(x))
print(anscombe.test(x,alt='l'))
print(agostino.test(x[-(67:68)]))
}

Fig.06()


Figure 7

Fig. 7. A redrawing of Fig. 3 (here panel A), Fig. 5 (here panel B), and Fig. 6 (here panel C) with ages on the logarithmic scale.

Fig.07 = function ()
{
library(normtest)
par(mfrow=c(2,2),cex=0.75)

set.seed(123456)
dat = rhesus[,c(2,16)]
dat = dat[complete.cases(dat),]
p.vals = c(.01,.05,.1,.2,.5,1,2,5,seq(10,90,10),95,98,99,99.8,99.9,99.99)/100
x = sort(log(dat[,1]))
y = qnorm(c((1:33)/34,1-1/(2*34)))

plot(log(c(1,8)),qnorm(c(0.01,0.99)),axes=F,xlab='Age',
   ylab=expression('Percent of individuals with erupted tooth'),type='n')
mtext('  A',adj=0,cex=1.5)
points(x,y)


sdev=sd(x)
mux=mean(x)
lo = qnorm(pnorm(0.01,mux,sdev))
hi = qnorm(pnorm(0.5,mux,sdev))
lines(c(0.01,.5),c(lo,hi),lty=2,lwd=2)
p = round(ajb.norm.test(x,n=10000)$p,4)

x2 = sort(log(dat[,2]))
points(x2,y,pch=19)


sdev=sd(x2)
mux=mean(x2)
lo = qnorm(pnorm(1.5,mux,sdev))
hi = qnorm(pnorm(2.5,mux,sdev))
lines(c(1.5,2.5),c(lo,hi),lty=2,lwd=2)
p2 = round(ajb.norm.test(x2,n=10000)$p,4)


box()
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
axis(1,at=log(1:8),labels=1:8)
par(cex=1)
legend(0.9,2,c(expression(M[1]),expression(M[3])),lty=rep(2,2),pch=c(1,19),bty='n')
par(cex=0.75)
text(0.37,.15,paste('J-B test,\n p = ',p), adj=0)
text(0.86,-1.68,paste('J-B test,\n p = ',p2), adj=0)

############################  PANEL B  ##############################

plot(c(-0.2,0.3),qnorm(c(0.01,0.99)),axes=F,xlab='Age centered on mean (log-scale)',
   ylab=expression('Percent of individuals with erupted tooth'),type='n')
mtext('  B',adj=0,cex=1.5)
mux=mean(x)
x=x-mux
lines(x,y)
points(x,y)
x2 = log(sort(dat[,2]))
mux2=mean(x2)
x2=x2-mux2
lines(x2,y)
points(x2,y,pch=19)
box()
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
axis(1)
par(cex=1)
legend(-0.13,2,c(expression(M[1]),expression(M[3])),lty=rep(1,2),pch=c(1,19),bty='n')
par(cex=0.75)

############################  PANEL C  ##############################

x = sort(c(x,x2))
y = qnorm(c((1:67)/68,1-1/(2*68)))
plot(c(-0.2,0.3),qnorm(c(0.01,0.99)),axes=F,xlab='Age centered on mean (log-scale)',
   ylab=expression('Percent of individuals with erupted tooth'),type='n')
mtext('  C',adj=0,cex=1.5)
points(x,y)

box()
axis(2,at=qnorm(p.vals),labels=100*p.vals,las=1)
axis(1)
sdev=sd(x)
lo = qnorm(pnorm(-0.3,0,sdev))
hi = qnorm(pnorm(0.3,0,sdev))
lines(c(-0.3,0.3),c(lo,hi),lty=2,lwd=2)
p = ajb.norm.test(x,n=10000)$p
text(-0.16,1.1,paste('J-B test,\n p = ',p), adj=0,cex=1)
}

Fig.07()


Lagrange Multiplier Test

This test is at the heart of the paper, so you will definitely want the script.

LM.test=function (dat,collapse=1:10,log.it=T)
{
library(MASS)
age=dat[,1]
if(log.it==T) age=log(age)
stage=dat[,2]
#  Number of individuals
n=NROW(stage)
stage=collapse[stage]
#  Number of states
m=NROW(table(stage))
if(m==2) stage=stage-1
if(m>=3) {

results=polr(factor(stage)~age,method='probit')
beta=as.vector(results$coef)
alpha=as.vector(results$zeta)


E1sum=0
E2sum=0
BOX=matrix(rep(0,(m+2)*(m+2)),nc=(m+2))
for(i in 1:n){
 
  vec=rep(0,m+2)

  if(stage[i]==1){
     hj=alpha[1]-beta*age[i]
     norm=dnorm(hj)/pnorm(hj)
     E1=-(1-hj^2)*norm
     E2=-(3+hj^2)*hj*norm
     vec[1]=norm
     vec[m]=-vec[1]*age[i]
  }

  if(stage[i]==m){
    hjm1=alpha[m-1]-beta*age[i]
    norm=1/(1-pnorm(hjm1))
    E1=((1-hjm1^2)*dnorm(hjm1))*norm
    E2=((3+hjm1^2)*hjm1*dnorm(hjm1))*norm
    vec[m-1]=-dnorm(hjm1)*norm
    vec[m]=-vec[m-1]*age[i]
   
  }

  if(stage[i]>1 & stage[i]<m){
     hj=alpha[stage[i]]-beta*age[i]
     hjm1=alpha[stage[i]-1]-beta*age[i]
     norm=1/(pnorm(hj)-pnorm(hjm1))
     E1=((1-hjm1^2)*dnorm(hjm1)-(1-hj^2)*dnorm(hj))*norm
     E2=((3+hjm1^2)*hjm1*dnorm(hjm1)-(3+hj^2)*hj*dnorm(hj))*norm
     vec[stage[i]-1]=-dnorm(hjm1)*norm
     vec[stage[i]]=dnorm(hj)*norm
     vec[m]=(dnorm(hjm1)-dnorm(hj))*norm*age[i]
    }

    vec[m+1]=E1
    vec[m+2]=E2
    BOX=BOX+(vec%o%vec)

    E1sum=E1sum+E1
    E2sum=E2sum+E2
}
   J=diag(m+2)[,(m+1):(m+2)]
   E=c(E1sum,E2sum)
   x2=as.vector(t(E)%*%t(J)%*%solve(BOX)%*%J%*%E)
   return(pchisq(x2,2,lower.tail=F))
}

#  For binary

cons=as.vector(glm(stage~age,family=binomial(link="probit"))$coeff)
beta=cons[2]
cons=cons[1]
vec=c(0,0)
mat=matrix(rep(0,4*4),nc=4)

for(i in 1:n){
   h=cons+beta*age[i]
   sto=dnorm(h)
   wi=sto
   p=pnorm(h)
   wi=wi/(p*(1-p))
   if(is.finite(wi)){
   vec=vec+wi*(stage[i]-p)*h*c(h,3+h^2)
   Xi=c(age[i],h,h^2-1,h*(3+h^2))
   mat=mat+sto*wi*(Xi%o%Xi)
   }
}

J=diag(4)[,3:4]
x2= as.vector(vec%*%t(J)%*%solve(mat)%*%J%*%vec)
return(pchisq(x2,2,lower.tail=F))
}



Some Simulation Scripts

sim.two = function (niters=10000,nids=500)
{
library(MASS)
set.seed(1234567)

mu=c(3.4,4.8)
sdev=.8

prob=matrix(rep(0,2*niters),nc=2)

a3=0.03
b3=0.01
age=0

for(i in 1:nids){
  U=log(runif(1))
  age[i]=log(1-b3*U/a3)/b3+17
}

p1=0
p2=0
for(i in 1:nids){
  p1[i]=1-plnorm(age[i],mu[1],sdev)
}

for(iters in 1:niters){
 trait=rep(2,nids)
 for(i in 1:nids){
   u=runif(1)
  if(u<=p1[i]) trait[i]=1
}
   prob[iters,1]=LM.test(cbind(age,trait),collapse=c(1,2),log.it=T)
   prob[iters,2]=LM.test(cbind(age,trait),collapse=c(1,2),log.it=F)
   cat(c(iters,prob[iters,],'\n'))
   flush.console()
  }
 return(prob)
}


sim.two.outlier = function (niters=10000,nids=500)
{
library(MASS)
set.seed(1234567)

mu=c(3.4,4.8)
sdev=.8

prob=matrix(rep(0,2*niters),nc=2)

a3=0.03
b3=0.01
age=0

for(i in 1:nids){
  U=log(runif(1))
  age[i]=log(1-b3*U/a3)/b3+17
}

p1=0
p2=0
for(i in 1:nids){
  p1[i]=1-plnorm(age[i],mu[1],sdev)
}

for(iters in 1:niters){
trait=rep(2,nids)
for(i in 1:nids){
   u=runif(1)
  if(u<=p1[i]) trait[i]=1
}
   prob[iters,1]=LM.test(cbind(age,trait),collapse=c(1,2),log.it=T)
   trait[which.max(age)]=1

   prob[iters,2]=LM.test(cbind(age,trait),collapse=c(1,2),log.it=T)
   cat(c(iters,prob[iters,],'\n'))
   flush.console()
  }
 return(prob)
}

sim.three = function (niters=10000,nids=5000)
{
# Warning: "nids" needs to be pretty darn large (like =4,000) in order for
# the model to go all "asymptotic-y" and fit the null.  To fit the null,
# keep "log.it" as true.  For alternate, change to false so that data is
# simulated under log-normal transitions, but analyzed under normal transitions.

library(MASS)
set.seed(1234567)

mu=c(3.4,4.8)
sdev=.8

prob=matrix(rep(0,2*niters),nc=2)

a3=0.03
b3=0.01
age=0

for(i in 1:nids){
  U=log(runif(1))
  age[i]=log(1-b3*U/a3)/b3+17
}

p1=0
p2=0
for(i in 1:nids){
  p1[i]=1-plnorm(age[i],mu[1],sdev)
  p2[i]=1-plnorm(age[i],mu[2],sdev)
}


for(iters in 1:niters){


trait=rep(2,nids)

for(i in 1:nids){
   u=runif(1)
  if(u<=p1[i]) trait[i]=1
  else{if(u>=p2[i]) trait[i]=3}
}

   prob[iters,1]=LM.test(cbind(age,trait),collapse=c(1,2,3),log.it=T)
   prob[iters,2]=LM.test(cbind(age,trait),collapse=c(1,2,3),log.it=F)
   cat(c(iters,prob[iters,],'\n'))
   flush.console()
  }
 return(prob)
}

And now run simulations and store results.  Warning: this will take some time to run.

results.sim.two.500 = sim.two()
results.sim.two.out = sim.two.outlier()
results.sim.three.5000 = sim.three()


Figure 8

Fig. 8. Results from 10,000 simulations of a binary age “indicator” which is following a lognormal transition between the two states. The upper two panels show the goodness-of-fit pvalues against a log-normal transition model while the bottom two panels show the goodness-of-fit against a normal transition model. Panels to the left give histograms of the p-values while panels to the right give the empirical cumulative density functions. The line of identity is represented in the upper right panel by filled circles and in the lower right panel by a dashed line.

Fig.08 = function ()
{
par(mfrow=c(2,2))
hist(results.sim.two.500[,1],nc=35,prob=T,main='Log-normal transition (2 stages)',xlab='P-value')
plot(ecdf(results.sim.two.500[,1]),xlab='P-value',main='',ylab='Cumulative Density')
x=seq(0,1,.1)
points(x,x,pch=19)
hist(results.sim.two.500[,2],nc=35,prob=T,main='Normal transition (2 stages)',xlab='P-value')
plot(ecdf(results.sim.two.500[,2]),xlab='P-value',main='',ylab='Cumulative Density')
abline(0,1,lty=2)
}

Fig.08()


Figure 9

Fig. 9. As in Fig. 8 but for simulation of a three stage system with two log-normal transition distributions.

Fig.09 = function ()
{
par(mfrow=c(2,2))
hist(results.sim.three.5000[,1],nc=35,prob=T,main='Log-normal transition (3 stages)',xlab='P-value')
plot(ecdf(results.sim.three.5000[,1]),xlab='P-value',main='',ylab='Cumulative Density')
x=seq(0,1,.1)
points(x,x,pch=19)
hist(results.sim.three.5000[,2],nc=35,prob=T,main='Normal transition (3 stages)',xlab='P-value')
plot(ecdf(results.sim.three.5000[,2]),xlab='P-value',main='',ylab='Cumulative Density')
abline(0,1,lty=2)
}

Fig.09()



Figure 10

Fig. 10. Simulation results as shown in Fig.8, but with one “outlier” created in the bottom panes by giving the oldest individual out of the 500 individuals in each iteration an “indicator” score of “1”.

Fig.10 = function ()
{
par(mfrow=c(2,2))
hist( results.sim.two.out[,1],nc=35,prob=T,main='Log-normal transition (2 stages)',xlab='P-value')
plot(ecdf( results.sim.two.out[,1]),xlab='P-value',main='',ylab='Cumulative Density')
x=seq(0,1,.1)
points(x,x,pch=19)
hist( results.sim.two.out[,2],nc=35,prob=T,main='Log-normal transition (with one outlier)',xlab='P-value')
plot(ecdf( results.sim.two.out[,2]),xlab='P-value',main='',ylab='Cumulative Density')
abline(0,1,lty=2)
}

Fig.10()


Collapse

You will need this script to write the various partitions of, in this case, the integer 10.  But  first you make a little object using "n.part2" that does some housekeeping.
VERY, VERY IMPORTANT POINT: when you run "Collapse" and are asked "Down to how many stages?" answer with 2.  That will give all collapses down to where your score becomes a simple binary.

n.part2 = function (n = 30)
{
    index = function(i, j, n) return(i + (2 * n - j) * (j - 1)/2)
    count.p = rep(0, n * (n + 1)/2)
    for (i in 1:n) count.p[index(i, 1, n)] = 1
    for (i in 2:n) {
        for (j in i:n) {
            count.p[index(j, i, n)] = count.p[index(j - 1, i -
                1, n)]
            if ((j - i) > 0 & (j - i) >= i)

                count.p[index(j, i, n)] = count.p[index(j, i,
                  n)] + count.p[index(j - i, i, n)]
        }
    }
    return(count.p)
}

sto = n.part2()

Collapse = function (n=10)
{

count.part=function(n, m) round(exp(lgamma(n)-lgamma(m)-lgamma(n-m+1)),0)
index = function (i, j, n) (i + (2 * n - j) * (j - 1)/2)
first.hind = function (n, m) c(rep(1, m - 1), n - m + 1)
next.perm = function (a)
{
    n = length(a)
    L2 = function(a, n) {
        for (j in (n - 1):1) {
            if (a[j] < a[j + 1])
                return(j)
        }
        return(0)
    }
    L3 = function(a, n, j) {
        for (el in n:1) {
            if (a[j] < a[el])
                return(el)
        }
    }
    L4 = function(a, n, j) {
        k = j
        for (el in n:1) {
            k = k + 1

            if (k < el) {
                sto = a[el]
                a[el] = a[k]
                a[k] = sto
            }
            else (return(a))
        }
    }
    j = L2(a, n)
    if (j == 0)
        return(NULL)
    el = L3(a, n, j)
    sto = a[el]
    a[el] = a[j]
    a[j] = sto
    a = L4(a, n, j)
    return(a)
}

next.hind = function (n, m, current)
for (i in (m - 1):1) {
    if (current[m] - current[i] >= 2) {
        sto = current[i] + 1
        for (j in i:(m - 1)) current[j] = sto
        current[m] = n - sum(current[1:(m - 1)])
        return(current)
    }
}


# get max n used to calculate number of partitions (using n.part2)

max.n=round(sqrt(length(sto)*2),0)

if(n>max.n) return(print(noquote(' Need to rerun n.part2() with larger N')))
tot=0
print(noquote("  # of     # of  Total"))
print(noquote("  phases   ways  # ways"))
tot[1]=count.part(n,n-1)
print(noquote(formatC(c(n-1,tot[1],tot[1]),width=6)))

for(i in (n-2):2){
  n.p=count.part(n,i)
  tot[n-i]=tot[n-i-1]+n.p
  print(noquote(formatC(c(i,n.p,tot[n-i]),width=6)))
}

cat('Down to how many stages? \n')
downto=scan("",n=1)
n.elems=tot[n-downto]*n
parts=matrix(rep(0,n.elems),nc=n)
ip=0
for(m in (n-1):downto){
lg.mp1=lgamma(m+1)

n.parts=sto[index(n,m,max.n)]
sto.part=first.hind(n,m)
current=sto.part
ip=ip+1
parts[ip,]=rep(1:m,current)
n.perm=exp(lg.mp1-sum(lgamma(as.vector(table(current))+1)))
if(n.perm>1){
for(j in 2:n.perm){
  ip=ip+1
  current=next.perm(current)
  parts[ip,]=rep(1:m,current)
}
}
if(n.parts>1){
for(i in 2:n.parts){
  sto.part=next.hind(n,m,sto.part)
  current=sto.part
  ip=ip+1
  parts[ip,]=rep(1:m,current)
  n.perm=exp(lg.mp1-sum(lgamma(as.vector(table(current))+1)))
  if(n.perm>1){
  for(j in 2:n.perm){
  ip=ip+1
  current=next.perm(current)
  parts[ip,]=rep(1:m,current)
}
}
}

}
}
return(parts)
}


part10 = Collapse()


Getting Some Data - It's finally time for some real data


Click on the link below to get Todd pubic symphysis scores for 754 individuals from the Terry Anatomical Collection.  These data were collected under NSF grant BCS-9727386 to Lyle Konigsberg.  The complete data sets are available from  http://konig.la.utk.edu/paleod.htm

Terry.csv

If you have the *.csv extension associated with a spreadsheet program, then this will open in a spreadsheet.  If so, save the file (with the extension *.csv) in your R working directory.  And then start R and save the data to an object, say (copy and paste the line below to your "R" Gui):

Terry = read.csv('terry.csv')

And now get comparable data from females that were used in the Gilbert and McKern study.  Note that the original scoring from that study is, so far as we know, unavailable.  The scores in the file below were made by Konigsberg.

Gilbert.csv

And then:

Gilbert = read.csv('gilbert.csv')


Getting the Data into two objects (one for females and one for males)

We willl get the object for males first as it is simpler (because there are no males in the Gilbert and McKern data set) and then for females:

males = Terry[Terry$Sex=='M',3:4]
females = rbind(Terry[Terry$Sex=='M',3:4],Gilbert[,2:3])


Get p-values for normal and log-normal transitions

Now we're going to get a bunch of p-values for plotting in Figures.

run.collapse = function (file=rbind(males,females),log.it=T) 
{
options(warn=-1)
prob=vector()
for(i in 1:510){
prob[i]=LM.test(file,part10[i,],log.it=log.it)
cat(i,prob[i],'\r')
flush.console()
}
prob[511]=LM.test(file,1:10,log.it=log.it)

cat("\n\n")
return(prob)
}

probs.female.strt
= run.collapse(females,F)
probs.female.log = run.collapse(females,T)
probs.male.strt = run.collapse(males,F)
probs.male.log = run.collapse(males,T)


Figure 11

Fig. 11. Plots of the goodness-of-fit p-values for females and for males against log-normal and normal transition distributions where each point represents one of the 511 possible scorings of from ten Todd phases down to collapsed scorings of 9, 8, 7,…, and 2 stages. The diagonal line is the line of identity.

Fig.11 = function ()
{
par(mfrow=c(1,2))
plot(probs.female.strt,probs.female.log,xlim=c(0,1),ylim=c(0,1),main='Females',
ylab='p-value (Log normal)',xlab='p-value (Normal)')
abline(0,1)
plot(probs.male.strt,probs.male.log,xlim=c(0,1),ylim=c(0,1),main='Males',
ylab='p-value (Log normal)',xlab='p-value (Normal)')
abline(0,1)
}

Fig.11()


Figure 12

Fig. 12. Plot of the 511 p-values for females from the goodness-of-fit tests against log-normal transition distributions. The number of phases is randomly “jittered” and shown with counts in bold where there were too many values below 0.05 to represent them distinctly. The filled point for the one eight-stage system (Todd VIII-X collapsed into one phase) is at a p-value of 0.785.

par(mfrow=c(1,1))

Fig.12 = function ()
{
set.seed(12345)
p10=LM.test(females,log.it=T)
plot(10,p10,xlim=c(1,10),ylim=c(0,1),
ylab='Goodness-of-fit p-value',xlab='Number of Phases',axes=F,
main='Females')
box()
axis(2)
axis(1,at=2:10)
for(i in 1:8) lines(rep(1.5+i,2),c(0,1),lty=2)
to.plot=cbind(part10[-10,10],probs.female.log[-c(10,511)])
in.9 = to.plot[to.plot[,1]==9,2]
print(paste('Highest p-value from 9 & 10 stages = ',max(c(in.9,p10))))
print('T2 p-value')
print(probs.female.log[239])
points(seq(8.55,9.45,length.out=9),in.9)
in.2 = to.plot[to.plot[,1]==2,2]
points(jitter(rep(2,9)),in.2)
to.plot=to.plot[to.plot[,1]>2 & to.plot[,1]<9,]
in.it=to.plot[,2]<0.05
counts=as.vector(table(to.plot[in.it,1]))
to.plot=cbind(jitter(to.plot[!in.it,1]),to.plot[!in.it,2])
points(to.plot)
points(8,probs.female.log[10],pch=19)
text(8,probs.female.log[10]+.07,paste('p = \n',round(probs.female.log[10],4)))

for(i in 3:9) text(i,0.0,counts[i-2],font=2)
}

Fig.12()



Figure 13

Fig. 13. As in Fig. 12 but for males. The filled point for the one eight-stage system (Todd VIIIX collapsed into one phase) is at a p-value of 0.996.

Fig.13 = function ()
{
set.seed(12345)
p10=LM.test(males,log.it=T)
plot(10,p10,xlim=c(1,10),ylim=c(0,1),
ylab='Goodness-of-fit p-value',xlab='Number of Phases',axes=F,
main='Males')
box()
axis(2)
axis(1,at=2:10)
for(i in 1:8) lines(rep(1.5+i,2),c(0,1),lty=2)
print('T2 p-value')
print(probs.male.log[239])
to.plot=cbind(part10[-10,10],probs.male.log[-c(10,511)])
in.9 = sort(to.plot[to.plot[,1]==9,2])
points(seq(8.55,9.45,length.out=8),sample(in.9[-9]))
points(9,in.9[9])
in.2 = to.plot[to.plot[,1]==2,2]
points(rep(2,9),in.2)
to.plot=to.plot[to.plot[,1]>2 & to.plot[,1]<9,]
in.it=to.plot[,2]<0.05
counts=as.vector(table(to.plot[in.it,1]))
to.plot=cbind(jitter(to.plot[!in.it,1]),to.plot[!in.it,2])
points(to.plot)
points(8,probs.male.log[10],pch=19)
text(8,probs.male.log[10]-.07,paste('p = \n',round(probs.male.log[10],4)))

for(i in 3:9) text(i,0.0,counts[i-2],font=2)
}

Fig.13()


Figure 14

Fig. 14. As in Fig. 12 but for females and males combined. The filled point for the one eightstage system (Todd VIII-X collapsed into one phase) is at a p-value of 0.773.

probs.combo.log = run.collapse()

Fig.14 = function ()
{
set.seed(12345)
p10=LM.test(rbind(males,females),log.it=T)
plot(10,p10,xlim=c(1,10),ylim=c(0,1),
ylab='Goodness-of-fit p-value',xlab='Number of Phases',axes=F,
main='Males & Females')
box()
axis(2)
axis(1,at=2:10)
for(i in 1:8) lines(rep(1.5+i,2),c(0,1),lty=2)
print('T2 p-value')
print(probs.combo.log[239])
to.plot=cbind(part10[-10,10],probs.combo.log[-c(10,511)])
in.9 = to.plot[to.plot[,1]==9,2]
points(seq(8.55,9.45,length.out=9),in.9)
in.2 = to.plot[to.plot[,1]==2,2]
points(jitter(rep(2,9)),in.2)
to.plot=to.plot[to.plot[,1]>2 & to.plot[,1]<9,]
in.it=to.plot[,2]<0.05
counts=as.vector(table(to.plot[in.it,1]))
to.plot=cbind(jitter(to.plot[!in.it,1]),to.plot[!in.it,2])
points(to.plot)
points(8,probs.combo.log[10],pch=19)
text(8,probs.combo.log[10]+.07,paste('p = \n',round(probs.combo.log[10],4)))

for(i in 3:9) text(i,0.0,counts[i-2],font=2)
}

Fig.14()


Figure 15

Fig. 15. Plot of the log-normal transitions between the eight stages for females and for males.

Fig.15 = function ()
{
mu.f=c(2.489,2.795,2.921,3.057,3.326,3.408,3.478)
sd.f=0.364
ages=seq(0.01,90,0.01)
plot(ages,dlnorm(ages,mu.f[1],sd.f),type='l',xlab='Age-at-transition',ylab='Density')
for(i in 2:7){
 lines(ages,dlnorm(ages,mu.f[i],sd.f))
}
mu.m=c(2.538,2.750,2.897,3.022,3.268,3.397,3.503)
sd.m=0.373

for(i in 1:7){
 lines(ages,dlnorm(ages,mu.m[i],sd.f),lty=2)
}
legend(52,.07,legend=c('Females','Males'),lty=1:2,bty='n')
}

Fig.15()




Getting Some More Data - from Liversidge (2011)


Click on the link below to get Moorrees, Fanning, and Hunt scores on  for the M2 on 1,135 girls.  These data were simulated after analyzing the original data from Liversidge (2011)..

simulated_girls.csv

And then copy and paste the line below to your "R" Gui:

simulated.girls = read.csv('simulated_girls.csv')

And now get comparable simulated data from 1,167 boys:

simulated_boys.csv

And then:

simulated.boys = read.csv('simulated_boys.csv')



Figure 16

Fig. 16. In the analysis of the original data there were outliers who were identified and subsequently removed in the M2 dental development data for 1,135 females.  As this is simulated data there are no outliers.

Fig.16 = function ()
{
dat=cbind(simulated.girls[,1],jitter(simulated.girls[,2],factor=0.5))
plot(dat,cex=0.3,ylab='M2 Stage',main='Females',xlab='Age')
}

Fig.16()


Figure 17

Fig. 17. As per Figure 16, there are no outliers as this is simulated data.  The plot is for M2 dental development data for 1,169 (simulated) males.  

Fig.17 = function ()
{
dat=cbind(simulated.boys[,1],jitter(simulated.boys[,2],factor=0.5))
plot(dat,cex=0.3,ylab='M2 Stage',main='Males',xlab='Age')
}

Fig.17()


Figure 18

Fig. 18. Comparison of the exponential transition model to the log-normal transition model for the transition between Todd I-VII versus Todd VIII-X.

Fig.18 = function ()
{
    a = 19.63708118
    lambda = 0.06385613
    ages = seq(a, 100, length.out = 101)
    plot(ages, dexp(ages-a, lambda), type = "l", xlim = c(0, 100),
        ylab = "Density", xlab = "Age at transition to phase VIII",lwd=2,lty=2)
    lines(c(0, a), c(0, 0),lwd=2,lty=2)
    b = 1/lambda
#   modal age-at-transition
    lines(rep(a, 2), c(0, dexp(0, lambda)),lwd=2,lty=2)
#   median age-at-transition
#    lines(rep(a+b*log(2), 2), c(0, dexp(b*log(2), lambda)), col = "blue",lwd=2)
#   mean age-at-transition
#    lines(rep(a + b, 2), c(0, dexp(b, lambda)),col='brown',lwd=2)
    legend(60,.03,c('Exponential','Log-normal'),lwd=rep(2,2),lty=c(2,1),bty='n')
    ages=seq(0,100,.1)
#   log-normal
    lines(ages,dlnorm(ages,8.344885/2.408063,1/2.408063),lwd=2)
}

Fig.18()



Table 1

Table 1. Eruption ages for the right mandibular first molar and right mandibular third molar in 34 rhesus macaques (from Hurme and Van Wegenen, 1961)

Table.01 = function ()
{
set.seed(123456)
dat = rhesus[complete.cases(rhesus[,c(2,16)]),c(1,2,16)]
dat = dat[sort(dat[,1],index.ret=T)$ix,]
obs.age = NA
n.erupted = NA
for(i in 1:17) {
   obs.age[i] = round(dat[i,2]+rnorm(1,0,.083),2)
   n.erupted[i] = as.numeric(obs.age[i]>dat[i,2])
}

for(i in 18:34) {
   obs.age[i] = round(dat[i,3]+rnorm(1,0,.083),2)
   n.erupted[i] = as.numeric(obs.age[i]>dat[i,3]) + 1
}

out = cbind(dat,obs.age,n.erupted)
row.names(out)=1:34
return(out)
}

Table.01()



Table 2

Table 2. Number of collapsed scorings for ordinal systems with n stages.

Table.02 = function ()
{
print.symmetric = function(x)
{
# This function from CRAN library "heavy" ver. 0.2
  ll <- lower.tri(x, diag = TRUE)
  x[ll] <- format(x[ll])
  x[!ll] <- ""
  print(x,quote = F)
}

count = function(n,m) exp(lgamma(n)-lgamma(m)-lgamma(n-m+1))

individs = matrix(0,nc=13,nr=13)

for(n in 3:15){
   for(m in seq(n-1,2,-1)){
     individs[n-2,m-1] = count(n,m)
   }
}
print.symmetric(individs)

for(i in 1:13) individs[i,] = cumsum(individs[i,])

print.symmetric(individs)
}

Table.02()




Table 3

Table 3. Cumulative log-probit parameters for transitions between Todd phases I, II, III, IV, V, VI, VII, and VIII – X.

Table.03 = function ()
{
library(VGAM)
collapse=c(1:7,8,8,8)

Get.results = function(dat){
age=log(dat[,1])
stage=dat[,2]
stage=collapse[stage]
parms=coef(vglm(stage~age,cumulative(link='probit',parallel = TRUE)))
parms[8]=-1/parms[8]
lnMu=round(parms[-8]*parms[8],3)
w=exp(parms[8]^2)
median=exp(lnMu)
mode=median/w
avg=median*sqrt(w)
results = cbind(lnMu,round(mode,1),round(median,1),round(avg,1))
rownames(results)=c('1-2','2-3','3-4','4-5','5-6','6-7','7-8+')
colnames(results)=c('lnMu','Modal','Median','Mean')
print(results)
print(round(parms[8],3))
}

Get.results(females)
Get.results(males)
Get.results(rbind(males,females))

}

Table.03()


Table 4

Table 4. Probability values from the specification test for transition models using the second molar data with 15 stages.  These values differ from the article because the data ae simulated.  There are also no outliers because of the simulation.

Table.04 = function ()
{
options(warn=-1)
FemDat=simulated.girls
print(round(LM.test(cbind(FemDat[,1]+0.75,FemDat[,2]),coll=1:15,log.it=T),4))
print(round(LM.test(cbind(FemDat[,1]+0.75,FemDat[,2]),coll=1:15,log.it=F),4))
cat('\n')

MalDat=simulated.boys
print(round(LM.test(cbind(MalDat[,1]+0.75,MalDat[,2]),coll=1:15,log.it=T),4))
print(round(LM.test(cbind(MalDat[,1]+0.75,MalDat[,2]),coll=1:15,log.it=F),4))
cat('\n')

dat=rbind(FemDat,MalDat)
print(round(LM.test(cbind(dat[,1]+0.75,dat[,2]),coll=1:15,log.it=T),4))
# Raw scale (not logged) is singular so cannot be analyzed
}

Table.04()


Table 5

Table 5. Cumulative log-probit parameters for transitions between the 15 Moorrees, Fanning, and Hunt (1963) stages for the second mandibular molar.  Values differ slightly from the article as these values are from analyses of simulated data.

Table.05 = function ()
{
library(VGAM)
options(warn=-1)
rows=c('1-2','2-3','3-4','4-5','5-6','6-7','7-8','8-9','9-10','10-11','11-12',
       '12-13','13-14','14-15')

FemDat=simulated.girls
theta = coef(vglm(FemDat[,2]~log(FemDat[,1]+0.75),cumulative(link='probit',parallel = TRUE)))
theta[15] = -1/theta[15]
theta[-15] = theta[-15]*theta[15]
mu=as.vector(theta)
sdev=mu[15]
mu=mu[-15]

mod = round(exp(mu)/exp(sdev^2)-.75,1)
med = round(exp(mu)-.75,1)
mu2 = round(exp(mu)*exp(0.5*sdev^2)-.75,1)
mu = round(mu,3)

out=data.frame(cbind(mu,mod,med,mu2))
rownames(out)=rows
print(out)
cat(round(sdev,3))
cat('\n\n')

MalDat=simulated.boys
theta = coef(vglm(MalDat[,2]~log(MalDat[,1]+0.75),cumulative(link='probit',parallel = TRUE)))
theta[15] = -1/theta[15]
theta[-15] = theta[-15]*theta[15]
mu=as.vector(theta)
sdev=mu[15]
mu=mu[-15]
mod = round(exp(mu)/exp(sdev^2)-.75,1)
med = round(exp(mu)-.75,1)
mu2 = round(exp(mu)*exp(0.5*sdev^2)-.75,1)
mu = round(mu,3)

out=data.frame(cbind(mu,mod,med,mu2))
rownames(out)=rows
print(out)
cat(round(sdev,3))
cat('\n')
}


Table.05()


Reference cited