MLNI  


This is R code from Konigsberg LW, and Adams BJ. 2014. Estimating the number of individuals represented by commingled human remains: A critical evaluation of methods. In: Adam BJ, and Byrd JE, editors. Commingled human remains: Methods in recovery, analysis, and identification. New York: Academic Press. p 193-220.

 


Figure 1 - Analytical calculation of the MNI against recovery rate based on an actual size of 40 individuals  

 

MNI = function (N=40,r=.3)
{
cumb.sq=pbinom(0:N,N,r)^2
p=c(cumb.sq[1],diff(cumb.sq))
return(which.max(p)-1)
}

Fig.01 = function ()
{
x=0
y=0
x=seq(0,100,.01)
for(i in 1:10001){
 y[i]=MNI(r=x[i]/100)
}
plot(x,y,type='l',ylab='MNI (max[L,R])',xlab='Recovery Rate (%)')
lines(c(0,100),c(0,40),lty=2,lwd=2)
legend(11.5,33,legend=c('MNI','rN'),bty='n',lty=1:2,lwd=1:2)
}

Fig.01()


Figure 3 - Approximate Bayesian computation (ABC) of the number of individuals in which the simulated number of counted bones pairs (P*) must equal the observed count.  "Hypg (uniform recovery)" is the analytical result assuming a uniform prior for the recovery probability and hypergeometric distribution for the number of bone pairs.  "Hypg (uniform N)" assumes a uniform distribution for the number of individuals.

 

bones=structure(c(53, 40, 40, 39, 28, 58, 44, 39, 45, 25, 18, 10, 12,
11, 6), .Dim = c(5, 3), .Dimnames = list(NULL, c("L", "R",
"P")))

ABC = function (LE=0)
{
library(SuppDists)
L=bones[1,1]
R=bones[1,2]
P=bones[1,3]
N.sto=0
ip=0
iter=0
cat('\n')
while(ip<50000){
N = floor(runif(1,0,251))+L+R-P
iter=iter+1
test.P = rghyper(1,L,R,N)
if(abs(test.P-P)<=LE){
  ip=ip+1
  cat(paste(ip,' of 50,000','\r'))
  flush.console()
  N.sto[ip]=N
 }
}
return(N.sto)
cat('\n')
}

N=ABC()

Fig.03 = function ()
{
library(SuppDists)

y=as.numeric(table(N))/50000
x=sort(unique(N))
plot(x,y,type='h',xlab='N',ylab='Probability',
main='L = 53, R = 58, P = 18              P* = 18')
L = bones[1,1]
R = bones[1,2]
P = bones[1,3]

analyt=function (top.up=250)
{
pdf2 = function(N) {dghyper(P,L,R,N)*P/(N+1)}
pdf4 = function(N,integ) {dghyper(P,L,R,N)/integ}
bot=L+R-P
top=bot+top.up

integ=sum(pdf4(bot:top,integ=1))
lines(bot:top,pdf4(bot:top,integ=integ),lwd=2)
lines(bot:top,pdf2(bot:top),lty=2)
}

analyt()
legend(235,.012,legend=c('Hypg (uniform recovery)','Hypg (uniform N)'),
      bty='n',lty=c(2,1),lwd=c(1,2))
}

Fig.03()


Figure 4 - As in Figure 3, but allowing the simulated pairs counts to range as far as three above or below the actual count.  Note how the histogram from ABC (the vertical lines) has begun to "grow" in the tails relative to the analytical calculations.

 

N3 = ABC(3)

Fig.04 = function ()
{

library(SuppDists)

y=as.numeric(table(N3))/50000
x=sort(unique(N3))
plot(x,y,type='h',xlab='N',ylab='Probability',ylim=c(0,.015),
main='L = 53, R = 58, P = 18              P* = 15 to 21')
L = bones[1,1]
R = bones[1,2]
P = bones[1,3]

analyt=function (top.up=250)
{


pdf2 = function(N) {dghyper(P,L,R,N)*P/(N+1)}
pdf4 = function(N,integ) {dghyper(P,L,R,N)/integ}


bot=L+R-P
top=bot+top.up

integ=sum(pdf4(bot:top,integ=1))

lines(bot:top,pdf4(bot:top,integ=integ),lwd=2)
lines(bot:top,pdf2(bot:top),lty=2)
}

analyt()
legend(235,.012,legend=c('Hypg (uniform recovery)','Hypg (uniform N)'),
      bty='n',lty=c(2,1),lwd=c(1,2))
}

Fig.04()


Pages 210-213 - These pages contain some calculations and equations based on O'Brien and Storlie's data.  Calculations differ slightly from their work as I used a more direct method for calculating Mahalanobis distances and related statistics.  First we need some datasets:

 


Taph.Ref = structure(list(ID=structure(c(1,3,5,7,9,11,12,13,14,15,16,18,20,22,24,25,26,2,4,6,8,10,11,12,13,14,15,17,19,21,23,24,25,
26),.Label=c("21271L","21271R","40082L","40082R","42162L","42162R","42174L","42174R","53505L","53505R","8255B","8263B","8363B",
"8403B","8409B","86329L","86329R","87751L","87751R","87752L","87752R","87753L","87753R","9271B","9981B","9982B"),class="factor"),SIDE=structure(c(1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2),.Label=c("LEFT",
"RIGHT"),class="factor"),HM6=c(34.73,29.37,37.27,36.32,33.47,35.77,35.31,40.48,36.71,36.06,35.86,35.13,34.74,
34.98,37.58,37.74,34.82,34.36,29.66,36.69,36.65,33.39,35.57,36.39,40.37,36.3,36.52,35.88,35.02,35.16,35.35,
37.96,37.57,34.85),HM7=c(33.03,30.22,36.84,35.4,34.24,36.37,34.39,41.28,36.81,36.43,36.87,34.7,35.56,34.11,
38.16,37.28,34.81,33.9,30.75,37.08,34.91,33.89,35.5,36.11,40.9,36.63,36.27,36.65,34.46,35.36,35.02,38.28,
37.02,34.59),HM8=c(15.02,13.36,14.15,11.77,14.05,14.46,11.63,15.91,13.25,13.56,15.93,12.93,15.4,15.2,14.41,
14.17,15.29,15.35,13.96,14.08,12.02,13.88,13.96,11.64,15.46,13.01,13.25,15.15,13.21,15.42,15.21,14.58,13.98,
15.17),HM11=c(30.48,26.8,32.13,30.31,28.98,30,30.67,33.02,31.63,30.57,31.67,29.35,29.9,30.3,31.77,31.3,30.77,
29.94,26.85,32.05,29.77,28.82,30.27,30.48,33.16,31.91,30.32,31.8,29.39,29.51,30.12,31.41,31.22,30.65),HM14=c(23.38,
19.86,26.65,23.71,23.76,23.83,24.97,25.85,25.52,24.04,24.2,22.86,22.39,23.49,24.84,26,23.31,23.67,20.28,26.6,
23.87,24.1,24.24,25.36,26.22,25.79,24.17,24.17,22.76,22.82,23.62,24.62,25.61,23.14),HM15=c(7.65,6.53,8.54,
8.13,7.36,8.04,9.3,9.41,8.83,9.23,8.12,8.02,7.88,7.62,7.9,8.95,8.24,7.62,6.56,8.56,8.38,7.48,8.01,9.3,9.9,
9.16,8.9,8.32,7.95,7.75,7.83,7.69,9.19,8.05),Source=structure(c(1,1,1,1,1,2,2,2,2,2,1,1,1,1,2,2,2,1,1,1,1,1,2,2,2,2,2,1,1,1,1,2,2,
2),.Label=c("MSB","UW"),class="factor")),.Names=c("ID","SIDE","HM6","HM7","HM8","HM11","HM14","HM15","Source"),class="data.frame",row.names=c(NA,-34))
Taph.test=structure(list(Individual=structure(c(27,28,29,30,31,
32,27,28,29),.Label=c("21271L","21271R","40082L",
"40082R","42162L","42162R","42174L","42174R","53505L","53505R",
"8255B","8263B","8363B","8403B","8409B","86329L","86329R",
"87751L","87751R","87752L","87752R","87753L","87753R","9271B",
"9981B","9982B","1","2","3","4","5","6"),class="factor"),
SIDE=structure(c(1,1,1,1,1,1,2,2,2),.Label=c("LEFT",
"RIGHT"),class="factor"),HM6=c(34.73,29.37,37.27,
36.32,33.47,35.77,34.36,29.66,36.69),HM7=c(33.03,
30.22,36.84,35.4,34.24,36.37,33.9,30.75,37.08),HM8=c(15.02,
13.36,14.15,11.77,14.05,14.46,15.35,13.96,14.08),
HM11=c(30.48,26.8,32.13,30.31,28.98,30,29.94,26.85,
32.05),HM14=c(23.38,19.86,26.65,23.71,23.76,23.83,
23.67,20.28,26.6),HM15=c(7.65,6.53,8.54,8.13,7.36,
8.04,7.62,6.56,8.56)),.Names=c("Individual","SIDE",
"HM6","HM7","HM8","HM11","HM14","HM15"),row.names=c("1",
"2","3","4","5","6","7","8","9"),class="data.frame")
Taph.test2=structure(list(Individuals=structure(27:35,.Label=c("21271L",
"21271R","40082L","40082R","42162L","42162R","42174L","42174R",
"53505L","53505R","8255B","8263B","8363B","8403B","8409B",
"86329L","86329R","87751L","87751R","87752L","87752R","87753L",
"87753R","9271B","9981B","9982B","4","5","6","7","8",
"9","1","2","3"),class="factor"),SIDE=structure(c(1,
1,1,1,1,1,2,2,2),.Label=c("LEFT","RIGHT"),class="factor"),
HM6=c(36.32,33.47,35.77,35.31,40.48,36.71,34.36,
29.66,36.69),HM7=c(35.4,34.24,36.37,34.39,41.28,
36.81,33.9,30.75,37.08),HM8=c(11.77,14.05,14.46,
11.63,15.91,13.25,15.35,13.96,14.08),HM11=c(30.31,
28.98,30,30.67,33.02,31.63,29.94,26.85,32.05),HM14=c(23.71,
23.76,23.83,24.97,25.85,25.52,23.67,20.28,26.6),HM15=c(8.13,
7.36,8.04,9.3,9.41,8.83,7.62,6.56,8.56)),.Names=c("Individuals",
"SIDE","HM6","HM7","HM8","HM11","HM14","HM15"),row.names=c("1",
"2","3","4","5","6","7","8","9"),class="data.frame")

Then "my.pairs" does posterior probabilities conditioning on left and on right bones.  The default uses the first example in Konigsberg and Adams.  Using "Taph.test2" in the parentheses does the second example.


my.pairs = function (test=Taph.test)
{
data = Taph.Ref

L = Taph.Ref[1:17,3:8]
R = Taph.Ref[18:34,3:8]
D = L-R
mu.D = apply(D,2,mean)

i.V = solve(var(D))

L = test[1:6,-(1:2)]
R = test[7:9,-(1:2)]

LK = matrix(NA,nr=6,nc=3)
for(i in 1:6){
for(j in 1:3){
   d=as.numeric(L[i,]-R[j,])
   LK[i,j] = exp(-.5*t(d)%*%i.V%*%d)
}
}
cat('\n')
cat(' Conditional on right bones: \n')
print(round(LK/rep(1,6)%o%apply(LK,2,sum),0))
cat('\n')
cat(' Conditional on left bones: \n')
print(round(LK/t(rep(1,3)%o%apply(LK,1,sum)),0))
cat('\n')

}

"my.pairs2" does typicalities.  The default uses the first example in Konigsberg and Adams.  Using "Taph.test2" in the parentheses does the second example.


my.pairs2 = function (test=Taph.test)
{
data = Taph.Ref

L = Taph.Ref[1:17,3:8]
R = Taph.Ref[18:34,3:8]
D = L-R
mu.D = apply(D,2,mean)

i.V = solve(var(D))

L = test[1:6,-(1:2)]
R = test[7:9,-(1:2)]

D2 = matrix(NA,nr=6,nc=3)
for(i in 1:6){
for(j in 1:3){
   d=as.numeric(L[i,]-R[j,])
   D2[i,j] = t(d)%*%i.V%*%d
}
}
cat('\n')
cat(' Mahalanobis distances: \n')
print(round(D2,0))
cat('\n')
cat(' Typicality probabiliries: \n')
print(round(1-pchisq(D2,6),4))
cat('\n')

}


Figure 5 - Simulation of sampling left and right bones from 500 individuals in which the latent traits for sampling bones have a standard bivariate normal distribution with a correlation of 0.999.  The gray ellipse is the 95% isodensity ellipse, and sampling thresholds are at 0.5 (vertical dashed line) for the right side and 0.7 (horizontal dashed line) for the left side.  Counts are given for "-L,-R" (neither bone sampled), "L+,R+" (both bones sampled), and "L-,R+" (only right bone sampled).

 


Fig.05 = function (N = 500)
{
library(MASS)
library(ellipse)
set.seed(1234)
MLNI = 0
MNI = 0
r.sides=0.999
plot(ellipse(r.sides),type='l',xlab='Latent trait, Right side',
     ylab='Latent trait, Left side',lty=2)
polygon(ellipse(r.sides),col='light grey')
lines(c(-6,6),rep(0.7,2),lwd=2,lty=2)
lines(rep(0.5,2),c(-6,6),lwd=2,lty=2)
text(1.4,1.41,'+L,+R = 116',cex=1.25)
text(0.6,0.57,'-L,+R = 34',adj=0,cex=1.25)
text(-.996,-.97,'-L,-R = 350',cex=1.25)
sample=matrix(rep(0,N*2),nc=2)
for(i in 1:1){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)
sample[,1]=bones[,1]>=.7
sample[,2]=bones[,2]>=.5
print(table(sample[,1],sample[,2]))

MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
print(c(L,R,P))
}
}

Fig.05()


Figure 6 - As in Figure 5, but with 1,000 iterates of sampling from 500 individuals at left, right correlation values of 0.0, 0.7, and 0.999.  The iterates for correlation values of 0.0 and 0.7 are enclosed by their convex hulls, while the iterates for correlation values of 0.999 are indicated with a line of identity.

 


Fig.06 = function (N = 500)
{
library(MASS)
set.seed(1234)
MLNI = 0
MNI = 0
r.sides=0
for(i in 1:1000){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)

MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
}

plot(MNI,MLNI,xlim=c(110,200),ylim=c(0,800),pch=19,cex=.2,
 xlab='max(L,R)',ylab='MLNI')
lines(c(0,1000),rep(500,2),lty=2,lwd=2)
trace=chull(MNI,MLNI)
lines(MNI[c(trace,trace[1])],MLNI[c(trace,trace[1])])

r.sides=0.7
for(i in 1:1000){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)

MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
}


points(MNI,MLNI,pch=19,cex=.2)
trace=chull(MNI,MLNI)
lines(MNI[c(trace,trace[1])],MLNI[c(trace,trace[1])])

r.sides=0.999
for(i in 1:1000){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)

MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
}
print(median(MNI))
print(median(MLNI))
points(MNI[1:500],MLNI[1:500])
abline(0,1)
text(180,669,expression(paste(rho==0.0)))
text(180,300,expression(paste(rho==0.7)))
text(190,193,expression(paste(rho==0.999)),srt=5,adj=c(0,0))
text(191,512,'Actual N',adj=c(0,0))

}

Fig.06()


References

O'Brien, M., and C. B. Storlie. An alternative bilateral refitting model for zooarchaeological assemblages. Journal of Taphonomy 9 (2011): 245-268.