MLNI
This
is R code from Konigsberg LW, and Adams BJ. 2014. Estimating the number
of individuals represented by commingled human remains: A critical
evaluation of methods. In: Adam BJ, and Byrd JE, editors. Commingled
human remains: Methods in recovery, analysis, and identification. New
York: Academic Press. p 193-220.
Figure 1 - Analytical calculation of the MNI against recovery rate based on an actual size of 40 individuals
MNI = function (N=40,r=.3)
{
cumb.sq=pbinom(0:N,N,r)^2
p=c(cumb.sq[1],diff(cumb.sq))
return(which.max(p)-1)
}
Fig.01 = function ()
{
x=0
y=0
x=seq(0,100,.01)
for(i in 1:10001){
y[i]=MNI(r=x[i]/100)
}
plot(x,y,type='l',ylab='MNI (max[L,R])',xlab='Recovery Rate (%)')
lines(c(0,100),c(0,40),lty=2,lwd=2)
legend(11.5,33,legend=c('MNI','rN'),bty='n',lty=1:2,lwd=1:2)
}
Fig.01()
Figure
3 - Approximate Bayesian computation (ABC) of the number of individuals
in which the simulated number of counted bones pairs (P*) must equal
the observed count. "Hypg (uniform recovery)" is the analytical
result assuming a uniform prior for the recovery probability and
hypergeometric distribution for the number of bone pairs. "Hypg
(uniform N)" assumes a uniform distribution for the number of
individuals.
bones=structure(c(53, 40, 40, 39, 28, 58, 44, 39, 45, 25, 18, 10, 12,
11, 6), .Dim = c(5, 3), .Dimnames = list(NULL, c("L", "R",
"P")))
ABC = function (LE=0)
{
library(SuppDists)
L=bones[1,1]
R=bones[1,2]
P=bones[1,3]
N.sto=0
ip=0
iter=0
cat('\n')
while(ip<50000){
N = floor(runif(1,0,251))+L+R-P
iter=iter+1
test.P = rghyper(1,L,R,N)
if(abs(test.P-P)<=LE){
ip=ip+1
cat(paste(ip,' of 50,000','\r'))
flush.console()
N.sto[ip]=N
}
}
return(N.sto)
cat('\n')
}
N=ABC()
Fig.03 = function ()
{
library(SuppDists)
y=as.numeric(table(N))/50000
x=sort(unique(N))
plot(x,y,type='h',xlab='N',ylab='Probability',
main='L = 53, R = 58, P = 18 P* = 18')
L = bones[1,1]
R = bones[1,2]
P = bones[1,3]
analyt=function (top.up=250)
{
pdf2 = function(N) {dghyper(P,L,R,N)*P/(N+1)}
pdf4 = function(N,integ) {dghyper(P,L,R,N)/integ}
bot=L+R-P
top=bot+top.up
integ=sum(pdf4(bot:top,integ=1))
lines(bot:top,pdf4(bot:top,integ=integ),lwd=2)
lines(bot:top,pdf2(bot:top),lty=2)
}
analyt()
legend(235,.012,legend=c('Hypg (uniform recovery)','Hypg (uniform N)'),
bty='n',lty=c(2,1),lwd=c(1,2))
}
Fig.03()
Figure 4 - As in Figure 3, but allowing the simulated pairs counts to range as
far as three above or below the actual count. Note how the
histogram from ABC (the vertical lines) has begun to "grow" in the
tails relative to the analytical calculations.
N3 = ABC(3)
Fig.04 = function ()
{
library(SuppDists)
y=as.numeric(table(N3))/50000
x=sort(unique(N3))
plot(x,y,type='h',xlab='N',ylab='Probability',ylim=c(0,.015),
main='L = 53, R = 58, P = 18 P* = 15 to 21')
L = bones[1,1]
R = bones[1,2]
P = bones[1,3]
analyt=function (top.up=250)
{
pdf2 = function(N) {dghyper(P,L,R,N)*P/(N+1)}
pdf4 = function(N,integ) {dghyper(P,L,R,N)/integ}
bot=L+R-P
top=bot+top.up
integ=sum(pdf4(bot:top,integ=1))
lines(bot:top,pdf4(bot:top,integ=integ),lwd=2)
lines(bot:top,pdf2(bot:top),lty=2)
}
analyt()
legend(235,.012,legend=c('Hypg (uniform recovery)','Hypg (uniform N)'),
bty='n',lty=c(2,1),lwd=c(1,2))
}
Fig.04()
Pages 210-213 - These pages contain some calculations and equations based on O'Brien and Storlie's
data. Calculations differ slightly from their work as I used a
more direct method for calculating Mahalanobis distances and related
statistics. First we need some datasets:
Taph.Ref
=
structure(list(ID=structure(c(1,3,5,7,9,11,12,13,14,15,16,18,20,22,24,25,26,2,4,6,8,10,11,12,13,14,15,17,19,21,23,24,25,
26),.Label=c("21271L","21271R","40082L","40082R","42162L","42162R","42174L","42174R","53505L","53505R","8255B","8263B","8363B",
"8403B","8409B","86329L","86329R","87751L","87751R","87752L","87752R","87753L","87753R","9271B","9981B","9982B"),class="factor"),SIDE=structure(c(1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2),.Label=c("LEFT",
"RIGHT"),class="factor"),HM6=c(34.73,29.37,37.27,36.32,33.47,35.77,35.31,40.48,36.71,36.06,35.86,35.13,34.74,
34.98,37.58,37.74,34.82,34.36,29.66,36.69,36.65,33.39,35.57,36.39,40.37,36.3,36.52,35.88,35.02,35.16,35.35,
37.96,37.57,34.85),HM7=c(33.03,30.22,36.84,35.4,34.24,36.37,34.39,41.28,36.81,36.43,36.87,34.7,35.56,34.11,
38.16,37.28,34.81,33.9,30.75,37.08,34.91,33.89,35.5,36.11,40.9,36.63,36.27,36.65,34.46,35.36,35.02,38.28,
37.02,34.59),HM8=c(15.02,13.36,14.15,11.77,14.05,14.46,11.63,15.91,13.25,13.56,15.93,12.93,15.4,15.2,14.41,
14.17,15.29,15.35,13.96,14.08,12.02,13.88,13.96,11.64,15.46,13.01,13.25,15.15,13.21,15.42,15.21,14.58,13.98,
15.17),HM11=c(30.48,26.8,32.13,30.31,28.98,30,30.67,33.02,31.63,30.57,31.67,29.35,29.9,30.3,31.77,31.3,30.77,
29.94,26.85,32.05,29.77,28.82,30.27,30.48,33.16,31.91,30.32,31.8,29.39,29.51,30.12,31.41,31.22,30.65),HM14=c(23.38,
19.86,26.65,23.71,23.76,23.83,24.97,25.85,25.52,24.04,24.2,22.86,22.39,23.49,24.84,26,23.31,23.67,20.28,26.6,
23.87,24.1,24.24,25.36,26.22,25.79,24.17,24.17,22.76,22.82,23.62,24.62,25.61,23.14),HM15=c(7.65,6.53,8.54,
8.13,7.36,8.04,9.3,9.41,8.83,9.23,8.12,8.02,7.88,7.62,7.9,8.95,8.24,7.62,6.56,8.56,8.38,7.48,8.01,9.3,9.9,
9.16,8.9,8.32,7.95,7.75,7.83,7.69,9.19,8.05),Source=structure(c(1,1,1,1,1,2,2,2,2,2,1,1,1,1,2,2,2,1,1,1,1,1,2,2,2,2,2,1,1,1,1,2,2,
2),.Label=c("MSB","UW"),class="factor")),.Names=c("ID","SIDE","HM6","HM7","HM8","HM11","HM14","HM15","Source"),class="data.frame",row.names=c(NA,-34))
Taph.test=structure(list(Individual=structure(c(27,28,29,30,31,
32,27,28,29),.Label=c("21271L","21271R","40082L",
"40082R","42162L","42162R","42174L","42174R","53505L","53505R",
"8255B","8263B","8363B","8403B","8409B","86329L","86329R",
"87751L","87751R","87752L","87752R","87753L","87753R","9271B",
"9981B","9982B","1","2","3","4","5","6"),class="factor"),
SIDE=structure(c(1,1,1,1,1,1,2,2,2),.Label=c("LEFT",
"RIGHT"),class="factor"),HM6=c(34.73,29.37,37.27,
36.32,33.47,35.77,34.36,29.66,36.69),HM7=c(33.03,
30.22,36.84,35.4,34.24,36.37,33.9,30.75,37.08),HM8=c(15.02,
13.36,14.15,11.77,14.05,14.46,15.35,13.96,14.08),
HM11=c(30.48,26.8,32.13,30.31,28.98,30,29.94,26.85,
32.05),HM14=c(23.38,19.86,26.65,23.71,23.76,23.83,
23.67,20.28,26.6),HM15=c(7.65,6.53,8.54,8.13,7.36,
8.04,7.62,6.56,8.56)),.Names=c("Individual","SIDE",
"HM6","HM7","HM8","HM11","HM14","HM15"),row.names=c("1",
"2","3","4","5","6","7","8","9"),class="data.frame")
Taph.test2=structure(list(Individuals=structure(27:35,.Label=c("21271L",
"21271R","40082L","40082R","42162L","42162R","42174L","42174R",
"53505L","53505R","8255B","8263B","8363B","8403B","8409B",
"86329L","86329R","87751L","87751R","87752L","87752R","87753L",
"87753R","9271B","9981B","9982B","4","5","6","7","8",
"9","1","2","3"),class="factor"),SIDE=structure(c(1,
1,1,1,1,1,2,2,2),.Label=c("LEFT","RIGHT"),class="factor"),
HM6=c(36.32,33.47,35.77,35.31,40.48,36.71,34.36,
29.66,36.69),HM7=c(35.4,34.24,36.37,34.39,41.28,
36.81,33.9,30.75,37.08),HM8=c(11.77,14.05,14.46,
11.63,15.91,13.25,15.35,13.96,14.08),HM11=c(30.31,
28.98,30,30.67,33.02,31.63,29.94,26.85,32.05),HM14=c(23.71,
23.76,23.83,24.97,25.85,25.52,23.67,20.28,26.6),HM15=c(8.13,
7.36,8.04,9.3,9.41,8.83,7.62,6.56,8.56)),.Names=c("Individuals",
"SIDE","HM6","HM7","HM8","HM11","HM14","HM15"),row.names=c("1",
"2","3","4","5","6","7","8","9"),class="data.frame")
Then
"my.pairs" does posterior probabilities conditioning on left and on
right bones. The default uses the first example in Konigsberg and
Adams. Using "Taph.test2" in the parentheses does the second
example.
my.pairs = function (test=Taph.test)
{
data = Taph.Ref
L = Taph.Ref[1:17,3:8]
R = Taph.Ref[18:34,3:8]
D = L-R
mu.D = apply(D,2,mean)
i.V = solve(var(D))
L = test[1:6,-(1:2)]
R = test[7:9,-(1:2)]
LK = matrix(NA,nr=6,nc=3)
for(i in 1:6){
for(j in 1:3){
d=as.numeric(L[i,]-R[j,])
LK[i,j] = exp(-.5*t(d)%*%i.V%*%d)
}
}
cat('\n')
cat(' Conditional on right bones: \n')
print(round(LK/rep(1,6)%o%apply(LK,2,sum),0))
cat('\n')
cat(' Conditional on left bones: \n')
print(round(LK/t(rep(1,3)%o%apply(LK,1,sum)),0))
cat('\n')
}
"my.pairs2" does typicalities. The default
uses the first example in Konigsberg and Adams. Using "Taph.test2" in
the parentheses does the second example.
my.pairs2 = function (test=Taph.test)
{
data = Taph.Ref
L = Taph.Ref[1:17,3:8]
R = Taph.Ref[18:34,3:8]
D = L-R
mu.D = apply(D,2,mean)
i.V = solve(var(D))
L = test[1:6,-(1:2)]
R = test[7:9,-(1:2)]
D2 = matrix(NA,nr=6,nc=3)
for(i in 1:6){
for(j in 1:3){
d=as.numeric(L[i,]-R[j,])
D2[i,j] = t(d)%*%i.V%*%d
}
}
cat('\n')
cat(' Mahalanobis distances: \n')
print(round(D2,0))
cat('\n')
cat(' Typicality probabiliries: \n')
print(round(1-pchisq(D2,6),4))
cat('\n')
}
Figure
5 - Simulation of sampling left and right bones from 500 individuals in
which the latent traits for sampling bones have a standard bivariate
normal distribution with a correlation of 0.999. The gray ellipse
is the 95% isodensity ellipse, and sampling thresholds are at 0.5
(vertical dashed line) for the right side and 0.7 (horizontal dashed
line) for the left side. Counts are given for "-L,-R" (neither
bone sampled), "L+,R+" (both bones sampled), and "L-,R+" (only right
bone sampled).
Fig.05 = function (N = 500)
{
library(MASS)
library(ellipse)
set.seed(1234)
MLNI = 0
MNI = 0
r.sides=0.999
plot(ellipse(r.sides),type='l',xlab='Latent trait, Right side',
ylab='Latent trait, Left side',lty=2)
polygon(ellipse(r.sides),col='light grey')
lines(c(-6,6),rep(0.7,2),lwd=2,lty=2)
lines(rep(0.5,2),c(-6,6),lwd=2,lty=2)
text(1.4,1.41,'+L,+R = 116',cex=1.25)
text(0.6,0.57,'-L,+R = 34',adj=0,cex=1.25)
text(-.996,-.97,'-L,-R = 350',cex=1.25)
sample=matrix(rep(0,N*2),nc=2)
for(i in 1:1){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)
sample[,1]=bones[,1]>=.7
sample[,2]=bones[,2]>=.5
print(table(sample[,1],sample[,2]))
MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
print(c(L,R,P))
}
}
Fig.05()
Figure
6 - As in Figure 5, but with 1,000 iterates of sampling from 500
individuals at left, right correlation values of 0.0, 0.7, and 0.999.
The iterates for correlation values of 0.0 and 0.7 are enclosed
by their convex hulls, while the iterates for correlation values of
0.999 are indicated with a line of identity.
Fig.06 = function (N = 500)
{
library(MASS)
set.seed(1234)
MLNI = 0
MNI = 0
r.sides=0
for(i in 1:1000){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)
MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
}
plot(MNI,MLNI,xlim=c(110,200),ylim=c(0,800),pch=19,cex=.2,
xlab='max(L,R)',ylab='MLNI')
lines(c(0,1000),rep(500,2),lty=2,lwd=2)
trace=chull(MNI,MLNI)
lines(MNI[c(trace,trace[1])],MLNI[c(trace,trace[1])])
r.sides=0.7
for(i in 1:1000){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)
MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
}
points(MNI,MLNI,pch=19,cex=.2)
trace=chull(MNI,MLNI)
lines(MNI[c(trace,trace[1])],MLNI[c(trace,trace[1])])
r.sides=0.999
for(i in 1:1000){
bones = mvrnorm(N,c(0,0),Sigma=matrix(c(1,rep(r.sides,2),1),nc=2))
L = sum(bones[,1]>=.7)
R = sum(bones[,2]>=.5)
P = sum(bones[,1]>=.7 & bones[,2]>=.5)
MLNI[i] = max(floor((L+1)*(R+1)/(P+1)-1),L+R-P)
MNI[i] = max(L,R)
}
print(median(MNI))
print(median(MLNI))
points(MNI[1:500],MLNI[1:500])
abline(0,1)
text(180,669,expression(paste(rho==0.0)))
text(180,300,expression(paste(rho==0.7)))
text(190,193,expression(paste(rho==0.999)),srt=5,adj=c(0,0))
text(191,512,'Actual N',adj=c(0,0))
}
Fig.06()
References
O'Brien, M., and C. B. Storlie. An alternative bilateral refitting model for zooarchaeological assemblages. Journal of Taphonomy 9 (2011): 245-268.