Code:
# Rank every population by its Euclidean distance to Mari after projecting the
# K13 admixture proportions through the matrix of FST distances between the
# K13 components.

# K13 table: one row per population, one column per component. Values are
# percentages in the file and are converted to proportions here.
t <- read.csv("https://pastebin.com/raw/UY1Em6qW", row.names = 1) / 100 # K13 original

# Distances between the 13 K13 components, given as a lower triangle and divided
# by 1000 (presumably FST x 1000 in the source table -- confirm). as.dist() only
# reads the lower triangle, so the empty upper triangle and diagonal are ignored.
fst <- as.dist(read.csv(text = ",North_Atlantic,Baltic,West_Med,West_Asian,East_Med,Red_Sea,South_Asian,East_Asian,Siberian,Amerindian,Oceanian,Northeast_African,Sub-Saharan
North_Atlantic,,,,,,,,,,,,,
Baltic,19,,,,,,,,,,,,
West_Med,28,36,,,,,,,,,,,
West_Asian,26,32,36,,,,,,,,,,
East_Med,26,35,28,21,,,,,,,,,
Red_Sea,52,62,50,48,39,,,,,,,,
South_Asian,64,65,76,57,60,82,,,,,,,
East_Asian,114,114,122,110,111,127,76,,,,,,
Siberian,111,111,123,109,112,130,83,56,,,,,
Amerindian,138,137,154,138,144,161,120,113,105,,,,
Oceanian,179,181,187,177,176,191,146,166,177,217,,,
Northeast_African,122,127,124,116,108,121,113,145,151,185,203,,
Sub-Saharan,146,150,150,140,135,141,133,164,170,204,220,41,", row.names = 1)) / 1000

# The product is positional: it assumes the K13 columns are in the same order as
# the rows of the distance matrix (names are not checked by %*%).
t2 <- as.matrix(t) %*% as.matrix(fst)

# Distances from Mari to every population in the transformed space, ascending.
sort(as.matrix(dist(t2))[, "Mari"])
For example, multiplying by the FST matrix moves Maris closer to Central Asians and South Asians but further from Europeans and Siberians, because it causes the distances between Eurasian populations to be largely determined by the Mongoloid-Caucasoid axis. It moves Maris closer to Turkmens and further from Kets and Selkups, which matches the results of f2. However, I don't know if it's the right method to account for FST, because it sometimes gives weird results. For example, it moves Maris closer to Balochi and Makrani than to Estonians, and it also moves Maris closer to Jordanians than to Bulgarians:
Code:
library(tidyverse)
library(ggforce)
# Scatter plot comparing, for every population, its rank by f2 distance to a
# focal population (x axis) with its rank by Euclidean distance on the K13
# proportions (y axis). Points are coloured by a 16-group hierarchical
# clustering of the f2 matrix and the figure is written to 1.png.

# Population every distance is measured against.
focal <- "Mari"

# K13 table (percentages -> proportions). check.names = FALSE keeps the labels
# exactly as they appear in the file.
k13 <- read.csv("https://pastebin.com/raw/aLBEQ2cu", row.names = 1, check.names = FALSE) / 100
# Square population-by-population f2 table. Column labels are kept unmangled
# so that they match the row labels when indexing with f2[pop, pop] below.
f2 <- read.csv("https://drive.google.com/uc?export=download&id=1qnXblYFWLFnOiEj-NbjCVkHcGIsGe64R", row.names = 1, check.names = FALSE)
# g25 <- read.csv("https://drive.google.com/uc?export=download&id=1wZr-UOve0KUKo_Qbgeo27m-CQncZWb8y", row.names = 1) # modern averages scaled

# Distances between the 13 K13 components, given as a lower triangle and divided
# by 1000 (presumably FST x 1000 in the source table -- confirm).
k13fst <- as.dist(read.csv(text = ",North_Atlantic,Baltic,West_Med,West_Asian,East_Med,Red_Sea,South_Asian,East_Asian,Siberian,Amerindian,Oceanian,Northeast_African,Sub-Saharan
North_Atlantic,,,,,,,,,,,,,
Baltic,19,,,,,,,,,,,,
West_Med,28,36,,,,,,,,,,,
West_Asian,26,32,36,,,,,,,,,,
East_Med,26,35,28,21,,,,,,,,,
Red_Sea,52,62,50,48,39,,,,,,,,
South_Asian,64,65,76,57,60,82,,,,,,,
East_Asian,114,114,122,110,111,127,76,,,,,,
Siberian,111,111,123,109,112,130,83,56,,,,,
Amerindian,138,137,154,138,144,161,120,113,105,,,,
Oceanian,179,181,187,177,176,191,146,166,177,217,,,
Northeast_African,122,127,124,116,108,121,113,145,151,185,203,,
Sub-Saharan,146,150,150,140,135,141,133,164,170,204,220,41,", row.names = 1)) / 1000

# Restrict both tables to the populations present in each of them.
pop <- intersect(rownames(f2), rownames(k13))
# pop <- intersect(rownames(g25), rownames(k13))
k13 <- k13[pop, ]
f2 <- f2[pop, pop]
# g25 <- g25[pop, ]

# K13 proportions projected through the component distance matrix. Only the
# commented-out alternative below uses it; the active plot uses plain k13.
k13mult <- as.matrix(k13) %*% as.matrix(k13fst)

# Row names of xy come from the named distance vector in `y`, i.e. `pop`.
xy <- data.frame(
  x = rank(f2[, focal]),
  y = rank(as.matrix(dist(k13))[, focal])
)
# xy <- data.frame(x = rank(as.matrix(dist(g25))[, "Mari"]), y = rank(as.matrix(dist(k13mult))[, "Mari"]))

# Colour groups: cut a hierarchical clustering of the f2 matrix into 16 groups.
xy$k <- as.factor(cutree(hclust(as.dist(f2)), 16))
# xy$k <- as.factor(cutree(hclust(dist(g25)), 16))

# Evenly spaced HCL hues at chroma 90 and luminance 60. This is the palette
# colorspace::rainbow_hcl(n, 90, 60) describes, built with base grDevices so
# the script does not depend on colorspace being attached.
n_clusters <- nlevels(xy$k)
cluster_colors <- hcl(
  h = seq(0, 360 * (n_clusters - 1) / n_clusters, length.out = n_clusters),
  c = 90,
  l = 60
)

# NOTE: `size` on line-type layers/elements is kept for compatibility with older
# ggplot2 releases; ggplot2 >= 3.4 warns and prefers `linewidth`.
ggplot(xy, aes(x, y)) +
  ggforce::geom_mark_hull(
    aes(color = k, fill = k),
    concavity = 1000,
    radius = unit(.15, "cm"),
    expand = unit(.15, "cm"),
    alpha = .2,
    size = .15
  ) +
  # Identity line: populations on it have the same rank under both metrics.
  geom_abline(linetype = "dashed", color = "gray80", size = .3) +
  geom_point(aes(color = k), size = .5) +
  geom_text(aes(color = k), label = rownames(xy), size = 2, vjust = -.7) +
  scale_x_continuous(breaks = seq(1, 200, 10), expand = expansion(mult = c(.04, .04))) +
  scale_y_continuous(breaks = seq(1, 200, 10), expand = expansion(mult = c(.04, .04))) +
  scale_fill_manual(values = cluster_colors) +
  scale_color_manual(values = cluster_colors) +
  labs(
    x = paste0("Rank of f2 distance to ", focal),
    y = paste0("Rank of K13 distance to ", focal, ", not multiplied by FST")
  ) +
  theme(
    axis.text = element_text(size = 6),
    axis.text.y = element_text(angle = 90, vjust = 1, hjust = .5),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "cm"),
    axis.title = element_text(size = 8),
    legend.position = "none",
    panel.background = element_rect(fill = "white"),
    panel.border = element_rect(color = "gray85", fill = NA, size = .6),
    panel.grid.major = element_line(color = "gray85", size = .2),
    plot.background = element_rect(fill = "white"),
    plot.subtitle = element_text(size = 7),
    plot.title = element_text(size = 11)
  )

# Saves the last plot displayed (ggsave's default) as a 6 x 6 inch PNG.
ggsave("1.png", width = 6, height = 6)
However, when you multiply the matrix of admixture percentages by the FST matrix, a global PCA based on the K13 spreadsheet takes on the conventional shape, where on PC1 and PC2 the other major cline is between Africans and Europeans:
Bookmarks