Code:
smap()(smartpca -p <(printf %s\\n genotypename:\ $1.bed snpname:\ $1.bim indivname:\ $1.fam evecoutname:\ $1.evec evaloutname:\ $1.eval "${@:2}"))
smap2()(smap "$@" numoutlieriter:\ 0 autoshrink:\ YES lsqproject:\ YES numoutevec:\ 20)
tav()(awk '{n[$1]++;for(i=2;i<=NF;i++){a[$1][i]+=$i}}END{for(i in a){o=i;for(j=2;j<=NF;j++)o=o FS sprintf("%f",a[i][j]/n[i]);print o}}' "FS=${1-$'\t'}")
smapa()(sed 1d $1.evec|awk NF--|tr : \ |awk 'NR==FNR{a[$2]=$1;next}{$1=a[$2]}1' $1.pick -|sed s/\ /:/|tr ' ' ,>$1.csv;sed 's/:[^,]*//' $1.csv|tav ,>$1.ave;awk 'NR==FNR{a[NR]=$0;next}{printf"%s",$1;for(i=2;i<=NF;i++)printf",%.6f",$i*a[i-1]^.5;print""}' $1.{eval,ave}>$1.avescaled)
smap3()(smap2 $1;smapa $1)
igno()(grep -Ev '\.REF|Ignore_|_outlier|_dup|_contam|_father|_mother|_son|_daughter|_brother|_sister|_sibling|_twin|\.rel\.|Neanderthal|Denisova|Vindija')
keep2()(plink --allow-no-sex --bfile $1 --keep <(awk 'NR==FNR{a[$2];next}$2 in a' $2.pick $1.fam) --make-bed --out $2)
x=latlong7;grep -v _dup v44.3_HO_public.anno|sort -t$'\t' -rnk15|awk -F\\t 'NR>1&&!a[$3]++&&$11>=45&&$12>=-22&&$15>=5e5&&$6==0{print$8,$2}'|igno|grep -Ev '_o|_lc'>$x.pick;keep2 v44.3_HO_public $x;smap3 $x
At first because there were duplicate samples, some of the clines on PCs 4 and further were dominated by populations with duplicate samples. Now I got more reasonable plots for the higher dimensions. Except maybe there's still duplicates among the Selkup or Ashkenazi samples.
Code:
library(tidyverse)
library(ggforce)
t=read.csv("latlong7.csv",row.names=1,header=F)
names(t)=paste0("PC",1:ncol(t))
pop=sub("\\.(DG|SDG|SG|WGA)","",sub(":.*","",rownames(t)))
centers=aggregate(t,by=list(pop),mean)
t$pop=pop
set.seed(1488)
color=as.factor(sample(seq(1,length(unique(t$pop)))))
col=rbind(c(60,80),c(25,95),c(30,70),c(60,100),c(20,50),c(70,50))
hues=max(ceiling(length(color)/nrow(col)),12)
pal1=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],x[1],x[2])))
pal2=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],.8*x[1],.3*x[2])))
ggplot(t,aes(x=PC1,y=PC2,group=-1L))+
ggforce::geom_voronoi_tile(aes(fill=color[as.factor(pop)],color=color[as.factor(pop)]),size=.07,max.radius=.003)+
geom_label(data=centers,aes(x=PC1,y=PC2,label=Group.1,color=color,fill=color),alpha=.7,size=2,label.r=unit(.08,"lines"),label.padding=unit(.08,"lines"),label.size=0)+
coord_fixed()+
scale_x_continuous(breaks=seq(-1,1,.02),expand=expansion(mult=.02))+
scale_y_continuous(breaks=seq(-1,1,.02),expand=expansion(mult=.02))+
scale_fill_manual(values=pal1)+
scale_color_manual(values=pal2)+
theme(
axis.text=element_text(color="black",size=6),
axis.ticks=element_blank(),
axis.ticks.length=unit(0,"pt"),
axis.title=element_text(color="black",size=8),
legend.position="none",
panel.background=element_rect(fill="white"),
panel.border=element_rect(color="gray85",fill=NA,size=.4),
panel.grid.major=element_line(color="gray85",size=.2)
)
ggsave("a.png",width=7,height=7)
system("mogrify -trim -bordercolor white -border 16x16 a.png")
Bookmarks