Komintasavalta
03-04-2021, 01:04 PM
Ancient G25 population averages whose closest modern population average is Uralic:
# Download four files from Google Drive into g25/. Each printf argument is a
# "<local name> <Drive file id>" pair; curl follows redirects (-L), runs
# silently (-s) and writes to g25/<local name> (-o).
# NOTE(review): judging by how they are used below, "aa" holds ancient averages
# and "ma" modern averages; "ai"/"mi" are presumably the matching
# per-individual files - confirm.
$ mkdir g25;printf %s\\n ai\ 1UrhcfNMLW0oMXIbHGUE60v2taCM7PFw1 aa\ 1F2rKEVtu8nWSm7qFhxPU6UESQNsmA-sl mi\ 1HYrDwxEXv82DvDLoq736pS5ZTGJA4dn5 ma\ 1wZr-UOve0KUKo_Qbgeo27m-CQncZWb8y|while read l m;do curl "drive.google.com/uc?export=download&id=$m" -Lso g25/$l;done
# dist TABLE QUERY: awk reads QUERY ($2) first and stores its comma-separated
# fields 2..NF (if QUERY has several lines, the last one wins). For every row
# of TABLE ($1) with a non-empty first field it then prints the Euclidean
# distance to that stored vector followed by the row's first field.
# Output is sorted ascending by distance, formatted to three decimals, and a
# leading "0" is stripped (0.028 -> .028).
$ dist(){ awk -F, 'NR==FNR{for(i=2;i<=NF;i++)a[i]=$i;next}$1{s=0;for(i=2;i<=NF;i++)s+=($i-a[i])^2;print s^.5,$1}' "$2" "$1"|sort -n|awk '{printf"%.3f %s\n",$1,$2}'|sed s,^0,,;}
# For every row of g25/aa that does not contain "_contam": find the single
# nearest row of g25/ma (head -n1), keep it only when its name is in the listed
# set of populations, and prefix the result with the query row's name (the
# text before the first comma). Rows whose nearest match is not in the list
# produce no output.
$ grep -v _contam g25/aa|while read l;do dist g25/ma <(echo $l)|head -n1|awk 'NR==FNR{a[$0];next}$2 in a' <(printf %s\\n Besermyan Estonian Finnish Finnish_East Hungarian Ingrian Karelian Khanty Komi Mansi Mari Mordovian Nenets Nganassan Saami Saami_Kola Selkup Udmurt Vepsian) -|sed s/^/${l%%,*}\ /;done
Baltic_EST_IA .028 Estonian
Baltic_LVA_MN .168 Finnish_East
BEL_GoyetQ2 .198 Finnish_East
Corded_Ware_Baltic .046 Ingrian
Corded_Ware_Baltic_early .103 Ingrian
Corded_Ware_DEU .073 Finnish
Corded_Ware_POL .064 Finnish
Corded_Ware_POL_early .118 Finnish
FIN_Levanluhta_IA .018 Saami
Iberia_ElMiron .226 Finnish_East
Iberia_Northeast_UP_Azilian .281 Finnish_East
Iberia_Northwest_Meso .268 Finnish_East
Iberia_Southeast_Meso .247 Finnish_East
KAZ_Alpamsa_MLBA_Alakul .064 Besermyan
KAZ_Chanchar_LBA .066 Finnish
KAZ_Dali_EBA .159 Udmurt
KAZ_Dali_MLBA .084 Mordovian
KAZ_Georgievsky_LBA .091 Finnish
KAZ_Kairan_MLBA_o .080 Besermyan
KAZ_Karagash_MLBA .068 Ingrian
KAZ_Kumsay_EBA .158 Udmurt
KAZ_Kyzlbulak_MLBA1 .073 Finnish
KAZ_Maitan_MLBA_Alakul_o .109 Komi
KAZ_Mereke_MBA .154 Udmurt
KAZ_Mys_MLBA .075 Finnish
KAZ_Nomad_IA .066 Besermyan
KAZ_Oy_Dzhaylau_MLBA .073 Ingrian
KAZ_Oy_Dzhaylau_MLBA_o .141 Udmurt
KAZ_Satan_MLBA_Alakul .073 Finnish
KAZ_Shoendykol_MLBA_Fedorovo .071 Finnish
KAZ_Taldysay_MLBA1 .075 Finnish
KAZ_Tasbas_IA .079 Besermyan
KAZ_Zevakinskiy_BA .072 Mordovian
KAZ_Zevakinskiy_LBA .063 Besermyan
KAZ_Zevakinskiy_MLBA .068 Besermyan
MDA_Cimmerian .059 Besermyan
MNG_Chemurchek_EBA_2 .124 Udmurt
NOR_Meso .165 Finnish_East
NOR_N_HG .169 Finnish_East
POL_EBA .051 Ingrian
ROU_BA .082 Mordovian
RUS_AfontovaGora3 .237 Udmurt
RUS_Bolshoy_Oleni_Ostrov .082 Mansi
RUS_Bolshoy_Oleni_Ostrov_o .062 Khanty
RUS_Chalmny-Varre .024 Saami
RUS_Karasuk .061 Udmurt
RUS_Karelia_HG .175 Saami_Kola
RUS_Khvalynsk_En .143 Komi
RUS_Krasnoyarsk_BA .074 Nganassan
RUS_Krasnoyarsk_MLBA .065 Finnish
RUS_Krasnoyarsk_MLBA_o .080 Udmurt
RUS_MA1 .178 Udmurt
RUS_Mezhovskaya .054 Komi
RUS_Poltavka .124 Ingrian
RUS_Potapovka_MLBA .091 Ingrian
RUS_Potapovka_MLBA_o .131 Besermyan
RUS_Priobrazhenka_LBA .079 Besermyan
RUS_Samara_HG .172 Saami_Kola
RUS_Sidelkino_HG .159 Saami_Kola
RUS_Sintashta_MLBA_o1 .183 Udmurt
RUS_Sintashta_MLBA_o2 .112 Komi
RUS_Sintashta_MLBA_o3 .157 Udmurt
RUS_Sosonivoy_HG .190 Udmurt
RUS_Srubnaya_Alakul_MLBA .066 Finnish
RUS_Srubnaya_MLBA_o .150 Udmurt
RUS_Steppe_Maykop .158 Udmurt
RUS_Tagar .056 Besermyan
RUS_Tyumen_HG .207 Udmurt
RUS_Veretye_Meso .174 Saami_Kola
RUS_Volga-Kama_N .158 Finnish_East
SWE_PWC_NHG .152 Estonian
TJK_Dashti_Kozy_BA .084 Mordovian
UKR_Catacomb .113 Ingrian
UKR_Cimmerian .075 Komi
UKR_Dereivka_I_En2 .060 Finnish
UKR_EBA .099 Estonian
UKR_Meso .151 Finnish_East
UKR_N .157 Finnish_East
UZB_Kashkarchi_BA .078 Finnish
UZB_Kokcha_BA .086 Finnish
VK2020_NOR_North_LN_HG .166 Finnish_East
VK2020_NOR_North_VA_o1 .065 Saami
VK2020_NOR_North_VA_o2 .035 Komi
VK2020_SWE_Gotland_VA_o .023 Vepsian
Yamnaya_RUS_Kalmykia .126 Mordovian
Yamnaya_RUS_Samara .130 Ingrian
Yamnaya_UKR .123 Mordovian
Turkic (presumably the same command run with a list of Turkic populations; that command is not shown):
ARM_LBA .037 Azeri_Dagestan
ARM_Lchashen_MBA .034 Azeri_Dagestan
ARM_MBA .037 Azeri_Dagestan
CHN_Shirenzigou_IA .065 Tatar_Siberian
CZE_Vestonice16 .149 Tatar_Mishar
Gepidian_SRB_ACD .062 Tatar_Lipka
HUN_Avar_Period .038 Turkish_Deliorman
HUN_Prescythian_IA .059 Turkish_Deliorman
Hun_Tian_Shan .058 Bashkir
IRN_Ganj_Dareh_Historic .032 Azeri_Turkey
KAZ_Kanai_MBA .123 Tatar_Siberian_Zabolotniye
KAZ_Karakhanid .034 Karakalpak
KAZ_Kazakh_steppe_EMBA .122 Tatar_Siberian_Zabolotniye
KAZ_Kimak .060 Tatar_Siberian
KAZ_Kipchak .041 Nogai
KAZ_Nomad_MA .034 Karakalpak
KAZ_Pazyryk_IA .027 Tubalar
KAZ_Turk .041 Uzbek
KGZ_Turk .031 Tubalar
Levant_Megiddo_MLBA_o1 .052 Turkish_East
Levant_Megiddo_MLBA_o2 .052 Azeri_Dagestan
MNG_Altai_MLBA .049 Tatar_Siberian
MNG_Center_West_LBA_5 .058 Bashkir
MNG_Chandman_IA .056 Bashkir
MNG_Early_Med_Turk .031 Kirghiz
MNG_Early_Med_Uigur .029 Uzbek
MNG_Early_Xiongnu_rest .039 Altaian
MNG_Early_Xiongnu_west .064 Bashkir
MNG_EIA_2 .056 Bashkir
MNG_EIA_5 .062 Tatar_Siberian
MNG_KHI001 .046 Tubalar
MNG_Khovsgol_BA_low_res .060 Tuvinian
MNG_Mongol .028 Kazakh_China
MNG_Mongun_Taiga_LBA_1 .077 Khakass_Kachins
MNG_Mongun_Taiga_LBA_3 .046 Bashkir
MNG_Munkhkhairkhan_MBA_2 .057 Tubalar
MNG_OLN007 .052 Salar
MNG_Pazyryk_EIA_6 .062 Bashkir
MNG_Sagly_EIA_4 .041 Bashkir
MNG_SHU002 .057 Salar
MNG_UUS001 .047 Kazakh
MNG_Xiongnu_Central_Asian .060 Tatar_Siberian
RUS_Altai_IA .046 Bashkir
RUS_Baikal_MA .078 Tuvinian
RUS_BZK002 .104 Shor_Khakassia
RUS_Khaptsagai_IA .063 Tatar_Siberian
RUS_Marinskaya_IA .050 Bashkir
RUS_Nomad_MA .032 Bashkir
RUS_Okunevo_BA .100 Tatar_Siberian_Zabolotniye
RUS_Yakutia_LUP .096 Shor_Mountain
RUS_Yana_MA .031 Yakut
Saka_Kazakh_steppe .044 Bashkir
Saka_Tian_Shan .073 Tatar_Lipka
Scythian_Aldy_Bel_IA .043 Bashkir
Scythian_RUS_Urals .053 Bashkir
Scythian_Zevakino_Chilikta_IA .048 Bashkir
TUR_IA .022 Turkish_Northwest
TUR_Ottoman .041 Turkmen_Uzbekistan
Turkic PCA:
https://i.ibb.co/sCTVByf/closestisturkic.png
Uralic PCA:
https://i.ibb.co/NrVghxX/closestisuralic.png
Uralic PCA without WHGs, kra001, or Nganasan:
https://i.ibb.co/sFwYwJ5/closestisuralicsmaller.png
Code to generate PCA:
library(tidyverse)
library(colorspace)
# Plot the first two principal components of the G25 coordinates, coloured by
# hierarchical cluster with a convex hull per cluster, and save the figure as
# output.png.

# The input has no header row; the first column supplies the row names and the
# remaining columns are the coordinates.
coords <- read.csv(
  "g25-scaled-lines-to-plot",
  header = FALSE,
  row.names = 1,
  check.names = FALSE
)

# Hierarchical clustering (hclust defaults) on Euclidean distances, cut into
# 12 groups. cutree() errors when k exceeds the number of rows, so cap it.
n_clusters <- min(12L, nrow(coords))
cluster_id <- cutree(hclust(dist(coords)), k = n_clusters)

pca <- prcomp(coords)
scores <- as.data.frame(pca$x)
scores$cluster <- as.factor(as.vector(cluster_id))
scores$label <- rownames(scores)

# Axis labels show the share of total variance per component. Variance is
# sdev^2, so the share is sdev^2 / sum(sdev^2), not sdev / sum(sdev).
var_share <- pca$sdev^2 / sum(pca$sdev^2)
axis_label <- paste0(
  colnames(pca$x), " (", sprintf("%.1f", var_share * 100), "%)"
)

# Rows that form the convex hull of each cluster in the PC1/PC2 plane.
hulls <- scores %>%
  group_by(cluster) %>%
  slice(chull(PC1, PC2)) %>%
  ungroup()

pca_plot <- ggplot(scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = cluster), size = .5) +
  geom_polygon(
    data = hulls,
    aes(color = cluster, fill = cluster),
    alpha = .2,
    size = .3
  ) +
  geom_text(aes(label = label, color = cluster), size = 2.5, vjust = -.7) +
  theme(
    aspect.ratio = 3 / 4,
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(color = "gray75", size = .2),
    panel.grid.minor = element_line(color = "gray90", size = .13),
    text = element_text(color = "black"),
    axis.text = element_text(color = "black")
  ) +
  scale_x_continuous(
    breaks = seq(-1, 1, .1),
    minor_breaks = seq(-1, 1, .02),
    expand = expansion(mult = .12)
  ) +
  scale_y_continuous(
    breaks = seq(-1, 1, .1),
    minor_breaks = seq(-1, 1, .02),
    expand = expansion(mult = .04)
  ) +
  xlab(axis_label[1]) +
  ylab(axis_label[2]) +
  scale_color_discrete_qualitative(palette = "Set 2", c = 80, l = 40)

# Save with an explicit plot argument. Adding ggsave() to the plot with `+`
# is not a supported pattern and fails on newer ggplot2 releases.
ggsave("output.png", plot = pca_plot)

# Trim the surrounding whitespace and add a 16px white border (ImageMagick).
system("/usr/local/bin/mogrify -trim -bordercolor white -border 16x16 output.png")
# Download four files from Google Drive into g25/. Each printf argument is a
# "<local name> <Drive file id>" pair; curl follows redirects (-L), runs
# silently (-s) and writes to g25/<local name> (-o).
# NOTE(review): judging by how they are used below, "aa" holds ancient averages
# and "ma" modern averages; "ai"/"mi" are presumably the matching
# per-individual files - confirm.
$ mkdir g25;printf %s\\n ai\ 1UrhcfNMLW0oMXIbHGUE60v2taCM7PFw1 aa\ 1F2rKEVtu8nWSm7qFhxPU6UESQNsmA-sl mi\ 1HYrDwxEXv82DvDLoq736pS5ZTGJA4dn5 ma\ 1wZr-UOve0KUKo_Qbgeo27m-CQncZWb8y|while read l m;do curl "drive.google.com/uc?export=download&id=$m" -Lso g25/$l;done
# dist TABLE QUERY: awk reads QUERY ($2) first and stores its comma-separated
# fields 2..NF (if QUERY has several lines, the last one wins). For every row
# of TABLE ($1) with a non-empty first field it then prints the Euclidean
# distance to that stored vector followed by the row's first field.
# Output is sorted ascending by distance, formatted to three decimals, and a
# leading "0" is stripped (0.028 -> .028).
$ dist(){ awk -F, 'NR==FNR{for(i=2;i<=NF;i++)a[i]=$i;next}$1{s=0;for(i=2;i<=NF;i++)s+=($i-a[i])^2;print s^.5,$1}' "$2" "$1"|sort -n|awk '{printf"%.3f %s\n",$1,$2}'|sed s,^0,,;}
# For every row of g25/aa that does not contain "_contam": find the single
# nearest row of g25/ma (head -n1), keep it only when its name is in the listed
# set of populations, and prefix the result with the query row's name (the
# text before the first comma). Rows whose nearest match is not in the list
# produce no output.
$ grep -v _contam g25/aa|while read l;do dist g25/ma <(echo $l)|head -n1|awk 'NR==FNR{a[$0];next}$2 in a' <(printf %s\\n Besermyan Estonian Finnish Finnish_East Hungarian Ingrian Karelian Khanty Komi Mansi Mari Mordovian Nenets Nganassan Saami Saami_Kola Selkup Udmurt Vepsian) -|sed s/^/${l%%,*}\ /;done
Baltic_EST_IA .028 Estonian
Baltic_LVA_MN .168 Finnish_East
BEL_GoyetQ2 .198 Finnish_East
Corded_Ware_Baltic .046 Ingrian
Corded_Ware_Baltic_early .103 Ingrian
Corded_Ware_DEU .073 Finnish
Corded_Ware_POL .064 Finnish
Corded_Ware_POL_early .118 Finnish
FIN_Levanluhta_IA .018 Saami
Iberia_ElMiron .226 Finnish_East
Iberia_Northeast_UP_Azilian .281 Finnish_East
Iberia_Northwest_Meso .268 Finnish_East
Iberia_Southeast_Meso .247 Finnish_East
KAZ_Alpamsa_MLBA_Alakul .064 Besermyan
KAZ_Chanchar_LBA .066 Finnish
KAZ_Dali_EBA .159 Udmurt
KAZ_Dali_MLBA .084 Mordovian
KAZ_Georgievsky_LBA .091 Finnish
KAZ_Kairan_MLBA_o .080 Besermyan
KAZ_Karagash_MLBA .068 Ingrian
KAZ_Kumsay_EBA .158 Udmurt
KAZ_Kyzlbulak_MLBA1 .073 Finnish
KAZ_Maitan_MLBA_Alakul_o .109 Komi
KAZ_Mereke_MBA .154 Udmurt
KAZ_Mys_MLBA .075 Finnish
KAZ_Nomad_IA .066 Besermyan
KAZ_Oy_Dzhaylau_MLBA .073 Ingrian
KAZ_Oy_Dzhaylau_MLBA_o .141 Udmurt
KAZ_Satan_MLBA_Alakul .073 Finnish
KAZ_Shoendykol_MLBA_Fedorovo .071 Finnish
KAZ_Taldysay_MLBA1 .075 Finnish
KAZ_Tasbas_IA .079 Besermyan
KAZ_Zevakinskiy_BA .072 Mordovian
KAZ_Zevakinskiy_LBA .063 Besermyan
KAZ_Zevakinskiy_MLBA .068 Besermyan
MDA_Cimmerian .059 Besermyan
MNG_Chemurchek_EBA_2 .124 Udmurt
NOR_Meso .165 Finnish_East
NOR_N_HG .169 Finnish_East
POL_EBA .051 Ingrian
ROU_BA .082 Mordovian
RUS_AfontovaGora3 .237 Udmurt
RUS_Bolshoy_Oleni_Ostrov .082 Mansi
RUS_Bolshoy_Oleni_Ostrov_o .062 Khanty
RUS_Chalmny-Varre .024 Saami
RUS_Karasuk .061 Udmurt
RUS_Karelia_HG .175 Saami_Kola
RUS_Khvalynsk_En .143 Komi
RUS_Krasnoyarsk_BA .074 Nganassan
RUS_Krasnoyarsk_MLBA .065 Finnish
RUS_Krasnoyarsk_MLBA_o .080 Udmurt
RUS_MA1 .178 Udmurt
RUS_Mezhovskaya .054 Komi
RUS_Poltavka .124 Ingrian
RUS_Potapovka_MLBA .091 Ingrian
RUS_Potapovka_MLBA_o .131 Besermyan
RUS_Priobrazhenka_LBA .079 Besermyan
RUS_Samara_HG .172 Saami_Kola
RUS_Sidelkino_HG .159 Saami_Kola
RUS_Sintashta_MLBA_o1 .183 Udmurt
RUS_Sintashta_MLBA_o2 .112 Komi
RUS_Sintashta_MLBA_o3 .157 Udmurt
RUS_Sosonivoy_HG .190 Udmurt
RUS_Srubnaya_Alakul_MLBA .066 Finnish
RUS_Srubnaya_MLBA_o .150 Udmurt
RUS_Steppe_Maykop .158 Udmurt
RUS_Tagar .056 Besermyan
RUS_Tyumen_HG .207 Udmurt
RUS_Veretye_Meso .174 Saami_Kola
RUS_Volga-Kama_N .158 Finnish_East
SWE_PWC_NHG .152 Estonian
TJK_Dashti_Kozy_BA .084 Mordovian
UKR_Catacomb .113 Ingrian
UKR_Cimmerian .075 Komi
UKR_Dereivka_I_En2 .060 Finnish
UKR_EBA .099 Estonian
UKR_Meso .151 Finnish_East
UKR_N .157 Finnish_East
UZB_Kashkarchi_BA .078 Finnish
UZB_Kokcha_BA .086 Finnish
VK2020_NOR_North_LN_HG .166 Finnish_East
VK2020_NOR_North_VA_o1 .065 Saami
VK2020_NOR_North_VA_o2 .035 Komi
VK2020_SWE_Gotland_VA_o .023 Vepsian
Yamnaya_RUS_Kalmykia .126 Mordovian
Yamnaya_RUS_Samara .130 Ingrian
Yamnaya_UKR .123 Mordovian
Turkic (presumably the same command run with a list of Turkic populations; that command is not shown):
ARM_LBA .037 Azeri_Dagestan
ARM_Lchashen_MBA .034 Azeri_Dagestan
ARM_MBA .037 Azeri_Dagestan
CHN_Shirenzigou_IA .065 Tatar_Siberian
CZE_Vestonice16 .149 Tatar_Mishar
Gepidian_SRB_ACD .062 Tatar_Lipka
HUN_Avar_Period .038 Turkish_Deliorman
HUN_Prescythian_IA .059 Turkish_Deliorman
Hun_Tian_Shan .058 Bashkir
IRN_Ganj_Dareh_Historic .032 Azeri_Turkey
KAZ_Kanai_MBA .123 Tatar_Siberian_Zabolotniye
KAZ_Karakhanid .034 Karakalpak
KAZ_Kazakh_steppe_EMBA .122 Tatar_Siberian_Zabolotniye
KAZ_Kimak .060 Tatar_Siberian
KAZ_Kipchak .041 Nogai
KAZ_Nomad_MA .034 Karakalpak
KAZ_Pazyryk_IA .027 Tubalar
KAZ_Turk .041 Uzbek
KGZ_Turk .031 Tubalar
Levant_Megiddo_MLBA_o1 .052 Turkish_East
Levant_Megiddo_MLBA_o2 .052 Azeri_Dagestan
MNG_Altai_MLBA .049 Tatar_Siberian
MNG_Center_West_LBA_5 .058 Bashkir
MNG_Chandman_IA .056 Bashkir
MNG_Early_Med_Turk .031 Kirghiz
MNG_Early_Med_Uigur .029 Uzbek
MNG_Early_Xiongnu_rest .039 Altaian
MNG_Early_Xiongnu_west .064 Bashkir
MNG_EIA_2 .056 Bashkir
MNG_EIA_5 .062 Tatar_Siberian
MNG_KHI001 .046 Tubalar
MNG_Khovsgol_BA_low_res .060 Tuvinian
MNG_Mongol .028 Kazakh_China
MNG_Mongun_Taiga_LBA_1 .077 Khakass_Kachins
MNG_Mongun_Taiga_LBA_3 .046 Bashkir
MNG_Munkhkhairkhan_MBA_2 .057 Tubalar
MNG_OLN007 .052 Salar
MNG_Pazyryk_EIA_6 .062 Bashkir
MNG_Sagly_EIA_4 .041 Bashkir
MNG_SHU002 .057 Salar
MNG_UUS001 .047 Kazakh
MNG_Xiongnu_Central_Asian .060 Tatar_Siberian
RUS_Altai_IA .046 Bashkir
RUS_Baikal_MA .078 Tuvinian
RUS_BZK002 .104 Shor_Khakassia
RUS_Khaptsagai_IA .063 Tatar_Siberian
RUS_Marinskaya_IA .050 Bashkir
RUS_Nomad_MA .032 Bashkir
RUS_Okunevo_BA .100 Tatar_Siberian_Zabolotniye
RUS_Yakutia_LUP .096 Shor_Mountain
RUS_Yana_MA .031 Yakut
Saka_Kazakh_steppe .044 Bashkir
Saka_Tian_Shan .073 Tatar_Lipka
Scythian_Aldy_Bel_IA .043 Bashkir
Scythian_RUS_Urals .053 Bashkir
Scythian_Zevakino_Chilikta_IA .048 Bashkir
TUR_IA .022 Turkish_Northwest
TUR_Ottoman .041 Turkmen_Uzbekistan
Turkic PCA:
https://i.ibb.co/sCTVByf/closestisturkic.png
Uralic PCA:
https://i.ibb.co/NrVghxX/closestisuralic.png
Uralic PCA without WHGs, kra001, or Nganasan:
https://i.ibb.co/sFwYwJ5/closestisuralicsmaller.png
Code to generate PCA:
library(tidyverse)
library(colorspace)
# Plot the first two principal components of the G25 coordinates, coloured by
# hierarchical cluster with a convex hull per cluster, and save the figure as
# output.png.

# The input has no header row; the first column supplies the row names and the
# remaining columns are the coordinates.
coords <- read.csv(
  "g25-scaled-lines-to-plot",
  header = FALSE,
  row.names = 1,
  check.names = FALSE
)

# Hierarchical clustering (hclust defaults) on Euclidean distances, cut into
# 12 groups. cutree() errors when k exceeds the number of rows, so cap it.
n_clusters <- min(12L, nrow(coords))
cluster_id <- cutree(hclust(dist(coords)), k = n_clusters)

pca <- prcomp(coords)
scores <- as.data.frame(pca$x)
scores$cluster <- as.factor(as.vector(cluster_id))
scores$label <- rownames(scores)

# Axis labels show the share of total variance per component. Variance is
# sdev^2, so the share is sdev^2 / sum(sdev^2), not sdev / sum(sdev).
var_share <- pca$sdev^2 / sum(pca$sdev^2)
axis_label <- paste0(
  colnames(pca$x), " (", sprintf("%.1f", var_share * 100), "%)"
)

# Rows that form the convex hull of each cluster in the PC1/PC2 plane.
hulls <- scores %>%
  group_by(cluster) %>%
  slice(chull(PC1, PC2)) %>%
  ungroup()

pca_plot <- ggplot(scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = cluster), size = .5) +
  geom_polygon(
    data = hulls,
    aes(color = cluster, fill = cluster),
    alpha = .2,
    size = .3
  ) +
  geom_text(aes(label = label, color = cluster), size = 2.5, vjust = -.7) +
  theme(
    aspect.ratio = 3 / 4,
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(color = "gray75", size = .2),
    panel.grid.minor = element_line(color = "gray90", size = .13),
    text = element_text(color = "black"),
    axis.text = element_text(color = "black")
  ) +
  scale_x_continuous(
    breaks = seq(-1, 1, .1),
    minor_breaks = seq(-1, 1, .02),
    expand = expansion(mult = .12)
  ) +
  scale_y_continuous(
    breaks = seq(-1, 1, .1),
    minor_breaks = seq(-1, 1, .02),
    expand = expansion(mult = .04)
  ) +
  xlab(axis_label[1]) +
  ylab(axis_label[2]) +
  scale_color_discrete_qualitative(palette = "Set 2", c = 80, l = 40)

# Save with an explicit plot argument. Adding ggsave() to the plot with `+`
# is not a supported pattern and fails on newer ggplot2 releases.
ggsave("output.png", plot = pca_plot)

# Trim the surrounding whitespace and add a 16px white border (ImageMagick).
system("/usr/local/bin/mogrify -trim -bordercolor white -border 16x16 output.png")