From 60a3ddec67fa1b3f5a10e8c63cefff7e2aadb598 Mon Sep 17 00:00:00 2001 From: Tobias Popp <PoppTo72658@th-nuernberg.de> Date: Wed, 25 Nov 2020 14:26:11 +0100 Subject: [PATCH] Initial commit --- .DS_Store | Bin 0 -> 14340 bytes egs/.DS_Store | Bin 0 -> 10238 bytes egs/digits | 1 + egs/kaldi_toy_example | 1 + egs/voxceleb/.DS_Store | Bin 0 -> 6146 bytes egs/voxceleb/v1/.DS_Store | Bin 0 -> 6147 bytes egs/voxceleb/v2/.DS_Store | Bin 0 -> 6147 bytes egs/voxceleb/v2/local/make_voxceleb1.pl | 130 +++++++++++++++ egs/voxceleb/v2/local/make_voxceleb1_v2.pl | 125 ++++++++++++++ egs/voxceleb/v2/local/make_voxceleb2.pl | 70 ++++++++ .../nnet3/xvector/prepare_feats_for_egs.sh | 84 ++++++++++ .../v2/local/nnet3/xvector/run_xvector.sh | 1 + .../nnet3/xvector/tuning/run_xvector_1a.sh | 155 ++++++++++++++++++ egs/voxceleb/v2/local/prepare_for_eer.py | 19 +++ egs/voxceleb/v2/{local => local2} | 0 egs/voxforge/.DS_Store | Bin 0 -> 6147 bytes egs/voxforge/s5/.DS_Store | Bin 0 -> 6147 bytes egs/wsj/.DS_Store | Bin 0 -> 6147 bytes src/.DS_Store | Bin 0 -> 6148 bytes src/matrix/t.bin | Bin 0 -> 160 bytes tools/.DS_Store | Bin 0 -> 6148 bytes 21 files changed, 586 insertions(+) create mode 100644 .DS_Store create mode 100644 egs/.DS_Store create mode 160000 egs/digits create mode 160000 egs/kaldi_toy_example create mode 100644 egs/voxceleb/.DS_Store create mode 100644 egs/voxceleb/v1/.DS_Store create mode 100644 egs/voxceleb/v2/.DS_Store create mode 100755 egs/voxceleb/v2/local/make_voxceleb1.pl create mode 100755 egs/voxceleb/v2/local/make_voxceleb1_v2.pl create mode 100755 egs/voxceleb/v2/local/make_voxceleb2.pl create mode 100755 egs/voxceleb/v2/local/nnet3/xvector/prepare_feats_for_egs.sh create mode 120000 egs/voxceleb/v2/local/nnet3/xvector/run_xvector.sh create mode 100755 egs/voxceleb/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh create mode 100755 egs/voxceleb/v2/local/prepare_for_eer.py rename egs/voxceleb/v2/{local => local2} (100%) create mode 100644 egs/voxforge/.DS_Store create mode 100644 egs/voxforge/s5/.DS_Store create mode 100644 egs/wsj/.DS_Store create mode 100644 src/.DS_Store create mode 100644 src/matrix/t.bin create mode 100644 tools/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..983053dc788bd10926c6731ab98d714eebd335be GIT binary patch literal 14340 zcmZQzU|@7AO)+F(FkoO{;9!8z0z3>1EL;o>3Oo!b03;8>D1smkGsqkU1_nU}1_ovZ z1_1_!<ecI%=cJtcB#`td9u0xf5E#TEzy!^|Ea3cG%us}!f0?!=735?Vmlzn_Vq{`w zVP#|I;N;@w=9O~y@bvQbi5CzkElv$e%}FfDEK7AsEJ-bn7vL|>$S?Oy&d&=dN-fJw zEe}u5&neB#D~=ZsOis*6F3m|SNp;N03C^s7s7@}*&&dhMFU~B<%+HIAC`qj-0m*?3 zLXnDyD$Xw|aRwU}k(r#I2T>&ivctasq`5dOwWv5VKaZ1hHJ`YQqN<j@iKVU66@CFh zAz=}2?x@U^)Vz|+w9M3^(DKZbl8ng2;^fr4l+3*J@UqO}%%q&uki5j)R6#}^PEN2c z2}vobh|;9Y!qUu=N&#gK4zQ@KoD_FVN@7W>Z+;5Mv=o7@91!(Na#B*!AQ|VP)Wnk1 z6sQI@bt&$U;>@a4fkdc6Z5?jzknGI76oF!>RR)IKQlUACNvSykDVz{{OwG8Z!pk6D z67Yr!T8VH=MI`6v=BDPA6hp=A?4_jQK-T&s7MJ)Jq~@ij2%Lc0?JOrH6$z4aOi2Mb zh(p<-U;{WgDm!R^X$EEc4-f{}_sR~S>VScPL4c8ggF%+TfWe8uiy@AofT5D1n_(Km ze1<IyTN$=79AY@laE;*(!!w3=44)akF??tE!N|zS#mK|R%gDzl&Zxqu#;DGy!Dz^6 z%V@`F&*;JE%jnM-z!=CF#Td_+$e6^K%$UPi#8|>u%2>u&&)Ck`$=Jo%%{YZ|F5^7L z`HU+Vw=?c!+{L(?@d)EN#`BC97%wv3VtmZ_hVdigFNk}Y7$G!+FO-H-jDHZ(&cj2+ zXy+j{+Ii5UorecK+IcXeod;XA^H3V?6_9vEL^~w3pkc_t4dITGqaiRF0;3@S4*^j5 zpa5+nI6&zJ2n~{AU}RtbH6cJWs0jhq#|Z8RFo48BT0t~OD~JYZWncucz-EB8GB7eg zw1R3JkO7RKZUk5!tet_80j!;YkpZlofe~yo0|O&OI|Cy`I|C!Mhr%cTGKGPG5u%-e z5uzO;I7*I&z-S1dga9bpFf+i~{soY}JJW3JZGRbX+h23gw*7DNK^pv`V&V#_T6)G7 zwodQ{Gg>Pdr8$S(a0fTrAkA@bdkxkM2e*)~NJvUa%gA!e!+H(qy@}Awyp+@m4p2Wr zfE}V*Nk)Z3-~@*N2bjyPE~CjQu$4oA5zLiQm(k_m5MTmxxD8~CI0X_p1en2G8B=a^ z4uKR-0ah@V+lt$UQ=phrfCbEzvFCQ=5b)*{U;}fxWw~8BIRzMqZ{*``;-6%=#PE>e z9m6k%|BS4RT#Q1D;*1K68jO037T_km6QeVuE2AHyKVu+c5Mu~qEMpvF0%Ib$VV}#G z$5_Bv$ymi$!&u8$&)C7($=J=<!`R0-opA=^EXLW4a~YR0E@xcHxQcNN<5tFP(B}SO z#xsmp8E-+7C%l301L4C+#>a@xYFThmUQT{qI=H%EU|?k^VaR95XUJhFPAV^kCS`_` zu%vt)){W)h;^5-tiVe=lFApwBEGaE^N-T;7@d6S{GLoR|ko^1{I6E;ZEHkw{UO>b- zKd&S)GcUCWq&zq?B{eCrC?-5JFD1X+DZex?r5J1uRDy$plY=u}K%%<Z%-ldn!N}CG zR!5=Q9K<#;Hmj}W<PcXiwDnBLt*ol9sjZs{@gl5(W8jDI;p8j^6fYu1+L*4uyolUW z=i!z0^7ird<B1o*(oY6;vFX}Z#~3XTU<7s5<yACvjm&KvT*2LI9uXcfTsaG=FUJYW zUXc7K1j&%H==l(43T79cM^RQ8lmSr+0I=<{in8h)9LQw>SeQpkRtJ<T!36_EM4!hH zloP?_16V}XgvSh&RUzdDM97lI8k9d_<p)GigvXwP6J91k#GGVZKm`O^0d)oBH8(|B zS+p*EBBVis-j`!w00#)X3(pD8-bM^A3_c7A3`Gpp480697#1>YVc5p7o#8OU8HOti zw-}x<yl42p@R{KY!%s#gMh-?HMkz*RMpbbC-GI@M(U{SM(UH-M(VNkSF#_CkPhm`D z%wjBNEMY8TEN84?Y-MZ%XYhW;8H_U-XEV-WT*A1PaUJ7&#vP0Y84ocYVLZxsh4C)q zbH*2pFBxAkerEi`#LC3QBn(N{@N6Cc;loHK5scIh&CuBECLUgSH&Ep?h*LXi<)m+7 zX@|3NLQaF=WCuxC;PeJdPvGcAO~jDAz#|W-gm7gE0d`RKfz(EdO7ijopu`8^s;SG% z3or?6<$!Rtb>w*jn871gAfpWodE^CH!DCb)uBjOhj{pmJ6br<)vgVN&U;__Ufw=av zJUjvnoScx#NnM_YgOfwqfhvjj6vJhPN8q8SpA7#P*%-MQB^VVM)fsge%^B?(9T{C1 zT^ZfLHB&HS2xBO?W&#a4r7-3)<}(&D7BLnx)-cvFHZV3aHZgWH_A>S}PGFqKIE!%( zc=%}n<4VTWjB6Ozf@`K7j5`?*F`j0;!g!PM5hQuS6D_v738@xBtDo2z${8{l@)%MW z@)^npT^(g?Y^tMRY+^!j9fjsiv_OD_;3zp70;3@?8UmvsFwh|Y>i@GcBs1hPBr;?( zqz<x<lZmORj)IY~DaDl%k{45olXH^t^K(EUk0gv%Pcbv3GNcc>N}6gNNvQXbI+0kX zdwC$!y*xfhv##`*<i$F|grysc=<Z;d_8p!7gH8M5nE!+J89AZTy=n{=3=Rw-3~3Dc z42=vu44~=Wy$t&pK<({&3{M!|F??hA&G3)mKO+MpC!-Lf2%{*Y7^6I+4x=8UKBEDn zCAi(~&KSTL${5ZV!5GPy#F);Q$(Y5M%~%2*4XR^oXY6O3$T*2{GUFV^rQmU(4UGH2 zT}9CN&o##Dj5k0nZN}G(Ul@N9p6&&Y27$9`PH`E5E+TmNl1FpUr)AWhBJGATdB`8e z<iRtH$%8(O$%8zM$z#QXI*iGKJdDXB%L5z6B#>yq;~Th=>_vuK3@;hJF)}f-GxCB* zHRKsJ7!4TB7#$eh7~R1m8orESjNy!tj8Tj+jH!%ij2VoX;DicVlTZQf8n!UDGPZ-e zhLahmFivBf&$xhb5#wUUrQq?6jf|TZw=nKy+y_m*pf2KFqPmFC!WW~)0gu8D%NlsJ z|Ifg{fYvb{WDz~u{|7Y)NAYL~jE2By2#kmjV1@Y70JQ$km4N}*`hSS3QF1f{Mnhm& zh5#drOR$R*bQJ|QAA;7}f!5O{KqWzI@IcG)7{TlOzywGMSe6O0p~wQvff9@ikX3w) f43JfP1GWgz!_#ZD|DT%*o|Ki59qs>*;Ql`ViNRlz literal 0 HcmV?d00001 diff --git a/egs/.DS_Store b/egs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e04067a2d4553d65b59f96c8f73421ef6bf15192 GIT binary patch literal 10238 zcmZQzU|@7AO)+F(P+(wS;9!8z0z3>@0Z1N%F(jFwB3vNz7#IW?7)lt5lgf(=l5+Bs zpmL+sXb6mkz-S1JhQMeDjE2DA3IRrlb2zvm^e8zR0;3@?8UpYT0F@7*4g!dFfVA-$ z8Xz=Ch=GxT0o(;(WME){X<`KT0~kPZAgv%8q!mPiv@$S)SYR{2S{WFjS{cFJ5Rg6r zuu+WQt_g?_*3Q5PwwZx}5o|L910zH`10&RCMraR(5u%-e5o|jH10zH`*vwI4Gz3ON z05t?aeRnp76oyQObcRf{{`<+Kf}G6a5(9(lj7-cdtZeKY99$e+T(Q9!`Q^bSi6y1Q zPKia)AYMRXNk$Ts9g?4)17{~Dg=MCe#|wx!=jW9qX6B_9fs_Ylrlck%7R7{T=B4D9 zJLQ+=r4)nBfl6?2aB^_Q3rJK~o0%KvC>WU<*6Ju!n}gUU#%8s(oE+k+hPIvwxs_Gb zHMMm!Azp$N4GjDcKAfDzz`%gw$DHCaXUG@@(`uL>Z^8V?$;HjXE9LIt?&-x7FCbKw znOg2&P?DLSR~(jFRGgWg7canHoRMGdnVg>&P?QRi4^Pg|Db39*ju#M2PRvOz%}Fdt zb<D{L&a6r;2B}Ui%FoFO$S=+W850>%l3Gy$lEaXSh$_x6DscuI7Ll2pp9fJTP+FWC zl$w)Rl3AAOl30>jEWpUQnonFtQB_Od#M0L33crA$kgy03PgG_~YF<fZT4ridXnAHz zNk(L1adK*2N@iYqcv)s~W>QXSNM2%Ysvx5dCns2!grt;IL}^lHVQFSbrGPRA2Ut{A zPKqZcC9x#cH$MerT8h9{4v2asIVq`Vkc@LtYGO%h3RHudx)e`Hab{JjKq6G3whj+Z zNOopkia;^cDg#3vsnDFnq|_XN6i$dere-`+;bo9e74U`%T8Z#TMI`6v=BDPA6hp=A z?4_jQK-T&s7MJ)Jq~@ij2%Lc0?JOrH6$z4aOi2Mbh(j3`@yZSw5GL5u$_@q)9w!3_ zgDis)gA0QXLjpq)Lp4J$!wiOn3|knsF>Gfz%y5R`I>TLt=M3)|zA$`e_`&d#k%^I; zk(ZH=k)Kh5QI%1hQG-#F(TLHG(Vo$P(UZ}SF@Q0UF^DmmF@Z6OF_|%iF_*EJv6Qil zv7E7iv4gRTv751naVq0H#`%m37*{gxVBE#Hn{f~0QO5I(7Z@)xUShn>_=NE#<0pvo zn4mQrgAarcBN;zqNygBjHl_*Kld+7OyPJpSAWp_N`5<XmR7_k!Ra4i<+{O_e_h=E0 zk|2@OAUNScQWiMH!BP}B)~`rNN=eJe^2oz70D8s<&CE+lt>6G<2?2J9Y9$#J4uKOK z0vup2kGc#ur@&SSM@C&nhl4|a2_mB}W5_9x$RWTC=E|7xm~jZCa0;-3xjdFU)|>*x zoB}Lhu8bXz1BZY&rvMw6%OlI<!pSMXKw)Y<#c-M75yN|i-;4~5Y>eED!i*A(ij10! z`iz#~)alIV!sy26<cM#2CyN${5EO&zQ)V#F)*P!<fsM&sf1&$ym);!&t}I4o#`O zjMEsWGtOk3#W;s?DdRH66^ttxS2J#5+{(C(aR=j0#zTy!880y2grrJ%TJ?tTVI<=% z^t4(QT$GoSpO+49`hmur%NX(*Dj1R(QW<g>QW=tvhK&b7wPRvxs-s|JY)Wyp!@+>& zPaNTp#*oiY#E{OAiWCkfV0n6gs$cQyYC~fa9R*W!yumO766rW=Ur_A~3kJA1(Sm`Q zp`4+ZA!~Sg9_l$XFGE6YlpGC#(GVC7fzc2c4FS4_05ilk1<=^PD+2?rxqpbNQF1f{ zMnhm&h5#drOR$R*v{#1BhoEtG&{%o`R1!2g0UF6?1dYu@^nsLshV{Wi@l2r39RmY+ o<b{!e0XpUaRtz4BXJmkk<PX?H0P?8VXy<>l^N%s+Jls3~08(W`CIA2c literal 0 HcmV?d00001 diff --git a/egs/digits b/egs/digits new file mode 160000 index 000000000..7a670411c --- /dev/null +++ b/egs/digits @@ -0,0 +1 @@ +Subproject commit 7a670411cf604463bf004beef273467c019d033b diff --git a/egs/kaldi_toy_example b/egs/kaldi_toy_example new file mode 160000 index 000000000..90246e855 --- /dev/null +++ b/egs/kaldi_toy_example @@ -0,0 +1 @@ +Subproject commit 90246e855df9742662efc639e2aa11a042957290 diff --git a/egs/voxceleb/.DS_Store b/egs/voxceleb/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..fd3b239592f6b822947e0fbd2575dd5a1ebf4a24 GIT binary patch literal 6146 zcmZQzU|@7AO)+F(5MW?n;9!8zOq>i@0Z1N%F(jFwA}k>DKxQQ6<R>vOFq}*($jOAt zjZ%X<1VC}e#8Adym{eX|fE<6tB?bo98JU<_SlQS)IJh{txMG7d^2>uu5=%;pof3<p zLA-#(l8huMJ0w3p2hL7R3d>9_j~5Ve&d)1J%*;zI0x1v9Oi4{jEQ$%w%uC5Hcgio# zODP7M1C`+5;N;+p7m%o~HZe8TQ7|$#t<_PeHa9oWQ7|z!tF7ha5LY#{^-RdEtg5c5 zt(yr6T1G|)&A<<(Vbm-J1_rR-%7TmXa`N-iA?Xw>W;BfbYlP-sSa^)mqaiRF0;3@S z2?1!S$-xcbj*_DxFd71*Apj2nQ2C$$Z814O=>`Z5l44*4HLf6hP$Lqoj}g=hfQW&# zg4BStf@qLd21XDIEDz~BFfu^2GJ-oH3=E9m4he_`YiD3&0BdJpWB_YtfHn0PA=()j zA=()jq5Ts^P=5wwK14eMBSbscZKK3!2#kgRA_SNrOaV~+@5;b{tM?C4HA;?#z-R~z z%Mf5>aS3*D0#%wgya%dlLG@_@R323OgQ{ajP(6(h16RdNkl{oLs4%Fy2WbV-;HsFB U0aBBXHUwZHG)j+#0R2M%096Ezo&W#< literal 0 HcmV?d00001 diff --git a/egs/voxceleb/v1/.DS_Store b/egs/voxceleb/v1/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..afcae070c37829f7a5a1ecb02beec75813e1195d GIT binary patch literal 6147 zcmZQzU|@7AO)+F(5MW?n;9!8zOq>i@0Z1N%F(jFwA|RR(Y(`E}d2vBfPJR+pW|SK2 zApnXyR)!pge1>F(L<Z#eJDyaKlUZD1U~rw0iJ66!jh%yoi-U_RHaH`{Jh&vWq_o&6 zu_zkE3rH-<NP@CM^7C`x?8Kz7%+&ID0TJi?ypqJsywoC)^5D#r)TG3snDETJl>Bn1 z{L;LXVz4<-2@VcU4$gQ1vFd6QLsK0EgIXPhYIAb~9R(9(v)Wot4slgOThD~t%Bt#` z+PdkG0A^%_&<y-g8b-}vU|>M<Us-TbUQT{qIw%Lgl#J4&Aut*OqaiTTLI7H1a&V6{ zkB|CyGz3ONfM5uK$_E8#L&*V3H$Z5R6aymz1E}=~qFG>uLAn79ATf|u5Dn4_qCr|2 z7(pzs8DOmpj0_O1jNo1fNFS(20;0j%85kMB+8G!bz}gvL?L0<^b_Pa>b_Pah2Za%$ zoq-Xeoq-Xe9p=1IdNc$^LjW2A%n(`tRR6m&FyQL`LsX5DqaiRF0>d%{7+G9`U7Wy` zGIsxg>RM2JngEps)&8LBm=RP@BgDW}F%x7wQ35Iqs_sErK{U83W@LcW<f9D%SO|^M Kqai^55C8!Au~3fy literal 0 HcmV?d00001 diff --git a/egs/voxceleb/v2/.DS_Store b/egs/voxceleb/v2/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..16ba07ade7f9a22bb4e381774f0f19aa150ecd0f GIT binary patch literal 6147 zcmZQzU|@7AO)+F(5MW?n;9!8zOq>i@0Z1N%F(jFwB5WY@z-Html@}Kz<>V(pWk#vN z9s;1aV`a!;$Y)4qNMt~czvD>-Ihn;J1_sv|nV4Bv+1NQaxH!1DVuLgC%Y#c2OG=BK z5{sfiynw`#j3g*KBtJg~&Q44U%S<hg7Z7pI&nrpH%u6i-DG$y}Nli*DiV4rmOUW;H z$}i1JDF&MZmEhpu<lu}K5UZ{>F*MatFsRj0s5Un@&`~fkHmj}W<PcXiwDnBLt*ol9 zsjZt131CJ>2+hC`rD4<z1_lNs|K+3`1}Ep|7Qm!YWpndgT%f6!<Iu*(KVEG+<cO{! zg@B5J416|}1sCPz<maVBatx9yMwz1_Fd71*Awb6vfEJk?+;p^a)T+@C7!84;5dxs{ zK>^xOae&ed5E>-Kz{tP=?g20|FtEUU#0c&NFo5JhT0t~OD~JYZWncucz-EB8GB855 zGJ<;{Abp@735W)3XJBLiYiD3&0BdJ}weuJu+8G$3HZwvyD2x#842%%%42%%%Fz1cZ zqaiRF0?-g(hR_0_`rnm-0ay1QqH2^J4S~@R7?vTx$l?<0;smagvHK5H*MjQP1gJEq x_6JqRjG%fNAqK9BnIPkd5>R1Kbq~@CqQO-$BLk!+A8iQ0LTHp84FURx001HtaR2}S literal 0 HcmV?d00001 diff --git a/egs/voxceleb/v2/local/make_voxceleb1.pl b/egs/voxceleb/v2/local/make_voxceleb1.pl new file mode 100755 index 000000000..2268c20ab --- /dev/null +++ b/egs/voxceleb/v2/local/make_voxceleb1.pl @@ -0,0 +1,130 @@ +#!/usr/bin/perl +# +# Copyright 2018 Ewald Enzinger +# 2018 David Snyder +# +# Usage: make_voxceleb1.pl /export/voxceleb1 data/ + +if (@ARGV != 2) { + print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n"; + print STDERR "e.g. $0 /export/voxceleb1 data/\n"; + exit(1); +} + +($data_base, $out_dir) = @ARGV; +my $out_test_dir = "$out_dir/voxceleb1_test"; +my $out_train_dir = "$out_dir/voxceleb1_train"; + +if (system("mkdir -p $out_test_dir") != 0) { + die "Error making directory $out_test_dir"; +} + +if (system("mkdir -p $out_train_dir") != 0) { + die "Error making directory $out_train_dir"; +} + +opendir my $dh, "$data_base/voxceleb1_wav" or die "Cannot open directory: $!"; +my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh); +closedir $dh; + +if (! -e "$data_base/voxceleb1_test.txt") { + system("wget -O $data_base/voxceleb1_test.txt http://www.openslr.org/resources/49/voxceleb1_test.txt"); +} + +if (! -e "$data_base/vox1_meta.csv") { + system("wget -O $data_base/vox1_meta.csv http://www.openslr.org/resources/49/vox1_meta.csv"); +} + +open(TRIAL_IN, "<", "$data_base/voxceleb1_test.txt") or die "Could not open the verification trials file $data_base/voxceleb1_test.txt"; +open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv"; +open(SPKR_TEST, ">", "$out_test_dir/utt2spk") or die "Could not open the output file $out_test_dir/utt2spk"; +open(WAV_TEST, ">", "$out_test_dir/wav.scp") or die "Could not open the output file $out_test_dir/wav.scp"; +open(SPKR_TRAIN, ">", "$out_train_dir/utt2spk") or die "Could not open the output file $out_train_dir/utt2spk"; +open(WAV_TRAIN, ">", "$out_train_dir/wav.scp") or die "Could not open the output file $out_train_dir/wav.scp"; +open(TRIAL_OUT, ">", "$out_test_dir/trials") or die "Could not open the output file $out_test_dir/trials"; + +my %id2spkr = (); +while (<META_IN>) { + chomp; + my ($vox_id, $spkr_id, $gender, $nation, $set) = split; + $id2spkr{$vox_id} = $spkr_id; +} + +my $test_spkrs = (); +while (<TRIAL_IN>) { + chomp; + my ($tar_or_non, $path1, $path2) = split; + + # Create entry for left-hand side of trial + my ($spkr_id, $filename) = split('/', $path1); + my $rec_id = substr($filename, 0, 11); + my $segment = substr($filename, 12, 7); + my $utt_id1 = "$spkr_id-$rec_id-$segment"; + $test_spkrs{$spkr_id} = (); + + # Create entry for right-hand side of trial + my ($spkr_id, $filename) = split('/', $path2); + my $rec_id = substr($filename, 0, 11); + my $segment = substr($filename, 12, 7); + my $utt_id2 = "$spkr_id-$rec_id-$segment"; + $test_spkrs{$spkr_id} = (); + + my $target = "nontarget"; + if ($tar_or_non eq "1") { + $target = "target"; + } + print TRIAL_OUT "$utt_id1 $utt_id2 $target\n"; +} + +foreach (@spkr_dirs) { + my $spkr_id = $_; + my $new_spkr_id = $spkr_id; + # If we're using a newer version of VoxCeleb1, we need to "deanonymize" + # the speaker labels. + if (exists $id2spkr{$spkr_id}) { + $new_spkr_id = $id2spkr{$spkr_id}; + } + opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!"; + my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh); + closedir $dh; + foreach (@files) { + my $filename = $_; + my $rec_id = substr($filename, 0, 11); + my $segment = substr($filename, 12, 7); + my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav"; + my $utt_id = "$new_spkr_id-$rec_id-$segment"; + if (exists $test_spkrs{$new_spkr_id}) { + print WAV_TEST "$utt_id", " $wav", "\n"; + print SPKR_TEST "$utt_id", " $new_spkr_id", "\n"; + } else { + print WAV_TRAIN "$utt_id", " $wav", "\n"; + print SPKR_TRAIN "$utt_id", " $new_spkr_id", "\n"; + } + } +} + +close(SPKR_TEST) or die; +close(WAV_TEST) or die; +close(SPKR_TRAIN) or die; +close(WAV_TRAIN) or die; +close(TRIAL_OUT) or die; +close(TRIAL_IN) or die; +close(META_IN) or die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_test_dir/utt2spk >$out_test_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_test_dir"; +} +system("env LC_COLLATE=C utils/fix_data_dir.sh $out_test_dir"); +if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_test_dir") != 0) { + die "Error validating directory $out_test_dir"; +} + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_train_dir/utt2spk >$out_train_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_train_dir"; +} +system("env LC_COLLATE=C utils/fix_data_dir.sh $out_train_dir"); +if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_train_dir") != 0) { + die "Error validating directory $out_train_dir"; +} diff --git a/egs/voxceleb/v2/local/make_voxceleb1_v2.pl b/egs/voxceleb/v2/local/make_voxceleb1_v2.pl new file mode 100755 index 000000000..221507870 --- /dev/null +++ b/egs/voxceleb/v2/local/make_voxceleb1_v2.pl @@ -0,0 +1,125 @@ +#!/usr/bin/perl +# +# Copyright 2018 Ewald Enzinger +# 2018 David Snyder +# 2019 Soonshin Seo +# +# Usage: make_voxceleb1_v2.pl /export/voxceleb1 dev data/dev +# +# The VoxCeleb1 corpus underwent several updates that changed the directory and speaker ID format. +# The script 'make_voxceleb1.pl' works for the oldest version of the corpus. +# This script should be used if you've downloaded the corpus recently. + +if (@ARGV != 3) { + print STDERR "Usage: $0 <path-to-voxceleb1> <dataset> <path-to-data-dir>\n"; + print STDERR "e.g. $0 /export/voxceleb1 dev data/dev\n"; + exit(1); +} + +($data_base, $dataset, $out_dir) = @ARGV; + +if ("$dataset" ne "dev" && "$dataset" ne "test") { + die "dataset parameter must be 'dev' or 'test'!"; +} + +if (system("mkdir -p $out_dir") != 0) { + die "Error making directory $out_dir"; +} + +opendir my $dh, "$data_base/$dataset/wav" or die "Cannot open directory: $!"; +my @spkr_dirs = grep {-d "$data_base/$dataset/wav/$_" && ! /^\.{1,2}$/} readdir($dh); +closedir $dh; + +if ($dataset eq "dev"){ + open(SPKR_TRAIN, ">", "$out_dir/utt2spk") or die "could not open the output file $out_dir/utt2spk"; + open(WAV_TRAIN, ">", "$out_dir/wav.scp") or die "could not open the output file $out_dir/wav.scp"; + + foreach (@spkr_dirs) { + my $spkr_id = $_; + opendir my $dh, "$data_base/$dataset/wav/$spkr_id/" or die "Cannot open directory: $!"; + my @rec_dirs = grep {-d "$data_base/$dataset/wav/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh); + closedir $dh; + foreach (@rec_dirs) { + my $rec_id = $_; + opendir my $dh, "$data_base/$dataset/wav/$spkr_id/$rec_id/" or die "Cannot open directory: $!"; + my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh); + closedir $dh; + foreach (@files) { + my $name = $_; + my $wav = "$data_base/$dataset/wav/$spkr_id/$rec_id/$name.wav"; + my $utt_id = "$spkr_id-$rec_id-$name"; + print WAV_TRAIN "$utt_id", " $wav", "\n"; + print SPKR_TRAIN "$utt_id", " $spkr_id", "\n"; + } + } + } + close(SPKR_TRAIN) or die; + close(WAV_TRAIN) or die; +} + +if ($dataset eq "test"){ + if (! -e "$data_base/voxceleb1_test_v2.txt") { + system("wget -O $data_base/voxceleb1_test_v2.txt http://www.openslr.org/resources/49/voxceleb1_test_v2.txt"); + } + + open(TRIAL_IN, "<", "$data_base/voxceleb1_test_v2.txt") or die "could not open the verification trials file $data_base/voxceleb1_test_v2.txt"; + open(TRIAL_OUT, ">", "$out_dir/trials") or die "Could not open the output file $out_test_dir/trials"; + open(SPKR_TEST, ">", "$out_dir/utt2spk") or die "could not open the output file $out_dir/utt2spk"; + open(WAV_TEST, ">", "$out_dir/wav.scp") or die "could not open the output file $out_dir/wav.scp"; + + my $test_spkrs = (); + while (<TRIAL_IN>) { + chomp; + my ($tar_or_non, $path1, $path2) = split; + # Create entry for left-hand side of trial + my ($spkr_id, $rec_id, $name) = split('/', $path1); + $name =~ s/\.wav$//g; + my $utt_id1 = "$spkr_id-$rec_id-$name"; + $test_spkrs{$spkr_id} = (); + + # Create entry for right-hand side of trial + my ($spkr_id, $rec_id, $name) = split('/', $path2); + $name =~ s/\.wav$//g; + my $utt_id2 = "$spkr_id-$rec_id-$name"; + $test_spkrs{$spkr_id} = (); + + my $target = "nontarget"; + if ($tar_or_non eq "1") { + $target = "target"; + } + print TRIAL_OUT "$utt_id1 $utt_id2 $target\n"; + } + + foreach (@spkr_dirs) { + my $spkr_id = $_; + opendir my $dh, "$data_base/$dataset/wav/$spkr_id/" or die "Cannot open directory: $!"; + my @rec_dirs = grep {-d "$data_base/$dataset/wav/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh); + closedir $dh; + foreach (@rec_dirs) { + my $rec_id = $_; + opendir my $dh, "$data_base/$dataset/wav/$spkr_id/$rec_id/" or die "Cannot open directory: $!"; + my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh); + closedir $dh; + foreach (@files) { + my $name = $_; + my $wav = "$data_base/$dataset/wav/$spkr_id/$rec_id/$name.wav"; + my $utt_id = "$spkr_id-$rec_id-$name"; + print WAV_TEST "$utt_id", " $wav", "\n"; + print SPKR_TEST "$utt_id", " $spkr_id", "\n"; + } + } + } + close(SPKR_TEST) or die; + close(WAV_TEST) or die; + close(TRIAL_OUT) or die; + close(TRIAL_IN) or die; +} + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} +system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir"); +if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/voxceleb/v2/local/make_voxceleb2.pl b/egs/voxceleb/v2/local/make_voxceleb2.pl new file mode 100755 index 000000000..34c1591eb --- /dev/null +++ b/egs/voxceleb/v2/local/make_voxceleb2.pl @@ -0,0 +1,70 @@ +#!/usr/bin/perl +# +# Copyright 2018 Ewald Enzinger +# +# Usage: make_voxceleb2.pl /export/voxceleb2 dev data/dev +# +# Note: This script requires ffmpeg to be installed and its location included in $PATH. + +if (@ARGV != 3) { + print STDERR "Usage: $0 <path-to-voxceleb2> <dataset> <path-to-data-dir>\n"; + print STDERR "e.g. $0 /export/voxceleb2 dev data/dev\n"; + exit(1); +} + +# Check that ffmpeg is installed. +if (`which ffmpeg` eq "") { + die "Error: this script requires that ffmpeg is installed."; +} + +($data_base, $dataset, $out_dir) = @ARGV; + +if ("$dataset" ne "dev" && "$dataset" ne "test") { + die "dataset parameter must be 'dev' or 'test'!"; +} + +opendir my $dh, "$data_base/$dataset/aac" or die "Cannot open directory: $!"; +my @spkr_dirs = grep {-d "$data_base/$dataset/aac/$_" && ! /^\.{1,2}$/} readdir($dh); +closedir $dh; + +if (system("mkdir -p $out_dir") != 0) { + die "Error making directory $out_dir"; +} + +open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk"; +open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp"; + +foreach (@spkr_dirs) { + my $spkr_id = $_; + + opendir my $dh, "$data_base/$dataset/aac/$spkr_id/" or die "Cannot open directory: $!"; + my @rec_dirs = grep {-d "$data_base/$dataset/aac/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh); + closedir $dh; + + foreach (@rec_dirs) { + my $rec_id = $_; + + opendir my $dh, "$data_base/$dataset/aac/$spkr_id/$rec_id/" or die "Cannot open directory: $!"; + my @files = map{s/\.[^.]+$//;$_}grep {/\.m4a$/} readdir($dh); + closedir $dh; + + foreach (@files) { + my $name = $_; + my $wav = "ffmpeg -v 8 -i $data_base/$dataset/aac/$spkr_id/$rec_id/$name.m4a -f wav -acodec pcm_s16le -|"; + my $utt_id = "$spkr_id-$rec_id-$name"; + print WAV "$utt_id", " $wav", "\n"; + print SPKR "$utt_id", " $spkr_id", "\n"; + } + } +} +close(SPKR) or die; +close(WAV) or die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} +system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir"); +if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/voxceleb/v2/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/voxceleb/v2/local/nnet3/xvector/prepare_feats_for_egs.sh new file mode 100755 index 000000000..a899ea7e9 --- /dev/null +++ b/egs/voxceleb/v2/local/nnet3/xvector/prepare_feats_for_egs.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# +# Copied from egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh (commit 3ea534070fd2cccd2e4ee21772132230033022ce). +# +# Apache 2.0. + +# This script applies sliding window cmvn and removes silence frames. This +# is performed on the raw features prior to generating examples for training +# the xvector system. + +nj=40 +cmd="run.pl" +stage=0 +norm_vars=false +center=true +compress=true +cmn_window=300 + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: $0 <in-data-dir> <out-data-dir> <feat-dir>" + echo "e.g.: $0 data/train data/train_no_sil exp/make_xvector_features" + echo "Options: " + echo " --nj <nj> # number of parallel jobs" + echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." + echo " --norm-vars <true|false> # If true, normalize variances in the sliding window cmvn" + exit 1; +fi + +data_in=$1 +data_out=$2 +dir=$3 + +name=`basename $data_in` + +for f in $data_in/feats.scp $data_in/vad.scp ; do + [ ! -f $f ] && echo "$0: No such file $f" && exit 1; +done + +# Set various variables. +mkdir -p $dir/log +mkdir -p $data_out +featdir=$(utils/make_absolute.sh $dir) + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then + utils/create_split_dir.pl \ + /export/b{14,15,16,17}/$USER/kaldi-data/egs/voxceleb2/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_feats/storage $featdir/storage +fi + +for n in $(seq $nj); do + # the next command does nothing unless $featdir/storage/ exists, see + # utils/create_data_link.pl for more info. + utils/create_data_link.pl $featdir/xvector_feats_${name}.${n}.ark +done + +cp $data_in/utt2spk $data_out/utt2spk +cp $data_in/spk2utt $data_out/spk2utt +cp $data_in/wav.scp $data_out/wav.scp + +write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB" + +sdata_in=$data_in/split$nj; +utils/split_data.sh $data_in $nj || exit 1; + +$cmd JOB=1:$nj $dir/log/create_xvector_feats_${name}.JOB.log \ + apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \ + scp:${sdata_in}/JOB/feats.scp ark:- \| \ + select-voiced-frames ark:- scp,s,cs:${sdata_in}/JOB/vad.scp ark:- \| \ + copy-feats --compress=$compress $write_num_frames_opt ark:- \ + ark,scp:$featdir/xvector_feats_${name}.JOB.ark,$featdir/xvector_feats_${name}.JOB.scp || exit 1; + +for n in $(seq $nj); do + cat $featdir/xvector_feats_${name}.$n.scp || exit 1; +done > ${data_out}/feats.scp || exit 1 + +for n in $(seq $nj); do + cat $featdir/log/utt2num_frames.$n || exit 1; +done > $data_out/utt2num_frames || exit 1 +rm $featdir/log/utt2num_frames.* + +echo "$0: Succeeded creating xvector features for $name" diff --git a/egs/voxceleb/v2/local/nnet3/xvector/run_xvector.sh b/egs/voxceleb/v2/local/nnet3/xvector/run_xvector.sh new file mode 120000 index 000000000..585b63fd2 --- /dev/null +++ b/egs/voxceleb/v2/local/nnet3/xvector/run_xvector.sh @@ -0,0 +1 @@ +tuning/run_xvector_1a.sh \ No newline at end of file diff --git a/egs/voxceleb/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/voxceleb/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh new file mode 100755 index 000000000..a7bb0cdd4 --- /dev/null +++ b/egs/voxceleb/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh @@ -0,0 +1,155 @@ +#!/usr/bin/env bash +# Copyright 2017 David Snyder +# 2017 Johns Hopkins University (Author: Daniel Garcia-Romero) +# 2017 Johns Hopkins University (Author: Daniel Povey) +# +# Copied from egs/sre16/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh (commit e082c17d4a8f8a791428ae4d9f7ceb776aef3f0b). +# +# Apache 2.0. + +# This script trains a DNN similar to the recipe described in +# http://www.danielpovey.com/files/2018_icassp_xvectors.pdf + +. ./cmd.sh +set -e + +stage=1 +train_stage=0 +use_gpu=true +remove_egs=false + +data=data/train +nnet_dir=exp/xvector_nnet_1a/ +egs_dir=exp/xvector_nnet_1a/egs + +. ./path.sh +. ./cmd.sh +. ./utils/parse_options.sh + +num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l) + +# Now we create the nnet examples using sid/nnet3/xvector/get_egs.sh. +# The argument --num-repeats is related to the number of times a speaker +# repeats per archive. If it seems like you're getting too many archives +# (e.g., more than 200) try increasing the --frames-per-iter option. The +# arguments --min-frames-per-chunk and --max-frames-per-chunk specify the +# minimum and maximum length (in terms of number of frames) of the features +# in the examples. +# +# To make sense of the egs script, it may be necessary to put an "exit 1" +# command immediately after stage 3. Then, inspect +# exp/<your-dir>/egs/temp/ranges.* . The ranges files specify the examples that +# will be created, and which archives they will be stored in. Each line of +# ranges.* has the following form: +# <utt-id> <local-ark-indx> <global-ark-indx> <start-frame> <end-frame> <spk-id> +# For example: +# 100304-f-sre2006-kacg-A 1 2 4079 881 23 + +# If you're satisfied with the number of archives (e.g., 50-150 archives is +# reasonable) and with the number of examples per speaker (e.g., 1000-5000 +# is reasonable) then you can let the script continue to the later stages. +# Otherwise, try increasing or decreasing the --num-repeats option. You might +# need to fiddle with --frames-per-iter. Increasing this value decreases the +# the number of archives and increases the number of examples per archive. +# Decreasing this value increases the number of archives, while decreasing the +# number of examples per archive. +if [ $stage -le 6 ]; then + echo "$0: Getting neural network training egs"; + # dump egs. + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/voxceleb2/v2/xvector-$(date +'%m_%d_%H_%M')/$egs_dir/storage $egs_dir/storage + fi + sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ + --nj 8 \ + --stage 0 \ + --frames-per-iter 1000000000 \ + --frames-per-iter-diagnostic 100000 \ + --min-frames-per-chunk 200 \ + --max-frames-per-chunk 400 \ + --num-diagnostic-archives 3 \ + --num-repeats 50 \ + "$data" $egs_dir +fi + +if [ $stage -le 7 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}') + feat_dim=$(cat $egs_dir/info/feat_dim) + + # This chunk-size corresponds to the maximum number of frames the + # stats layer is able to pool over. In this script, it corresponds + # to 100 seconds. If the input recording is greater than 100 seconds, + # we will compute multiple xvectors from the same recording and average + # to produce the final xvector. + max_chunk_size=10000 + + # The smallest number of frames we're comfortable computing an xvector from. + # Note that the hard minimum is given by the left and right context of the + # frame-level layers. + min_chunk_size=25 + mkdir -p $nnet_dir/configs + cat <<EOF > $nnet_dir/configs/network.xconfig + # please note that it is important to have input layer with the name=input + + # The frame-level layers + input dim=${feat_dim} name=input + relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512 + relu-batchnorm-layer name=tdnn2 input=Append(-2,0,2) dim=512 + relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=512 + relu-batchnorm-layer name=tdnn4 dim=512 + relu-batchnorm-layer name=tdnn5 dim=1500 + + # The stats pooling layer. Layers after this are segment-level. + # In the config below, the first and last argument (0, and ${max_chunk_size}) + # means that we pool over an input segment starting at frame 0 + # and ending at frame ${max_chunk_size} or earlier. The other arguments (1:1) + # mean that no subsampling is performed. + stats-layer name=stats config=mean+stddev(0:1:1:${max_chunk_size}) + + # This is where we usually extract the embedding (aka xvector) from. + relu-batchnorm-layer name=tdnn6 dim=512 input=stats + + # This is where another layer the embedding could be extracted + # from, but usually the previous one works better. + relu-batchnorm-layer name=tdnn7 dim=512 + output-layer name=output include-log-softmax=true dim=${num_targets} +EOF + + steps/nnet3/xconfig_to_configs.py \ + --xconfig-file $nnet_dir/configs/network.xconfig \ + --config-dir $nnet_dir/configs/ + cp $nnet_dir/configs/final.config $nnet_dir/nnet.config + + # These three files will be used by sid/nnet3/xvector/extract_xvectors.sh + echo "output-node name=output input=tdnn6.affine" > $nnet_dir/extract.config + echo "$max_chunk_size" > $nnet_dir/max_chunk_size + echo "$min_chunk_size" > $nnet_dir/min_chunk_size +fi + +dropout_schedule='0,0@0.20,0.1@0.50,0' +srand=123 +if [ $stage -le 8 ]; then + steps/nnet3/train_raw_dnn.py --stage=$train_stage \ + --cmd="$train_cmd" \ + --trainer.optimization.proportional-shrink 10 \ + --trainer.optimization.momentum=0.5 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=8 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.minibatch-size=64 \ + --trainer.srand=$srand \ + --trainer.max-param-change=2 \ + --trainer.num-epochs=3 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.shuffle-buffer-size=1000 \ + --egs.frames-per-eg=1 \ + --egs.dir="$egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --dir=$nnet_dir || exit 1; +fi + +exit 0; diff --git a/egs/voxceleb/v2/local/prepare_for_eer.py b/egs/voxceleb/v2/local/prepare_for_eer.py new file mode 100755 index 000000000..2f569b70b --- /dev/null +++ b/egs/voxceleb/v2/local/prepare_for_eer.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# +# Copyright 2015 David Snyder +# Apache 2.0. +# +# Copied from egs/sre10/v1/local/prepare_for_eer.py (commit 9cb4c4c2fb0223ee90c38d98af11305074eb7ef8) +# +# Given a trials and scores file, this script +# prepares input for the binary compute-eer. +import sys +trials = open(sys.argv[1], 'r').readlines() +scores = open(sys.argv[2], 'r').readlines() +spkrutt2target = {} +for line in trials: + spkr, utt, target = line.strip().split() + spkrutt2target[spkr+utt]=target +for line in scores: + spkr, utt, score = line.strip().split() + print("{} {}".format(score, spkrutt2target[spkr+utt])) diff --git a/egs/voxceleb/v2/local b/egs/voxceleb/v2/local2 similarity index 100% rename from egs/voxceleb/v2/local rename to egs/voxceleb/v2/local2 diff --git a/egs/voxforge/.DS_Store b/egs/voxforge/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3ef0e7fb683034f86b60a4c47b17cdeeb7baaa4f GIT binary patch literal 6147 zcmZQzU|@7AO)+F(5MW?n;9!8zOq>i@0Z1N%F(jFwBCH_uz-FW{q%!0(<fjxT=RoB~ zslgorpt$2;NM|T!C}D_aK#ISl{Csd6ax>&J<T2zhWHRJ2q+(aX#8Av&np9p~kd%|3 z#K6FCBB>xJv$({-;5s7{GYcylI|m0B2Nzdta7KQ4a7kiGX|YpcQ8b7bkXVwD1Z9Wh z=jXuLiAiCZspatkBF_1FC5f4NsYM{=!I>$kNr^=<;hA|U`Q=XerFkjEU~`}n92}e+ zobdwU)zyZ^COQhH=CwKs)#l~~ItnJnX0^4P9O9~mww?*Ol~vU>wRJNfF~i6Rp&9t0 zG>n?bz`y|Z8Ynr#f&(m27F?8<lb@FkN%t`Iqx5J9jE2By2n>}FfR=k4+(X5|qwX9H zfzc44TL^&42L)&o#{o(=KxmK@10$$;1mUy59K{Ik1~7o+Kw3dGNGpg2X=PvpvA}vD zod-q+h*m~$F9f6y)FT1WVC@Wy3}EdHj0|Ay46w!;BSbp`BSbp`Bea9U2+_{K2+_{K z2+<C6-Y7j90;3@S4FP5dEdZ+jT^Sf~b^jr%M#<3-7!84883K$fF2OEN;7S?0|3Gyu zs6I`AN`q>DP<6}*s;3cR;HsDjGM*>_6$VxJAgv%8Top4iKx*>Qh5#&tM(NQIpnnJe E0Bo6H82|tP literal 0 HcmV?d00001 diff --git a/egs/voxforge/s5/.DS_Store b/egs/voxforge/s5/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..fd02e9645239fa017d618ee71de4b79048707e6a GIT binary patch literal 6147 zcmZQzU|@7AO)+F(5MW?n;9!8zOq>i@0Z1N%F(jFwA}k>DKxU*AC+8&P=jTAhN2$Rd z0-(5KVMu1kXUJnnLykXIh8%``hGd3BhMc7G;)0}{{3Hejh7(BzIhn;J1_sv|nV4Bv z+1NQaxH!1DVuLgC%Y#c2OG=BK5{sfiynw`#j3g*KBtJg~&Q44U%S<hg7Z7pI&nrpH z%u6i-DG$y}Nli*DiV4rmOUW;H$}i1JDF&MZmEhpu<lu}K5U;K_G&a#uFg35$QK&XI zH_%ZqF*d8M<>U}oHMI3i$gQlZuBok?0SS0UMhMNo52az$Oa=x9BoE^7cUf>zUQT{q zIwX0+^p4V_Aut*OqaiTlLjYPha&QlMpNx87Gz3ONfZ8DdDjyV}Z660H-2kCMQVfg? z4B#FBBLf4d6$!SU5!4NUr~zpOsR3yP(IBl1j35?R9;}st5vr9D+zVk~U<CI_Kr~o8 z10w@iI|CyFSUUr(MaKxy&cFz@nGxDSVFYz(K;}cVGcZE5gWWbtjE2By2p~d$8Nw6* z)&H&x47j@g5LKh(Xb6mkz_1JfMi!S~7bj4qiNkxKx)xNQCP3vuwLhpjW(3vK2r+O~ l%mf)vlz<9@s(X-D5Dl)185tlo`DjA`7DA)+Xb8|h1OU4iTDkxL literal 0 HcmV?d00001 diff --git a/egs/wsj/.DS_Store b/egs/wsj/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..6ee6a89767d74d4211ea002f71327b2b72fb5169 GIT binary patch literal 6147 zcmZQzU|@7AO)+F(5MW?n;9!8zj35RBCIAV8Fop~hR0Kpbg3L(D$xmWnU^tOfkdq0O z8|)Ow?Mw{C45mrt#RbUjFD@}KxX#GL%)-jX&cVUO!NnCDoRME1T#{H)TI`fq6b<49 zB$i|(LD?bs`8jZQVp3RUYI(eXh;x2kNn&PRY7t0zaArzsQesg|cxGNoez{YAX<kY( z*c_+?2L~qyXS{%Tb+w_fiH?G)d998@wYj;0j)IA?S#2#Rhq$Vtt!F}RWmR=eZQTq= z*fKIgXa;^L4Wni<Fff3<Ru){8my@5D4vKb|<S0EF0;3@?8UmvsFk(XhT5oW0kJu0x z_4sHAjD`TN5CD}A3eYyA1C(xn&>$%WMo{w-!e@b*0qzDs#6Vg>YCu{+G)OB0BZvi- z2Q|DI7#JBKS{cE;5D*{KBLUH1?F@_zVC@Wy3}EdHu*NPUL^}f`L^}hl@4^Vt&cF!K z&cF!K4s+fpJsJX|Api{lW(X|+s{dUX;8_SFI!ca)z-S1Jh!9|8aS3*D0$0k|{R^sV zLG@_@R2o$KL#ks?Ma=+`2370es+b8fo+trT3K9ot1<~NDn2`ZelaDq8U?DV0kA?vK GLjVB7PEJJt literal 0 HcmV?d00001 diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a8186c21bbcc852b093943a73dda04ae082949ca GIT binary patch literal 6148 zcmZQzU|@7AO)+F(5MW?n;9!8zOq>i@0Z1N%F(jFwA}k>DKxPydB<18MF)%QkOe%oN zjZ%X<1VC}e%#gy6&ybu{jvRkEnZ+dr2G<#xm|0la*f}`3IJkJZVuLgC%Y#c2OG=BK z5{sfiynw`#j3g*KBtJg~&Q44U%S<hg7Z7pI&nrpH%u6i-DG$y}Nli*DiV4rmOUW;H z$}i1JDF&MZmEhpu<lu}Kkf^RUF*VguFful+)lsN6H#g8xFflf(t>xqpS2eWtOvtUQ zs;;T6n+XYBMn(wDzz?Ni)GP)D2B`PSf{XHU^7GOmiIstsA%`KCA&DWAA#b>P9LbAl zo`(g`C_NeiqaiR-LjYQ6a&V8-02uZ6Xb6mk0L~Bql@AKg=8^-HZh+7rDF#Lc25=XE zk%0l!j0Bs*2<itw)PS^t)PS^tXpmM0Mi2`u57x@S2-V66?uIZhFoL@zAR4Tlfsp~M zoq>@7tepYY(qn{ZXJCZd%n0qFFoJqCAoC&G85kkj!EPHRMnhmU1P~#>3}Fg@>VH=T z23-Arh^kR?Gz3ONU|5C#Ba2J0ixa5Q#Nj<qT??vD6QJ^-+8<OMGlJ@Agc!IgW`YbT hN<f7{)jdcnhz3{1j0}*Pe6%3|3!zbZGz91$0szpQlE44} literal 0 HcmV?d00001 diff --git a/src/matrix/t.bin b/src/matrix/t.bin new file mode 100644 index 0000000000000000000000000000000000000000..36e7472b1e5339f1f2806d23959b4d102d9ac6c3 GIT binary patch literal 160 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEPSsIR zFV09TNL9B|&@j?bFwxXes8t{va4~>EgFOQSg98JDg98IYg98J@0tW_$0}c!f4;%mh CG$5=1 literal 0 HcmV?d00001 diff --git a/tools/.DS_Store b/tools/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d0424173c01af5637bc0771814c2a085e13868e8 GIT binary patch literal 6148 zcmZQzU|@7AO)+F(5MW?n;9!8zOq>i@0Z1N%F(jFwA|RR(Y(_9cJVORUDnlYe3RG^C z8r&fOiaTBgM}`mv9|lJT<oHWvC}Jp1DlaZb%E?b+U|=|zRFIQdTw-8woso%|g_Vt+ zgM*8Mi<c`lI3vG2xFoTpwAd-JC>q2ING!=ng0e&M^K;<r#H6sy)be-%5$F88lElos z)FP1b;LMcNq{O0_@XWlF{Bo!K(!7*nusKi(4h~KZ&UgWd>S|+SQym3k6O&pUg=%wi z104kuW3$>?P7ZNZLtD>;+{&uzn%cUVkPv5NgwPE9P#Q+fVqjpv<K?p8qP(2^ymU|| zfoU0~M?+vV1V%$(c!dD8faKsFUfvq@&1eXWh5!{p08~CGK$}4hP`Uv^gQOT385ls# zM-a^dvkuY^U;v4Mw1Q}mRuB!+%D@O>fz1GGWng50Xk`R<LqPgKT@nxt*3Q7l0M^dH z$N<*P0BhATLbNk5LbNk5LVGBT5bX?%5bX?%5bZGMjnbnbFd72T5MYMT0-*Zem4N|Q z{~w}ilpGC#(GVDxA;8Gu671pxu9UI+4^-EJ>eB?MG^q9mRmY5=dKw`Hu8Nr;1Bwz* fVNi7s(h8!%RWTz2q$VG22*5&UlpYNM`iB4jFHcnG literal 0 HcmV?d00001 -- GitLab