diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..983053dc788bd10926c6731ab98d714eebd335be
Binary files /dev/null and b/.DS_Store differ
diff --git a/egs/.DS_Store b/egs/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..e04067a2d4553d65b59f96c8f73421ef6bf15192
Binary files /dev/null and b/egs/.DS_Store differ
diff --git a/egs/digits b/egs/digits
new file mode 160000
index 0000000000000000000000000000000000000000..7a670411cf604463bf004beef273467c019d033b
--- /dev/null
+++ b/egs/digits
@@ -0,0 +1 @@
+Subproject commit 7a670411cf604463bf004beef273467c019d033b
diff --git a/egs/kaldi_toy_example b/egs/kaldi_toy_example
new file mode 160000
index 0000000000000000000000000000000000000000..90246e855df9742662efc639e2aa11a042957290
--- /dev/null
+++ b/egs/kaldi_toy_example
@@ -0,0 +1 @@
+Subproject commit 90246e855df9742662efc639e2aa11a042957290
diff --git a/egs/voxceleb/.DS_Store b/egs/voxceleb/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..fd3b239592f6b822947e0fbd2575dd5a1ebf4a24
Binary files /dev/null and b/egs/voxceleb/.DS_Store differ
diff --git a/egs/voxceleb/v1/.DS_Store b/egs/voxceleb/v1/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..afcae070c37829f7a5a1ecb02beec75813e1195d
Binary files /dev/null and b/egs/voxceleb/v1/.DS_Store differ
diff --git a/egs/voxceleb/v2/.DS_Store b/egs/voxceleb/v2/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..16ba07ade7f9a22bb4e381774f0f19aa150ecd0f
Binary files /dev/null and b/egs/voxceleb/v2/.DS_Store differ
diff --git a/egs/voxceleb/v2/local/make_voxceleb1.pl b/egs/voxceleb/v2/local/make_voxceleb1.pl
new file mode 100755
index 0000000000000000000000000000000000000000..2268c20ab52aa62f47d2cb63ec13cc6073ff324d
--- /dev/null
+++ b/egs/voxceleb/v2/local/make_voxceleb1.pl
@@ -0,0 +1,130 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#
+# Usage: make_voxceleb1.pl /export/voxceleb1 data/
+
+if (@ARGV != 2) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
+  exit(1);
+}
+
+($data_base, $out_dir) = @ARGV;
+my $out_test_dir = "$out_dir/voxceleb1_test";
+my $out_train_dir = "$out_dir/voxceleb1_train";
+
+if (system("mkdir -p $out_test_dir") != 0) {
+  die "Error making directory $out_test_dir";
+}
+
+if (system("mkdir -p $out_train_dir") != 0) {
+  die "Error making directory $out_train_dir";
+}
+
+opendir my $dh, "$data_base/voxceleb1_wav" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
+
+if (! -e "$data_base/voxceleb1_test.txt") {
+  system("wget -O $data_base/voxceleb1_test.txt http://www.openslr.org/resources/49/voxceleb1_test.txt");
+}
+
+if (! -e "$data_base/vox1_meta.csv") {
+  system("wget -O $data_base/vox1_meta.csv http://www.openslr.org/resources/49/vox1_meta.csv");
+}
+
+open(TRIAL_IN, "<", "$data_base/voxceleb1_test.txt") or die "Could not open the verification trials file $data_base/voxceleb1_test.txt";
+open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv";
+open(SPKR_TEST, ">", "$out_test_dir/utt2spk") or die "Could not open the output file $out_test_dir/utt2spk";
+open(WAV_TEST, ">", "$out_test_dir/wav.scp") or die "Could not open the output file $out_test_dir/wav.scp";
+open(SPKR_TRAIN, ">", "$out_train_dir/utt2spk") or die "Could not open the output file $out_train_dir/utt2spk";
+open(WAV_TRAIN, ">", "$out_train_dir/wav.scp") or die "Could not open the output file $out_train_dir/wav.scp";
+open(TRIAL_OUT, ">", "$out_test_dir/trials") or die "Could not open the output file $out_test_dir/trials";
+
+my %id2spkr = ();
+while (<META_IN>) {
+  chomp;
+  my ($vox_id, $spkr_id, $gender, $nation, $set) = split;
+  $id2spkr{$vox_id} = $spkr_id;
+}
+
+my %test_spkrs = ();
+while (<TRIAL_IN>) {
+  chomp;
+  my ($tar_or_non, $path1, $path2) = split;
+
+  # Create entry for left-hand side of trial
+  my ($spkr_id, $filename) = split('/', $path1);
+  my $rec_id = substr($filename, 0, 11);
+  my $segment = substr($filename, 12, 7);
+  my $utt_id1 = "$spkr_id-$rec_id-$segment";
+  $test_spkrs{$spkr_id} = ();
+
+  # Create entry for right-hand side of trial
+  my ($spkr_id, $filename) = split('/', $path2);
+  my $rec_id = substr($filename, 0, 11);
+  my $segment = substr($filename, 12, 7);
+  my $utt_id2 = "$spkr_id-$rec_id-$segment";
+  $test_spkrs{$spkr_id} = ();
+
+  my $target = "nontarget";
+  if ($tar_or_non eq "1") {
+    $target = "target";
+  }
+  print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
+}
+
+foreach (@spkr_dirs) {
+  my $spkr_id = $_;
+  my $new_spkr_id = $spkr_id;
+  # If we're using a newer version of VoxCeleb1, we need to "deanonymize"
+  # the speaker labels.
+  if (exists $id2spkr{$spkr_id}) {
+    $new_spkr_id = $id2spkr{$spkr_id};
+  }
+  opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!";
+  my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
+  closedir $dh;
+  foreach (@files) {
+    my $filename = $_;
+    my $rec_id = substr($filename, 0, 11);
+    my $segment = substr($filename, 12, 7);
+    my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
+    my $utt_id = "$new_spkr_id-$rec_id-$segment";
+    if (exists $test_spkrs{$new_spkr_id}) {
+      print WAV_TEST "$utt_id", " $wav", "\n";
+      print SPKR_TEST "$utt_id", " $new_spkr_id", "\n";
+    } else {
+      print WAV_TRAIN "$utt_id", " $wav", "\n";
+      print SPKR_TRAIN "$utt_id", " $new_spkr_id", "\n";
+    }
+  }
+}
+
+close(SPKR_TEST) or die;
+close(WAV_TEST) or die;
+close(SPKR_TRAIN) or die;
+close(WAV_TRAIN) or die;
+close(TRIAL_OUT) or die;
+close(TRIAL_IN) or die;
+close(META_IN) or die;
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_test_dir/utt2spk >$out_test_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_test_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_test_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_test_dir") != 0) {
+  die "Error validating directory $out_test_dir";
+}
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_train_dir/utt2spk >$out_train_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_train_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_train_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_train_dir") != 0) {
+  die "Error validating directory $out_train_dir";
+}
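An illustrative usage sketch for make_voxceleb1.pl (not part of the patch; the corpus path is a placeholder, while the output directory names come from the script above):

  # Run from egs/voxceleb/v2; writes data/voxceleb1_train and data/voxceleb1_test.
  local/make_voxceleb1.pl /export/corpora/VoxCeleb1 data
  # wav.scp maps <speaker>-<recording>-<segment> utterance IDs to wav paths,
  # utt2spk maps the same IDs to speakers, and trials holds "<utt1> <utt2> target|nontarget" lines.
  head -n 1 data/voxceleb1_test/wav.scp data/voxceleb1_test/utt2spk data/voxceleb1_test/trials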
diff --git a/egs/voxceleb/v2/local/make_voxceleb1_v2.pl b/egs/voxceleb/v2/local/make_voxceleb1_v2.pl
new file mode 100755
index 0000000000000000000000000000000000000000..221507870be6e6edc15c3b0a7cf4569fa5fd472b
--- /dev/null
+++ b/egs/voxceleb/v2/local/make_voxceleb1_v2.pl
@@ -0,0 +1,125 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#           2019  Soonshin Seo
+#
+# Usage: make_voxceleb1_v2.pl /export/voxceleb1 dev data/dev
+#
+# The VoxCeleb1 corpus underwent several updates that changed the directory and speaker ID format.
+# The script 'make_voxceleb1.pl' works for the oldest version of the corpus.
+# This script should be used if you've downloaded the corpus recently.
+
+if (@ARGV != 3) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> <dataset> <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 dev data/dev\n";
+  exit(1);
+}
+
+($data_base, $dataset, $out_dir) = @ARGV;
+
+if ("$dataset" ne "dev" && "$dataset" ne "test") {
+  die "dataset parameter must be 'dev' or 'test'!";
+}
+
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
+}
+
+opendir my $dh, "$data_base/$dataset/wav" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$data_base/$dataset/wav/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
+
+if ($dataset eq "dev"){
+  open(SPKR_TRAIN, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+  open(WAV_TRAIN, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+
+  foreach (@spkr_dirs) {
+    my $spkr_id = $_;
+    opendir my $dh, "$data_base/$dataset/wav/$spkr_id/" or die "Cannot open directory: $!";
+    my @rec_dirs = grep {-d "$data_base/$dataset/wav/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh);
+    closedir $dh;
+    foreach (@rec_dirs) {
+      my $rec_id = $_;
+      opendir my $dh, "$data_base/$dataset/wav/$spkr_id/$rec_id/" or die "Cannot open directory: $!";
+      my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
+      closedir $dh;
+      foreach (@files) {
+        my $name = $_;
+        my $wav = "$data_base/$dataset/wav/$spkr_id/$rec_id/$name.wav";
+        my $utt_id = "$spkr_id-$rec_id-$name";
+        print WAV_TRAIN "$utt_id", " $wav", "\n";
+        print SPKR_TRAIN "$utt_id", " $spkr_id", "\n";
+      }
+    }
+  }
+  close(SPKR_TRAIN) or die;
+  close(WAV_TRAIN) or die;
+}
+
+if ($dataset eq "test"){
+  if (! -e "$data_base/voxceleb1_test_v2.txt") {
+    system("wget -O $data_base/voxceleb1_test_v2.txt http://www.openslr.org/resources/49/voxceleb1_test_v2.txt");
+  }
+
+  open(TRIAL_IN, "<", "$data_base/voxceleb1_test_v2.txt") or die "Could not open the verification trials file $data_base/voxceleb1_test_v2.txt";
+  open(TRIAL_OUT, ">", "$out_dir/trials") or die "Could not open the output file $out_dir/trials";
+  open(SPKR_TEST, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+  open(WAV_TEST, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+
+  my %test_spkrs = ();
+  while (<TRIAL_IN>) {
+    chomp;
+    my ($tar_or_non, $path1, $path2) = split;
+
+    # Create entry for left-hand side of trial
+    my ($spkr_id, $rec_id, $name) = split('/', $path1);
+    $name =~ s/\.wav$//g;
+    my $utt_id1 = "$spkr_id-$rec_id-$name";
+    $test_spkrs{$spkr_id} = ();
+
+    # Create entry for right-hand side of trial
+    my ($spkr_id, $rec_id, $name) = split('/', $path2);
+    $name =~ s/\.wav$//g;
+    my $utt_id2 = "$spkr_id-$rec_id-$name";
+    $test_spkrs{$spkr_id} = ();
+
+    my $target = "nontarget";
+    if ($tar_or_non eq "1") {
+      $target = "target";
+    }
+    print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
+  }
+
+  foreach (@spkr_dirs) {
+    my $spkr_id = $_;
+    opendir my $dh, "$data_base/$dataset/wav/$spkr_id/" or die "Cannot open directory: $!";
+    my @rec_dirs = grep {-d "$data_base/$dataset/wav/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh);
+    closedir $dh;
+    foreach (@rec_dirs) {
+      my $rec_id = $_;
+      opendir my $dh, "$data_base/$dataset/wav/$spkr_id/$rec_id/" or die "Cannot open directory: $!";
+      my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
+      closedir $dh;
+      foreach (@files) {
+        my $name = $_;
+        my $wav = "$data_base/$dataset/wav/$spkr_id/$rec_id/$name.wav";
+        my $utt_id = "$spkr_id-$rec_id-$name";
+        print WAV_TEST "$utt_id", " $wav", "\n";
+        print SPKR_TEST "$utt_id", " $spkr_id", "\n";
+      }
+    }
+  }
+  close(SPKR_TEST) or die;
+  close(WAV_TEST) or die;
+  close(TRIAL_OUT) or die;
+  close(TRIAL_IN) or die;
+}
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
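For the newer corpus layout, make_voxceleb1_v2.pl prepares the dev and test portions in two separate calls; an illustrative sketch, with placeholder corpus path and output directory names:

  # dev -> training data dir; test -> test data dir plus the trials list
  local/make_voxceleb1_v2.pl /export/corpora/VoxCeleb1 dev data/voxceleb1_train
  local/make_voxceleb1_v2.pl /export/corpora/VoxCeleb1 test data/voxceleb1_test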
diff --git a/egs/voxceleb/v2/local/make_voxceleb2.pl b/egs/voxceleb/v2/local/make_voxceleb2.pl
new file mode 100755
index 0000000000000000000000000000000000000000..34c1591eba3896e18ec1591443eb55cb429f24ff
--- /dev/null
+++ b/egs/voxceleb/v2/local/make_voxceleb2.pl
@@ -0,0 +1,70 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#
+# Usage: make_voxceleb2.pl /export/voxceleb2 dev data/dev
+#
+# Note: This script requires ffmpeg to be installed and its location included in $PATH.
+
+if (@ARGV != 3) {
+  print STDERR "Usage: $0 <path-to-voxceleb2> <dataset> <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb2 dev data/dev\n";
+  exit(1);
+}
+
+# Check that ffmpeg is installed.
+if (`which ffmpeg` eq "") {
+  die "Error: this script requires that ffmpeg is installed.";
+}
+
+($data_base, $dataset, $out_dir) = @ARGV;
+
+if ("$dataset" ne "dev" && "$dataset" ne "test") {
+  die "dataset parameter must be 'dev' or 'test'!";
+}
+
+opendir my $dh, "$data_base/$dataset/aac" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$data_base/$dataset/aac/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
+
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
+}
+
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+
+foreach (@spkr_dirs) {
+  my $spkr_id = $_;
+
+  opendir my $dh, "$data_base/$dataset/aac/$spkr_id/" or die "Cannot open directory: $!";
+  my @rec_dirs = grep {-d "$data_base/$dataset/aac/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh);
+  closedir $dh;
+
+  foreach (@rec_dirs) {
+    my $rec_id = $_;
+
+    opendir my $dh, "$data_base/$dataset/aac/$spkr_id/$rec_id/" or die "Cannot open directory: $!";
+    my @files = map{s/\.[^.]+$//;$_}grep {/\.m4a$/} readdir($dh);
+    closedir $dh;
+
+    foreach (@files) {
+      my $name = $_;
+      my $wav = "ffmpeg -v 8 -i $data_base/$dataset/aac/$spkr_id/$rec_id/$name.m4a -f wav -acodec pcm_s16le -|";
+      my $utt_id = "$spkr_id-$rec_id-$name";
+      print WAV "$utt_id", " $wav", "\n";
+      print SPKR "$utt_id", " $spkr_id", "\n";
+    }
+  }
+}
+close(SPKR) or die;
+close(WAV) or die;
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
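Note that the wav.scp entries written by make_voxceleb2.pl end in "|": each entry is an ffmpeg command, so Kaldi binaries decode the .m4a audio on the fly rather than reading a wav file from disk. A quick sanity check might look like this (illustrative; assumes a prepared data/voxceleb2_dev directory and a compiled Kaldi on PATH):

  # Pipes a few entries through ffmpeg end-to-end and prints their durations in seconds.
  head -n 3 data/voxceleb2_dev/wav.scp | wav-to-duration scp:- ark,t:-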
diff --git a/egs/voxceleb/v2/local/nnet3/xvector/prepare_feats_for_egs.sh b/egs/voxceleb/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a899ea7e952f35182dfaf4849e70316f6ae9b959
--- /dev/null
+++ b/egs/voxceleb/v2/local/nnet3/xvector/prepare_feats_for_egs.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+#
+# Copied from egs/sre16/v1/local/nnet3/xvector/prepare_feats_for_egs.sh (commit 3ea534070fd2cccd2e4ee21772132230033022ce).
+#
+# Apache 2.0.
+
+# This script applies sliding window cmvn and removes silence frames.  This
+# is performed on the raw features prior to generating examples for training
+# the xvector system.
+
+nj=40
+cmd="run.pl"
+stage=0
+norm_vars=false
+center=true
+compress=true
+cmn_window=300
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+if [ $# != 3 ]; then
+  echo "Usage: $0 <in-data-dir> <out-data-dir> <feat-dir>"
+  echo "e.g.: $0 data/train data/train_no_sil exp/make_xvector_features"
+  echo "Options: "
+  echo "  --nj <nj>                                        # number of parallel jobs"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --norm-vars <true|false>                         # If true, normalize variances in the sliding window cmvn"
+  exit 1;
+fi
+
+data_in=$1
+data_out=$2
+dir=$3
+
+name=`basename $data_in`
+
+for f in $data_in/feats.scp $data_in/vad.scp ; do
+  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+# Set various variables.
+mkdir -p $dir/log
+mkdir -p $data_out
+featdir=$(utils/make_absolute.sh $dir)
+
+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $featdir/storage ]; then
+  utils/create_split_dir.pl \
+    /export/b{14,15,16,17}/$USER/kaldi-data/egs/voxceleb2/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_feats/storage $featdir/storage
+fi
+
+for n in $(seq $nj); do
+  # the next command does nothing unless $featdir/storage/ exists, see
+  # utils/create_data_link.pl for more info.
+  utils/create_data_link.pl $featdir/xvector_feats_${name}.${n}.ark
+done
+
+cp $data_in/utt2spk $data_out/utt2spk
+cp $data_in/spk2utt $data_out/spk2utt
+cp $data_in/wav.scp $data_out/wav.scp
+
+write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB"
+
+sdata_in=$data_in/split$nj;
+utils/split_data.sh $data_in $nj || exit 1;
+
+$cmd JOB=1:$nj $dir/log/create_xvector_feats_${name}.JOB.log \
+  apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \
+    scp:${sdata_in}/JOB/feats.scp ark:- \| \
+  select-voiced-frames ark:- scp,s,cs:${sdata_in}/JOB/vad.scp ark:- \| \
+  copy-feats --compress=$compress $write_num_frames_opt ark:- \
+    ark,scp:$featdir/xvector_feats_${name}.JOB.ark,$featdir/xvector_feats_${name}.JOB.scp || exit 1;
+
+for n in $(seq $nj); do
+  cat $featdir/xvector_feats_${name}.$n.scp || exit 1;
+done > ${data_out}/feats.scp || exit 1
+
+for n in $(seq $nj); do
+  cat $featdir/log/utt2num_frames.$n || exit 1;
+done > $data_out/utt2num_frames || exit 1
+rm $featdir/log/utt2num_frames.*
+
+echo "$0: Succeeded creating xvector features for $name"
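In the recipe, prepare_feats_for_egs.sh is typically run after MFCC and VAD computation, roughly as below (illustrative; the directory names are placeholders and cmd.sh is assumed to have been sourced):

  local/nnet3/xvector/prepare_feats_for_egs.sh --nj 40 --cmd "$train_cmd" \
    data/train data/train_no_sil exp/train_no_sil
  utils/fix_data_dir.sh data/train_no_sil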
diff --git a/egs/voxceleb/v2/local/nnet3/xvector/run_xvector.sh b/egs/voxceleb/v2/local/nnet3/xvector/run_xvector.sh
new file mode 120000
index 0000000000000000000000000000000000000000..585b63fd2dd8ea0937b929f980df2b0b1b561288
--- /dev/null
+++ b/egs/voxceleb/v2/local/nnet3/xvector/run_xvector.sh
@@ -0,0 +1 @@
+tuning/run_xvector_1a.sh
\ No newline at end of file
diff --git a/egs/voxceleb/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/voxceleb/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a7bb0cdd4329c0e4db997889c515d7e72bc7d1cc
--- /dev/null
+++ b/egs/voxceleb/v2/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -0,0 +1,155 @@
+#!/usr/bin/env bash
+# Copyright      2017  David Snyder
+#                2017  Johns Hopkins University (Author: Daniel Garcia-Romero)
+#                2017  Johns Hopkins University (Author: Daniel Povey)
+#
+# Copied from egs/sre16/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh (commit e082c17d4a8f8a791428ae4d9f7ceb776aef3f0b).
+#
+# Apache 2.0.
+
+# This script trains a DNN similar to the recipe described in
+# http://www.danielpovey.com/files/2018_icassp_xvectors.pdf
+
+. ./cmd.sh
+set -e
+
+stage=1
+train_stage=0
+use_gpu=true
+remove_egs=false
+
+data=data/train
+nnet_dir=exp/xvector_nnet_1a/
+egs_dir=exp/xvector_nnet_1a/egs
+
+. ./path.sh
+. ./cmd.sh
+. ./utils/parse_options.sh
+
+num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l)
+
+# Now we create the nnet examples using sid/nnet3/xvector/get_egs.sh.
+# The argument --num-repeats is related to the number of times a speaker
+# repeats per archive.  If it seems like you're getting too many archives
+# (e.g., more than 200) try increasing the --frames-per-iter option.  The
+# arguments --min-frames-per-chunk and --max-frames-per-chunk specify the
+# minimum and maximum length (in terms of number of frames) of the features
+# in the examples.
+#
+# To make sense of the egs script, it may be necessary to put an "exit 1"
+# command immediately after stage 3.  Then, inspect
+# exp/<your-dir>/egs/temp/ranges.* . The ranges files specify the examples that
+# will be created, and which archives they will be stored in.  Each line of
+# ranges.* has the following form:
+#    <utt-id> <local-ark-indx> <global-ark-indx> <start-frame> <end-frame> <spk-id>
+# For example:
+#    100304-f-sre2006-kacg-A 1 2 4079 881 23
+
+# If you're satisfied with the number of archives (e.g., 50-150 archives is
+# reasonable) and with the number of examples per speaker (e.g., 1000-5000
+# is reasonable) then you can let the script continue to the later stages.
+# Otherwise, try increasing or decreasing the --num-repeats option.  You might
+# need to fiddle with --frames-per-iter.  Increasing this value decreases the
+# number of archives and increases the number of examples per archive.
+# Decreasing this value increases the number of archives, while decreasing the
+# number of examples per archive.
+if [ $stage -le 6 ]; then
+  echo "$0: Getting neural network training egs";
+  # dump egs.
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $egs_dir/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b{03,04,05,06}/$USER/kaldi-data/egs/voxceleb2/v2/xvector-$(date +'%m_%d_%H_%M')/$egs_dir/storage $egs_dir/storage
+  fi
+  sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \
+    --nj 8 \
+    --stage 0 \
+    --frames-per-iter 1000000000 \
+    --frames-per-iter-diagnostic 100000 \
+    --min-frames-per-chunk 200 \
+    --max-frames-per-chunk 400 \
+    --num-diagnostic-archives 3 \
+    --num-repeats 50 \
+    "$data" $egs_dir
+fi
+
+if [ $stage -le 7 ]; then
+  echo "$0: creating neural net configs using the xconfig parser";
+  num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}')
+  feat_dim=$(cat $egs_dir/info/feat_dim)
+
+  # This chunk-size corresponds to the maximum number of frames the
+  # stats layer is able to pool over.  In this script, it corresponds
+  # to 100 seconds.  If the input recording is greater than 100 seconds,
+  # we will compute multiple xvectors from the same recording and average
+  # to produce the final xvector.
+  max_chunk_size=10000
+
+  # The smallest number of frames we're comfortable computing an xvector from.
+  # Note that the hard minimum is given by the left and right context of the
+  # frame-level layers.
+  min_chunk_size=25
+  mkdir -p $nnet_dir/configs
+  cat <<EOF > $nnet_dir/configs/network.xconfig
+  # please note that it is important to have input layer with the name=input
+
+  # The frame-level layers
+  input dim=${feat_dim} name=input
+  relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512
+  relu-batchnorm-layer name=tdnn2 input=Append(-2,0,2) dim=512
+  relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=512
+  relu-batchnorm-layer name=tdnn4 dim=512
+  relu-batchnorm-layer name=tdnn5 dim=1500
+
+  # The stats pooling layer. Layers after this are segment-level.
+  # In the config below, the first and last argument (0, and ${max_chunk_size})
+  # means that we pool over an input segment starting at frame 0
+  # and ending at frame ${max_chunk_size} or earlier.  The other arguments (1:1)
+  # mean that no subsampling is performed.
+  stats-layer name=stats config=mean+stddev(0:1:1:${max_chunk_size})
+
+  # This is where we usually extract the embedding (aka xvector) from.
+  relu-batchnorm-layer name=tdnn6 dim=512 input=stats
+
+  # The embedding could also be extracted from this layer, but usually
+  # the previous one works better.
+  relu-batchnorm-layer name=tdnn7 dim=512
+  output-layer name=output include-log-softmax=true dim=${num_targets}
+EOF
+
+  steps/nnet3/xconfig_to_configs.py \
+      --xconfig-file $nnet_dir/configs/network.xconfig \
+      --config-dir $nnet_dir/configs/
+  cp $nnet_dir/configs/final.config $nnet_dir/nnet.config
+
+  # These three files will be used by sid/nnet3/xvector/extract_xvectors.sh
+  echo "output-node name=output input=tdnn6.affine" > $nnet_dir/extract.config
+  echo "$max_chunk_size" > $nnet_dir/max_chunk_size
+  echo "$min_chunk_size" > $nnet_dir/min_chunk_size
+fi
+
+dropout_schedule='0,0@0.20,0.1@0.50,0'
+srand=123
+if [ $stage -le 8 ]; then
+  steps/nnet3/train_raw_dnn.py --stage=$train_stage \
+    --cmd="$train_cmd" \
+    --trainer.optimization.proportional-shrink 10 \
+    --trainer.optimization.momentum=0.5 \
+    --trainer.optimization.num-jobs-initial=3 \
+    --trainer.optimization.num-jobs-final=8 \
+    --trainer.optimization.initial-effective-lrate=0.001 \
+    --trainer.optimization.final-effective-lrate=0.0001 \
+    --trainer.optimization.minibatch-size=64 \
+    --trainer.srand=$srand \
+    --trainer.max-param-change=2 \
+    --trainer.num-epochs=3 \
+    --trainer.dropout-schedule="$dropout_schedule" \
+    --trainer.shuffle-buffer-size=1000 \
+    --egs.frames-per-eg=1 \
+    --egs.dir="$egs_dir" \
+    --cleanup.remove-egs $remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --dir=$nnet_dir || exit 1;
+fi
+
+exit 0;
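The comment block in run_xvector_1a.sh suggests inspecting the egs allocation before training; given the documented ranges.* line format, a quick check could look like this (illustrative sketch; assumes the default egs dir used above):

  egs_dir=exp/xvector_nnet_1a/egs
  # Field 3 is the global archive index, field 6 the speaker id (see the comment in the script).
  cat $egs_dir/temp/ranges.* | awk '{print $3}' | sort -n | uniq | wc -l            # number of archives
  cat $egs_dir/temp/ranges.* | awk '{print $6}' | sort | uniq -c | sort -nr | head  # examples per speaker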
diff --git a/egs/voxceleb/v2/local/prepare_for_eer.py b/egs/voxceleb/v2/local/prepare_for_eer.py
new file mode 100755
index 0000000000000000000000000000000000000000..2f569b70bc589c785943f95d28f3b2d72c8b20cc
--- /dev/null
+++ b/egs/voxceleb/v2/local/prepare_for_eer.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+#
+# Copyright 2015  David Snyder
+# Apache 2.0.
+#
+# Copied from egs/sre10/v1/local/prepare_for_eer.py (commit 9cb4c4c2fb0223ee90c38d98af11305074eb7ef8)
+#
+# Given a trials file and a scores file, this script
+# prepares input for the binary compute-eer.
+import sys
+trials = open(sys.argv[1], 'r').readlines()
+scores = open(sys.argv[2], 'r').readlines()
+spkrutt2target = {}
+for line in trials:
+  spkr, utt, target = line.strip().split()
+  spkrutt2target[spkr+utt] = target
+for line in scores:
+  spkr, utt, score = line.strip().split()
+  print("{} {}".format(score, spkrutt2target[spkr+utt]))
diff --git a/egs/voxceleb/v2/local b/egs/voxceleb/v2/local2
similarity index 100%
rename from egs/voxceleb/v2/local
rename to egs/voxceleb/v2/local2
diff --git a/egs/voxforge/.DS_Store b/egs/voxforge/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..3ef0e7fb683034f86b60a4c47b17cdeeb7baaa4f
Binary files /dev/null and b/egs/voxforge/.DS_Store differ
diff --git a/egs/voxforge/s5/.DS_Store b/egs/voxforge/s5/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..fd02e9645239fa017d618ee71de4b79048707e6a
Binary files /dev/null and b/egs/voxforge/s5/.DS_Store differ
diff --git a/egs/wsj/.DS_Store b/egs/wsj/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..6ee6a89767d74d4211ea002f71327b2b72fb5169
Binary files /dev/null and b/egs/wsj/.DS_Store differ
diff --git a/src/.DS_Store b/src/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..a8186c21bbcc852b093943a73dda04ae082949ca
Binary files /dev/null and b/src/.DS_Store differ
diff --git a/src/matrix/t.bin b/src/matrix/t.bin
new file mode 100644
index 0000000000000000000000000000000000000000..36e7472b1e5339f1f2806d23959b4d102d9ac6c3
Binary files /dev/null and b/src/matrix/t.bin differ
diff --git a/tools/.DS_Store b/tools/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..d0424173c01af5637bc0771814c2a085e13868e8
Binary files /dev/null and b/tools/.DS_Store differ
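Finally, prepare_for_eer.py (added above) pairs each trial score with its target/nontarget label so the result can be fed to Kaldi's compute-eer binary; a typical scoring step looks roughly like this (illustrative; the trials and scores paths are placeholders):

  eer=$(compute-eer <(local/prepare_for_eer.py data/voxceleb1_test/trials exp/scores/scores_voxceleb1_test) 2>/dev/null)
  echo "EER: ${eer}%"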