From 6b0938a61d597ca248f5e7eab97995cfbbe20d85 Mon Sep 17 00:00:00 2001 From: Meixu Song <songmeixu@outlook.com> Date: Fri, 19 Jun 2020 23:30:02 +0800 Subject: [PATCH] [scripts,egs] modify chain2 script (add ivector, diagnosis log, compute_wer.sh), and add wer results for mini_librispeech and wsj (#4001) --- .gitpod.Dockerfile | 22 + .gitpod.yml | 7 + README.md | 1 + egs/mini_librispeech/s5/cmd.sh | 1 + .../s5/local/chain2/compare_wer.sh | 137 ++++++ .../s5/local/chain2/data_prep_common.sh | 78 --- .../s5/local/chain2/tuning/run_tdnn_1a.sh | 101 ++-- egs/wsj/s5/local/chain2/compare_wer.sh | 143 ++++++ egs/wsj/s5/local/chain2/run_tdnn.sh | 1 + egs/wsj/s5/local/chain2/tuning/run_tdnn_1i.sh | 463 ++++++++++++++++++ egs/wsj/s5/run.sh | 2 +- egs/wsj/s5/steps/nnet3/chain2/train.sh | 47 +- 12 files changed, 892 insertions(+), 111 deletions(-) create mode 100644 .gitpod.Dockerfile create mode 100644 .gitpod.yml create mode 100755 egs/mini_librispeech/s5/local/chain2/compare_wer.sh delete mode 100755 egs/mini_librispeech/s5/local/chain2/data_prep_common.sh create mode 100755 egs/wsj/s5/local/chain2/compare_wer.sh create mode 120000 egs/wsj/s5/local/chain2/run_tdnn.sh create mode 100755 egs/wsj/s5/local/chain2/tuning/run_tdnn_1i.sh diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile new file mode 100644 index 000000000..53c8237d4 --- /dev/null +++ b/.gitpod.Dockerfile @@ -0,0 +1,22 @@ +FROM gitpod/workspace-full + +USER songmeixu + +# Install custom tools, runtime, etc. using apt-get +# For example, the command below would install "bastet" - a command line tetris clone: +# +# RUN sudo apt-get -q update && # sudo apt-get install -yq bastet && # sudo rm -rf /var/lib/apt/lists/* +# +# More information: https://www.gitpod.io/docs/config-docker/ + +FROM gitpod/workspace-full + +RUN sudo apt-get update \ + && sudo apt-get install -y \ + sox gfortran \ + && sudo rm -rf /var/lib/apt/lists/* + +RUN cd ~/GitHub/r-with-intel-mkl/ \ + && wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | sudo apt-key add - \ + && sudo sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' \ + && sudo apt-get update && sudo apt-get install intel-mkl-64bit diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 000000000..9e8cb2b79 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,7 @@ +image: + file: .gitpod.dockerfile + +tasks: + - init: echo "Replace me with a build script for the project." + command: echo "Replace me with something that should run on every start, or just + remove me entirely." 
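The init task in .gitpod.yml above is left as a placeholder. A minimal sketch of what it could contain for Kaldi, assuming the standard tools/src build sequence from Kaldi's INSTALL instructions (the patch itself does not specify the actual task):

    # Hypothetical Gitpod init task: build Kaldi tools and source.
    cd tools && extras/check_dependencies.sh && make -j "$(nproc)"
    cd ../src && ./configure --shared && make depend -j "$(nproc)" && make -j "$(nproc)"
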
diff --git a/README.md b/README.md index 963b82ed4..ae0ceeeeb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [](https://travis-ci.com/kaldi-asr/kaldi) +[](https://gitpod.io/#https://github.com/kaldi-asr/kaldi) Kaldi Speech Recognition Toolkit ================================ diff --git a/egs/mini_librispeech/s5/cmd.sh b/egs/mini_librispeech/s5/cmd.sh index 71dd849a9..ed453ff84 100644 --- a/egs/mini_librispeech/s5/cmd.sh +++ b/egs/mini_librispeech/s5/cmd.sh @@ -13,3 +13,4 @@ export train_cmd="queue.pl --mem 2G" export decode_cmd="queue.pl --mem 4G" export mkgraph_cmd="queue.pl --mem 8G" +export cuda_cmd="queue.pl --gpu 1" diff --git a/egs/mini_librispeech/s5/local/chain2/compare_wer.sh b/egs/mini_librispeech/s5/local/chain2/compare_wer.sh new file mode 100755 index 000000000..c14d2031f --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain2/compare_wer.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_train.final.log | grep -v xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_heldout.final.log | grep -v xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_train.final.log | grep -w xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_heldout.final.log | grep -w xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Num-params " +for x in $*; do + printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}') +done +echo \ No newline at end of file diff --git a/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh b/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh deleted file mode 100755 index 21b36cce4..000000000 --- a/egs/mini_librispeech/s5/local/chain2/data_prep_common.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash -# Copyright 2019 Daniel Povey -# 2019 Srikanth Madikeri (Idiap Research Institute) - -set -euo pipefail - -# This script is called from local/chain/tuning/run_tdnn_2a.sh and -# similar scripts. It contains the common feature preparation and -# lattice-alignment preparation parts of the chaina training. -# See those scripts for examples of usage. - -stage=0 -train_set=train_clean_5 -test_sets="dev_clean_2" -gmm=tri3b - -. ./cmd.sh -. ./path.sh -. 
utils/parse_options.sh - -gmm_dir=exp/${gmm} -ali_dir=exp/${gmm}_ali_${train_set}_sp - -for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do - if [ ! -f $f ]; then - echo "$0: expected file $f to exist" - exit 1 - fi -done - -# Our default data augmentation method is 3-way speed augmentation followed by -# volume perturbation. We are looking into better ways of doing this, -# e.g. involving noise and reverberation. - -if [ $stage -le 1 ]; then - # Although the nnet will be trained by high resolution data, we still have to - # perturb the normal data to get the alignment. _sp stands for speed-perturbed - echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" - utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp - echo "$0: making MFCC features for low-resolution speed-perturbed data" - steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1; - steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; - utils/fix_data_dir.sh data/${train_set}_sp -fi - -if [ $stage -le 2 ]; then - echo "$0: aligning with the perturbed low-resolution data" - steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ - data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 -fi - -if [ $stage -le 3 ]; then - # Create high-resolution MFCC features (with 40 cepstra instead of 13). - # this shows how you can split across multiple file-systems. - echo "$0: creating high-resolution MFCC features" - mfccdir=data/${train_set}_sp_hires/data - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then - utils/create_split_dir.pl /export/fs0{1,2}/$USER/kaldi-data/mfcc/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage - fi - - for datadir in ${train_set}_sp ${test_sets}; do - utils/copy_data_dir.sh data/$datadir data/${datadir}_hires - done - - # do volume-perturbation on the training data prior to extracting hires - # features; this helps make trained nnets more invariant to test data volume. - utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; - - for datadir in ${train_set}_sp ${test_sets}; do - steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \ - --cmd "$train_cmd" data/${datadir}_hires || exit 1; - steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; - utils/fix_data_dir.sh data/${datadir}_hires || exit 1; - done -fi - - -exit 0 diff --git a/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh index 2311fc069..ee97f2d93 100755 --- a/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh +++ b/egs/mini_librispeech/s5/local/chain2/tuning/run_tdnn_1a.sh @@ -1,9 +1,25 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2019 Srikanth Madikeri (Idiap Research Institute) -# +# # This script is a modification of local/chain/run_tdnn.sh adapted to the chain2 recipes. +# This is a basic TDNN experiment. +# run_tdnn_1a.sh in local/chain2 but uses new kaldi recipe. 
+ +# steps/info/chain_dir_info.pl exp/chain2/tdnn1a_sp +# exp/chain2/tdnn1a_sp: num-iters=6 nj=2..5 combine=-0.038->-0.033 (over 3) + +# local/chain2/compare_wer.sh exp/chain2/tdnn1a_sp +# System tdnn1a_sp +#WER dev_clean_2 (tgsmall) 17.50 +#WER dev_clean_2 (tglarge) 12.67 +# Final train prob -0.0626 +# Final valid prob -0.0539 +# Final train prob (xent) -1.5220 +# Final valid prob (xent) -1.3991 +# Num-params 10005600 + # Set -e here so that we catch if any executable fails immediately set -euo pipefail @@ -14,20 +30,19 @@ decode_nj=10 train_set=train_clean_5 test_sets=dev_clean_2 gmm=tri3b -srand=0 nnet3_affix= # The rest are configs specific to this script. Most of the parameters # are just hardcoded at this level, in the commands below. -affix=2c # affix for the TDNN directory name +affix=1a # affix for the TDNN directory name tree_affix= train_stage=-10 get_egs_stage=-10 +decode_iter= - +# training options # training chunk-options -chunk_width=140 -dropout_schedule='0,0@0.20,0.3@0.50,0' +chunk_width=140,100,160 xent_regularize=0.1 bottom_subsampling_factor=1 # I'll set this to 3 later, 1 is for compatibility with a broken ru. frame_subsampling_factor=3 @@ -45,6 +60,14 @@ egs_extra_right_context=5 # to group multiple speaker together in some cases). chunks_per_group=4 +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + # End configuration section. echo "$0 $@" # Print the command line for logging @@ -53,32 +76,35 @@ echo "$0 $@" # Print the command line for logging . ./path.sh . ./utils/parse_options.sh -# if ! cuda-compiled; then -# cat <<EOF && exit 1 -# This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA -# If you want to use GPUs (and have them), go to src/, and configure and make on a machine -# where "nvcc" is installed. -# EOF -# fi - -if [ $stage -le 9 ]; then - local/chain2/data_prep_common.sh \ - --train-set $train_set \ - --gmm $gmm || exit 1; +if ! cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF fi +# The iVector-extraction and feature-dumping parts are the same as the standard +# nnet3 setup, and you can skip them by setting "--stage 11" if you have already +# run those things. +local/nnet3/run_ivector_common.sh --stage $stage \ + --train-set $train_set \ + --gmm $gmm \ + --nnet3-affix "$nnet3_affix" || exit 1; + # Problem: We have removed the "train_" prefix of our training set in # the alignment directory names! Bad! gmm_dir=exp/$gmm ali_dir=exp/${gmm}_ali_${train_set}_sp -tree_dir=exp/chaina/tree_sp${tree_affix:+_$tree_affix} +tree_dir=exp/chain2${nnet3_affix}/tree_sp${tree_affix:+_$tree_affix} lang=data/lang_chain -lat_dir=exp/chaina/${gmm}_${train_set}_sp_lats -dir=exp/chaina/tdnn${affix}_sp +lat_dir=exp/chain2${nnet3_affix}/${gmm}_${train_set}_sp_lats +dir=exp/chain2${nnet3_affix}/tdnn${affix}_sp train_data_dir=data/${train_set}_sp_hires lores_train_data_dir=data/${train_set}_sp +train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires -for f in $gmm_dir/final.mdl $train_data_dir/feats.scp \ +for f in $gmm_dir/final.mdl $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \ $lores_train_data_dir/feats.scp $ali_dir/ali.1.gz; do [ ! 
-f $f ] && echo "$0: expected file $f to exist" && exit 1 done @@ -151,10 +177,16 @@ if [ $stage -le 14 ]; then echo "$0: creating top model" cat <<EOF > $dir/configs/default.xconfig - input name=input dim=40 + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + # the first splicing is moved before the lda layer, so no splicing here - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat - relu-renorm-layer name=tdnn1 dim=512 input=Append(-2,-1,0,1,2) + relu-renorm-layer name=tdnn1 dim=512 relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) @@ -245,11 +277,13 @@ if [ $stage -le 17 ]; then # Dump raw egs. steps/chain2/get_raw_egs.sh --cmd "$train_cmd" \ --lang "default" \ + --cmvn-opts "--norm-means=false --norm-vars=false" \ --left-context $egs_left_context \ --right-context $egs_right_context \ --frame-subsampling-factor $frame_subsampling_factor \ --alignment-subsampling-factor $frame_subsampling_factor \ - --frames-per-chunk 140,100,160 \ + --frames-per-chunk ${chunk_width} \ + --online-ivector-dir ${train_ivector_dir} \ ${train_data_dir} ${dir} ${lat_dir} ${dir}/raw_egs fi @@ -298,7 +332,8 @@ if [ $stage -le 22 ]; then --xent-regularize $xent_regularize --leaky-hmm-coefficient 0.1 \ --max-param-change 2.0 \ --num-jobs-initial 2 --num-jobs-final 5 \ - $dir/egs $dir + --groups-per-minibatch 256,128,64 \ + $dir/egs $dir || exit 1; fi if [ $stage -le 23 ]; then @@ -310,23 +345,29 @@ if [ $stage -le 23 ]; then fi if [ $stage -le 24 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) # Do the speaker-dependent decoding pass test_sets=dev_clean_2 for data in $test_sets; do + ( nspk=$(wc -l <data/${data}_hires/spk2utt) steps/nnet3/decode.sh \ --acwt 1.0 --post-decode-acwt 10.0 \ --extra-left-context $egs_left_context \ --extra-right-context $egs_right_context \ - --frames-per-chunk 150 \ --extra-left-context-initial 0 \ --extra-right-context-final 0 \ - --nj $nspk --cmd "$decode_cmd" \ + --frames-per-chunk $frames_per_chunk \ + --nj $nspk --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ $tree_dir/graph_tgsmall data/${data}_hires ${dir}/decode_tgsmall_${data} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ data/lang_test_{tgsmall,tglarge} \ data/${data}_hires ${dir}/decode_{tgsmall,tglarge}_${data} || exit 1 + ) || touch $dir/.error & done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 fi exit 0; diff --git a/egs/wsj/s5/local/chain2/compare_wer.sh b/egs/wsj/s5/local/chain2/compare_wer.sh new file mode 100755 index 000000000..e335d92c9 --- /dev/null +++ b/egs/wsj/s5/local/chain2/compare_wer.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash + +# this script is used for comparing decoding results between systems. +# e.g. 
local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev93 (tgpr) " + "#WER dev93 (tg) " + "#WER dev93 (big-dict,tgpr) " + "#WER dev93 (big-dict,fg) " + "#WER eval92 (tgpr) " + "#WER eval92 (tg) " + "#WER eval92 (big-dict,tgpr)" + "#WER eval92 (big-dict,fg) ") + +for n in 0 1 2 3 4 5 6 7; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgpr_dev93 tg_dev93 bd_tgpr_dev93 bd_tgpr_dev93_fg tgpr_eval92 tg_eval92 bd_tgpr_eval92 bd_tgpr_eval92_fg) + + wer=$(< $dirname/decode_${decode_names[$n]}/scoring_kaldi/best_wer utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(< $dirname/decode_looped_${decode_names[$n]}/scoring_kaldi/best_wer utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(< ${dirname}_online/decode_${decode_names[$n]}/scoring_kaldi/best_wer utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_train.final.log | grep -v xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_heldout.final.log | grep -v xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_train.final.log | grep -w xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/diagnostic_heldout.final.log | grep -w xent | awk '{printf("%.4f", $10)}') + printf "% 10s" $prob +done +echo + +echo -n "# Num-params " +for x in $*; do + printf "% 10s" $(grep num-parameters $x/log/progress.1.log | awk '{print $2}') +done +echo diff --git a/egs/wsj/s5/local/chain2/run_tdnn.sh b/egs/wsj/s5/local/chain2/run_tdnn.sh new file mode 120000 index 000000000..deb68d515 --- /dev/null +++ b/egs/wsj/s5/local/chain2/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1i.sh \ No newline at end of file diff --git a/egs/wsj/s5/local/chain2/tuning/run_tdnn_1i.sh b/egs/wsj/s5/local/chain2/tuning/run_tdnn_1i.sh new file mode 100755 index 000000000..4a997a137 --- /dev/null +++ b/egs/wsj/s5/local/chain2/tuning/run_tdnn_1i.sh @@ -0,0 +1,463 @@ +#!/usr/bin/env bash + +# 1i is like 1h, while it introduces 'apply-cmvn-online' that does +# cmn normalization both for i-extractor and TDNN input. +# run_tdnn_1i.sh in local/chain2 uses new kaldi recipe. + +# local/chain2/compare_wer.sh exp/chain2_online_cmn/tdnn1i_sp +# System tdnn1i_sp +#WER dev93 (tgpr) 6.83 +#WER dev93 (tg) 6.53 +#WER dev93 (big-dict,tgpr) 4.71 +#WER dev93 (big-dict,fg) 4.31 +#WER eval92 (tgpr) 4.86 +#WER eval92 (tg) 4.43 +#WER eval92 (big-dict,tgpr) 2.71 +#WER eval92 (big-dict,fg) 2.27 +# Final train prob -0.0397 +# Final valid prob -0.0346 +# Final train prob (xent) -0.7091 +# Final valid prob (xent) -0.6436 +# Num-params 9476352 + +# steps/info/chain_dir_info.pl exp/chain_online_cmn/tdnn1i_sp +# exp/chain_online_cmn/tdnn1i_sp: num-iters=108 nj=2..8 num-params=8.4M dim=40+100->2880 combine=-0.044->-0.044 (over 1) xent:train/valid[71,107,final]=(-0.873,-0.660,-0.672/-0.906,-0.714,-0.734) logprob:train/valid[71,107,final]=(-0.067,-0.044,-0.044/-0.068,-0.054,-0.055) + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train_si284 +test_sets="test_dev93 test_eval92" +gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. + +num_threads_ubm=8 + +nj_extractor=10 +# It runs a JOB with '-pe smp N', where N=$[threads*processes] +num_threads_extractor=4 +num_processes_extractor=2 + +nnet3_affix=_online_cmn # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1i #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +reporting_email= + +# Setting 'online_cmvn' to true replaces 'apply-cmvn' by +# 'apply-cmvn-online' both for i-vector extraction and TDNN input. +# The i-vector extractor uses the config 'conf/online_cmvn.conf' for +# both the UBM and the i-extractor. 
The TDNN input is configured via +# '--feat.cmvn-opts' that is set to the same config, so we use the +# same cmvn for i-extractor and the TDNN input. +online_cmvn=true + +# LSTM/chain options +train_stage=-10 +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.5@0.50,0' + +# training chunk-options +chunk_width=140,100,160 +bottom_subsampling_factor=1 # I'll set this to 3 later, 1 is for compatibility with a broken ru. +frame_subsampling_factor=3 +langs="default" # list of language names + +# The amount of extra left/right context we put in the egs. Note: this could +# easily be zero, since we're not using a recurrent topology, but we put in a +# little extra context so that we have more room to play with the configuration +# without re-dumping egs. +egs_extra_left_context=5 +egs_extra_right_context=5 + +# The number of chunks (of length: see $chunk_width above) that we group +# together for each "speaker" (actually: pseudo-speaker, since we may have +# to group multiple speaker together in some cases). +chunks_per_group=4 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $*" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <<EOF && exit 1 +This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA +If you want to use GPUs (and have them), go to src/, and configure and make on a machine +where "nvcc" is installed. +EOF +fi + +local/nnet3/run_ivector_common.sh \ + --stage $stage --nj $nj \ + --train-set $train_set --gmm $gmm \ + --online-cmvn-iextractor $online_cmvn \ + --num-threads-ubm $num_threads_ubm \ + --nj-extractor $nj_extractor \ + --num-processes-extractor $num_processes_extractor \ + --num-threads-extractor $num_threads_extractor \ + --nnet3-affix "$nnet3_affix" + + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp +lat_dir=exp/chain2${nnet3_affix}/${gmm}_${train_set}_sp_lats +dir=exp/chain2${nnet3_affix}/tdnn${affix}_sp +train_data_dir=data/${train_set}_sp_hires +train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires +lores_train_data_dir=data/${train_set}_sp + +# note: you don't necessarily have to change the treedir name +# each time you do a new experiment-- only if you change the +# configuration in a way that affects the tree. +tree_dir=exp/chain2${nnet3_affix}/tree_a_sp +# the 'lang' directory is created by this script. +# If you create such a directory with a non-standard topology +# you should probably name it differently. +lang=data/lang_chain + +for f in $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \ + $lores_train_data_dir/feats.scp $gmm_dir/final.mdl \ + $ali_dir/ali.1.gz $gmm_dir/final.mdl; do + [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 +done + + +if [ $stage -le 12 ]; then + echo "$0: creating lang directory $lang with chain-type topology" + # Create a version of the lang/ directory that has one state per phone in the + # topo file. [note, it really has two states.. the first one is only repeated + # once, the second one has zero or more repeats.] + if [ -d $lang ]; then + if [ $lang/L.fst -nt data/lang/L.fst ]; then + echo "$0: $lang already exists, not overwriting it; continuing" + else + echo "$0: $lang already exists and seems to be older than data/lang..." + echo " ... not sure what to do. Exiting." 
+ exit 1; + fi + else + cp -r data/lang $lang + silphonelist=$(cat $lang/phones/silence.csl) || exit 1; + nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1; + # Use our special topology... note that later on may have to tune this + # topology. + steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor ${frame_subsampling_factor} \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 15 ]; then + # $dir/configs will contain xconfig and config files for the initial + # models. It's a scratch space used by this script but not by + # scripts called from here. + mkdir -p $dir/configs/ + # $dir/init will contain the initial models + mkdir -p $dir/init/ + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python) + tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.005" + + cat <<EOF > $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + idct-layer name=idct input=input dim=40 cepstral-lifter=22 affine-transform-file=$dir/configs/idct.mat + delta-layer name=delta input=idct + no-op-component name=input2 input=Append(delta, Scale(1.0, ReplaceIndex(ivector, t, 0))) + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 $tdnn_opts dim=1024 input=input2 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component 
name=prefinal-l dim=192 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + output-layer name=output-default input=prefinal-chain include-log-softmax=false dim=$num_targets $output_opts + + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts + output-layer name=output-default-xent input=prefinal-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + if [ -f $dir/init/default_trans.mdl ]; then # checking this because it may have been copied in a previous run of the same script + copy-transition-model $tree_dir/final.mdl $dir/init/default_trans.mdl || exit 1 & + else + echo "Keeping the old $dir/init/default_trans.mdl as it already exists." + fi +fi + +init_info=$dir/init/info.txt +if [ $stage -le 16 ]; then + + if [ ! -f $dir/configs/ref.raw ]; then + echo "Expected $dir/configs/ref.raw to exist" + exit + fi + + nnet3-info $dir/configs/ref.raw > $dir/configs/temp.info + model_left_context=$(grep -F 'left-context' $dir/configs/temp.info | awk '{print $2}') + model_right_context=$(grep -F 'right-context' $dir/configs/temp.info | awk '{print $2}') + cat >$init_info <<EOF +frame_subsampling_factor $frame_subsampling_factor +langs $langs +model_left_context $model_left_context +model_right_context $model_right_context +EOF + rm $dir/configs/temp.info +fi + +# Make phone LM and denominator and normalization FST +if [ $stage -le 17 ]; then + echo "$0: Making Phone LM and denominator and normalization FST" + mkdir -p $dir/den_fsts/log + + # We may later reorganize this. + cp $tree_dir/tree $dir/default.tree + + echo "$0: creating phone language-model" + $train_cmd $dir/den_fsts/log/make_phone_lm_default.log \ + chain-est-phone-lm --num-extra-lm-states=2000 \ + "ark:gunzip -c $gmm_dir/ali.*.gz | ali-to-phones $gmm_dir/final.mdl ark:- ark:- |" \ + $dir/den_fsts/default.phone_lm.fst + + echo "$0: creating denominator FST" + $train_cmd $dir/den_fsts/log/make_den_fst.log \ + chain-make-den-fst $dir/default.tree $dir/init/default_trans.mdl $dir/den_fsts/default.phone_lm.fst \ + $dir/den_fsts/default.den.fst $dir/den_fsts/default.normalization.fst || exit 1; +fi + +model_left_context=$(awk '/^model_left_context/ {print $2;}' $dir/init/info.txt) +model_right_context=$(awk '/^model_right_context/ {print $2;}' $dir/init/info.txt) +if [ -z $model_left_context ]; then + echo "ERROR: Cannot find entry for model_left_context in $dir/init/info.txt" +fi +if [ -z $model_right_context ]; then + echo "ERROR: Cannot find entry for model_right_context in $dir/init/info.txt" +fi +# Note: we add frame_subsampling_factor/2 so that we can support the frame +# shifting that's done during training, so if frame-subsampling-factor=3, we +# train on the same egs with the input shifted by -1,0,1 frames. This is done +# via the --frame-shift option to nnet3-chain-copy-egs in the script. +egs_left_context=$((model_left_context+(frame_subsampling_factor/2)+egs_extra_left_context)) +egs_right_context=$((model_right_context+(frame_subsampling_factor/2)+egs_extra_right_context)) + +for d in $dir/raw_egs $dir/processed_egs; do + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $d/storage ] ; then + mkdir -p $d + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$d/storage $d/storage + fi +done + +if [ $stage -le 18 ]; then + echo "$0: about to dump raw egs." + # Dump raw egs. + steps/chain2/get_raw_egs.sh --cmd "$train_cmd" \ + --lang "default" \ + --cmvn-opts "--config=conf/online_cmvn.conf" \ + --left-context $egs_left_context \ + --right-context $egs_right_context \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor $frame_subsampling_factor \ + --frames-per-chunk ${chunk_width} \ + --online-ivector-dir ${train_ivector_dir} \ + ${train_data_dir} ${dir} ${lat_dir} ${dir}/raw_egs +fi + +if [ $stage -le 19 ]; then + echo "$0: about to process egs" + steps/chain2/process_egs.sh --cmd "$train_cmd" \ + --num-repeats 1 \ + ${dir}/raw_egs ${dir}/processed_egs +fi + +if [ $stage -le 20 ]; then + echo "$0: about to randomize egs" + steps/chain2/randomize_egs.sh --frames-per-job 5000000 \ + ${dir}/processed_egs ${dir}/egs +fi + +if [ $stage -le 21 ]; then + echo "$0: Preparing initial acoustic model" + if [ -f $dir/configs/init.config ]; then + $train_cmd ${dir}/log/add_first_layer.log \ + nnet3-init --srand=${srand} ${dir}/configs/init.raw \ + ${dir}/configs/final.config ${dir}/init/default.raw || exit 1 + else + $train_cmd ${dir}/log/init_model.log \ + nnet3-init --srand=${srand} ${dir}/configs/final.config ${dir}/init/default.raw || exit 1 + fi + + $train_cmd $dir/log/init_mdl.log \ + nnet3-am-init ${dir}/init/default_trans.mdl $dir/init/default.raw $dir/init/default.mdl || exit 1 +fi + +if [ $stage -le 22 ]; then + echo "$0: about to train model" + steps/chain2/train.sh \ + --stage $train_stage --cmd "$decode_cmd" \ + --xent-regularize $xent_regularize --leaky-hmm-coefficient 0.1 \ + --max-param-change 2.0 \ + --dropout-schedule ${dropout_schedule} \ + --num-jobs-initial 2 --num-jobs-final 8 \ + --initial-effective-lrate 0.0005 \ + --final-effective-lrate 0.00005 \ + --num-epochs 10 \ + --groups-per-minibatch 128,64 \ + $dir/egs $dir || exit 1; +fi + +if [ $stage -le 23 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. 
+ + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgpr \ + $tree_dir $tree_dir/graph_tgpr || exit 1; + + utils/lang/check_phones_compatible.sh \ + data/lang_test_bd_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + $tree_dir $tree_dir/graph_bd_tgpr || exit 1; +fi + +if [ $stage -le 24 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l <data/${data}_hires/spk2utt) + for lmtype in tgpr bd_tgpr; do + steps/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $egs_left_context --extra-right-context $egs_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk $frames_per_chunk \ + --nj $nspk --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${data}_hires \ + $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 + done + steps/lmrescore.sh \ + --self-loop-scale 1.0 \ + --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ + data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_test_bd_{tgpr,fgconst} \ + data/${data}_hires ${dir}/decode_${lmtype}_${data_affix}{,_fg} || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + +# Not testing the 'looped' decoding separately, because for +# TDNN systems it would give exactly the same results as the +# normal decoding. + +if $test_online_decoding && [ $stage -le 25 ]; then + cp $dir/default.tree $dir/tree + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l <data/${data}_hires/spk2utt) + # note: we just give it "data/${data}" as it only uses the wav.scp, the + # feature type does not matter. + for lmtype in tgpr bd_tgpr; do + steps/online/nnet3/decode.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nspk --cmd "$decode_cmd" \ + $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 + done + steps/lmrescore.sh \ + --self-loop-scale 1.0 \ + --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ + data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_test_bd_{tgpr,fgconst} \ + data/${data}_hires ${dir}_online/decode_${lmtype}_${data_affix}{,_fg} || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/wsj/s5/run.sh b/egs/wsj/s5/run.sh index c4e9326a3..400130c91 100755 --- a/egs/wsj/s5/run.sh +++ b/egs/wsj/s5/run.sh @@ -322,7 +322,7 @@ fi if [ $stage -le 7 ]; then # Caution: this part needs a GPU. 
- local/chain/run_tdnn.sh + local/chain2/run_tdnn.sh fi exit 0; diff --git a/egs/wsj/s5/steps/nnet3/chain2/train.sh b/egs/wsj/s5/steps/nnet3/chain2/train.sh index 3acd6962a..e4159b556 100755 --- a/egs/wsj/s5/steps/nnet3/chain2/train.sh +++ b/egs/wsj/s5/steps/nnet3/chain2/train.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2019 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. # Copyright 2019 Idiap Research Institute (Author: Srikanth Madikeri). Apache 2.0. @@ -194,6 +194,13 @@ while [ $x -lt $num_iters ]; do done fi + if [ $x -gt 0 ]; then + # This doesn't use the egs, it only shows the relative change in model parameters. + $cmd $dir/log/progress.$x.log \ + nnet3-show-progress --use-gpu=no $dir/$(($x-1)).raw $dir/${x}.raw '&&' \ + nnet3-info $dir/${x}.raw & + fi + cache_io_opt="--write-cache=$dir/cache.$next_x" if [ $x -gt 0 -a -f $dir/cache.$x ]; then cache_io_opt="$cache_io_opt --read-cache=$dir/cache.$x" @@ -210,7 +217,7 @@ while [ $x -lt $num_iters ]; do $cmd $gpu_cmd_opt $dir/log/train.$x.$j.log \ nnet3-chain-train2 \ $parallel_train_opts $verbose_opt \ - --out-of-range-regularize=$out_of_range_regularize \ + --out-of-range-regularize=$out_of_range_regularize \ $cache_io_opt \ --use-gpu=$use_gpu --apply-deriv-weights=$apply_deriv_weights \ --leaky-hmm-coefficient=$leaky_hmm_coefficient --xent-regularize=$xent_regularize \ @@ -278,7 +285,43 @@ if [ $stage -le $num_iters ]; then nnet3-am-init $dir/0_trans.mdl - $dir/final.mdl fi + # Compute the probability of the final, combined model with + # the same subset we used for the previous diagnostic processes, as the + # different subsets will lead to different probs. + [ -f $dir/.error_diagnostic ] && rm $dir/.error_diagnostic + for name in train heldout; do + egs_opts= + if $multilingual_eg; then + weight_rspecifier=$egs_dir/diagnostic_${name}.weight.ark + [[ -f $weight_rspecifier ]] && egs_opts="--weights=ark:$weight_rspecifier" + fi + $cmd $gpu_cmd_opt $dir/log/diagnostic_${name}.final.log \ + nnet3-chain-train2 --use-gpu=$use_gpu \ + --leaky-hmm-coefficient=$leaky_hmm_coefficient \ + --xent-regularize=$xent_regularize \ + --out-of-range-regularize=$out_of_range_regularize \ + $l2_regularize_opt \ + --print-interval=10 \ + $dir/final.raw $den_fst_dir \ + "ark:nnet3-chain-copy-egs $egs_opts scp:$egs_dir/${name}_subset.scp ark:- | nnet3-chain-merge-egs $multilingual_eg_opts --minibatch-size=1:64 ark:- ark:-|" \ + $dir/final_${name}.mdl || touch $dir/.error_diagnostic & + done + + if [ -f $dir/final_train.mdl ]; then + rm $dir/final_{train,heldout}.mdl + fi fi +if [ ! -f $dir/final.mdl ]; then + echo "$0: $dir/final.mdl does not exist." + # we don't want to clean up if the training didn't succeed. + exit 1; +fi + +sleep 2 + echo "$0: done" + +steps/info/chain_dir_info.pl $dir + exit 0 -- GitLab
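A usage sketch of the diagnostics and WER reporting added by this patch, assuming a finished WSJ chain2 run with the directory names defined in run_tdnn_1i.sh above (exp/chain2_online_cmn/tdnn1i_sp is that script's $dir):

    # Summarize the training run (iterations, jobs, final combine objective).
    steps/info/chain_dir_info.pl exp/chain2_online_cmn/tdnn1i_sp

    # The modified steps/nnet3/chain2/train.sh now writes per-iteration progress
    # logs and final train/heldout diagnostics; the objectives are on the
    # "Overall" lines, e.g.:
    grep Overall exp/chain2_online_cmn/tdnn1i_sp/log/diagnostic_heldout.final.log

    # Compare WER and those final diagnostics across one or more systems.
    local/chain2/compare_wer.sh exp/chain2_online_cmn/tdnn1i_sp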