Commit 7a670411 authored by poppto72658's avatar poppto72658

Initial commit

parents
File added
# Job launchers for a single-machine setup: every stage is started through
# run.pl on the local CPU, with no external cluster involved.
train_cmd=run.pl
decode_cmd=run.pl
export train_cmd decode_cmd
\ No newline at end of file
File added
first_beam=10.0
beam=13.0
lattice_beam=6.0
\ No newline at end of file
--use-energy=false
\ No newline at end of file
File added
one eight seven
four six three
four six three
five two nine
five three nine
five five three
five six three
nine eight one
one nine five
two five seven
three nine five
three nine eight
five six seven
six four one
six four four
nine five four
one nine nine
two seven three
four five six
five six seven
six seven seven
eight four three
nine one one
seven six three
one nine five
one nine seven
three four four
five two seven
five two nine
six six six
seven six eight
nine three five
\ No newline at end of file
!SIL sil
<UNK> spn
eight ey t
five f ay v
four f ao r
nine n ay n
one hh w ah n
one w ah n
seven s eh v ah n
six s ih k s
three th r iy
two t uw
zero z ih r ow
zero z iy r ow
\ No newline at end of file
ah
ao
ay
eh
ey
f
hh
ih
iy
k
n
ow
r
s
t
th
uw
w
v
z
\ No newline at end of file
sil
\ No newline at end of file
sil
spn
\ No newline at end of file
\data\
ngram 1=12
\1-grams:
-0.6245009 </s>
-99 <s>
-1.364864 eight
-0.9125659 five
-1.101622 four
-0.9669235 nine
-1.143015 one
-1.063834 seven
-0.9669235 six
-1.101622 three
-1.665893 tree
-1.364864 two
\end\
-pau-
</s>
<s>
<unk>
eight
five
four
nine
one
seven
six
three
tree
two
kenny m
\ No newline at end of file
kenny kenny_1_8_7 kenny_4_6_3 kenny_4_6_3 kenny_5_2_9 kenny_5_3_9 kenny_5_5_3 kenny_5_6_3 kenny_9_8_1
kenny_1_8_7 one eight seven
kenny_4_6_3 four six three
kenny_4_6_3 four six three
kenny_5_2_9 five two nine
kenny_5_3_9 five three nine
kenny_5_5_3 five five three
kenny_5_6_3 five six three
kenny_9_8_1 nine eight one
\ No newline at end of file
kenny_1_8_7 kenny
kenny_4_6_3 kenny
kenny_4_6_3 kenny
kenny_5_2_9 kenny
kenny_5_3_9 kenny
kenny_5_5_3 kenny
kenny_5_6_3 kenny
kenny_9_8_1 kenny
\ No newline at end of file
kenny_1_8_7 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/1_8_7.wav
kenny_4_6_3 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/4_6_3.wav
kenny_4_6_3 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/4_6_3.wav
kenny_5_2_9 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/5_2_9.wav
kenny_5_3_9 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/5_3_9.wav
kenny_5_5_3 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/5_5_3.wav
kenny_5_6_3 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/5_6_3.wav
kenny_9_8_1 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/test/kenny/9_8_1.wav
\ No newline at end of file
lisa f
tobi m
tohm m
\ No newline at end of file
lisa lisa_1_9_5 lisa_2_5_7 lisa_3_9_5 lisa_3_9_8 lisa_5_6_7 lisa_6_4_1 lisa_6_4_4 lisa_9_5_4
tobi tobi_1_9_9 tobi_2_7_3 tobi_4_5_6 tobi_5_6_7 tobi_6_7_7 tobi_8_4_3 tobi_9_1_1 tobi_7_6_3
tohm tohm_1_9_5 tohm_1_9_7 tohm_3_4_4 tohm_5_2_7 tohm_5_2_9 tohm_6_6_6 tohm_7_6_8 tohm_9_3_5
lisa_1_9_5 one nine five
lisa_2_5_7 two five seven
lisa_3_9_5 three nine five
lisa_3_9_8 three nine eight
lisa_5_6_7 five six seven
lisa_6_4_1 six four one
lisa_6_4_4 six four four
lisa_9_5_4 nine five four
tobi_1_9_9 one nine nine
tobi_2_7_3 two seven three
tobi_4_5_6 four five six
tobi_5_6_7 five six seven
tobi_6_7_7 six seven seven
tobi_8_4_3 eight four three
tobi_9_1_1 nine one one
tobi_7_6_3 seven six three
tohm_1_9_5 one nine five
tohm_1_9_7 one nine seven
tohm_3_4_4 three four four
tohm_5_2_7 five two seven
tohm_5_2_9 five two nine
tohm_6_6_6 six six six
tohm_7_6_8 seven six eight
tohm_9_3_5 nine three five
\ No newline at end of file
lisa_1_9_5 lisa
lisa_2_5_7 lisa
lisa_3_9_5 lisa
lisa_3_9_8 lisa
lisa_5_6_7 lisa
lisa_6_4_1 lisa
lisa_6_4_4 lisa
lisa_9_5_4 lisa
tobi_1_9_9 tobi
tobi_2_7_3 tobi
tobi_4_5_6 tobi
tobi_5_6_7 tobi
tobi_6_7_7 tobi
tobi_8_4_3 tobi
tobi_9_1_1 tobi
tobi_7_6_3 tobi
tohm_1_9_5 tohm
tohm_1_9_7 tohm
tohm_3_4_4 tohm
tohm_5_2_7 tohm
tohm_5_2_9 tohm
tohm_6_6_6 tohm
tohm_7_6_8 tohm
tohm_9_3_5 tohm
\ No newline at end of file
lisa_1_9_5 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/1_9_5.wav
lisa_2_5_7 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/2_5_7.wav
lisa_3_9_5 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/3_9_5.wav
lisa_3_9_8 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/3_9_8.wav
lisa_5_6_7 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/5_6_7.wav
lisa_6_4_1 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/6_4_1.wav
lisa_6_4_4 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/6_4_4.wav
lisa_9_5_4 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/lisa/9_5_4.wav
tobi_1_9_9 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/1_9_9.wav
tobi_2_7_3 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/2_7_3.wav
tobi_4_5_6 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/4_5_6.wav
tobi_5_6_7 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/5_6_7.wav
tobi_6_7_7 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/6_7_7.wav
tobi_8_4_3 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/8_4_3.wav
tobi_9_1_1 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/9_1_1.wav
tobi_7_6_3 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tobi/7_6_3.wav
tohm_1_9_5 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/1_9_5.wav
tohm_1_9_7 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/1_9_7.wav
tohm_3_4_4 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/3_4_4.wav
tohm_5_2_7 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/5_2_7.wav
tohm_5_2_9 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/5_2_9.wav
tohm_6_6_6 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/6_6_6.wav
tohm_7_6_8 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/7_6_8.wav
tohm_9_3_5 /Users/toby/Desktop/kaldi/kaldi/egs/digits/digits_audio/train/tohm/9_3_5.wav
\ No newline at end of file
#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
#
# Scores a decoding directory against the reference transcripts.
# For every LM weight in [min_lmwt, max_lmwt] it extracts the best path from
# the lattices in <decode-dir>/lat.*.gz, maps the word ids back to symbols and
# writes the compute-wer output to <decode-dir>/wer_<LMWT>.
#
# Usage: local/score.sh [options] <data-dir> <lang-dir|graph-dir> <decode-dir>
[ -f ./path.sh ] && . ./path.sh
# begin configuration section.
# These defaults can be overridden on the command line (see the usage text).
cmd=run.pl
min_lmwt=7
max_lmwt=17
#end configuration section.
[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
# Exactly three positional arguments are required after option parsing.
if [ $# -ne 3 ]; then
echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
echo " Options:"
echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
echo " --min_lmwt <int> # minumum LM-weight for lattice rescoring "
echo " --max_lmwt <int> # maximum LM-weight for lattice rescoring "
exit 1;
fi
data=$1
lang_or_graph=$2
dir=$3
# Word symbol table used to map between word strings and integer ids.
symtab=$lang_or_graph/words.txt
# Fail early if any required input file is missing.
for f in $symtab $dir/lat.1.gz $data/text; do
[ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
done
mkdir -p $dir/scoring/log
# Reference text with the <NOISE> and <SPOKEN_NOISE> tokens stripped out.
cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
# Best-path extraction, one job per LM weight. The LMWT=min:max argument and
# the literal "LMWT" in the command are presumably expanded by $cmd into one
# run per value -- confirm against the $cmd implementation in use.
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
lattice-best-path --lm-scale=LMWT --word-symbol-table=$symtab \
"ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1;
# Note: the double level of quoting for the sed command
# The pipes are escaped (\|) so that they are handed to $cmd as part of the
# command instead of being interpreted by this shell; the combined
# stdout/stderr of the pipeline is written to $dir/wer_LMWT.
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
cat $dir/scoring/LMWT.tra \| \
utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
compute-wer --text --mode=present \
ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
# Show results
# Prints each result file name followed by its lines containing WER or SER.
for f in $dir/wer_*; do echo $f; egrep '(WER)|(SER)' < $f; done
exit 0;
# Environment setup shared by the recipe scripts. Intended to be sourced
# (". ./path.sh") from the recipe directory, which is expected to sit two
# levels below the Kaldi root.
#
# Exports: KALDI_ROOT, PATH (extended with Kaldi tool directories),
#          DATA_ROOT, LC_ALL.

# Defining Kaldi root directory
KALDI_ROOT="$(pwd)/../.."
export KALDI_ROOT
# Diagnostics go to stderr: this file is sourced by many scripts, and writing
# to stdout would pollute the output of any caller that is captured or piped.
echo "kaldiroot $KALDI_ROOT" >&2
echo "with lmbin $KALDI_ROOT/src/lmbin/" >&2
#ls -la $KALDI_ROOT/src/lmbin/
# Setting paths to useful tools
export PATH="$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH"
# Defining audio data directory (modify it for your installation directory!)
export DATA_ROOT="/home/tobi/kaldi/egs/digits/digits_audio"
# Enable SRILM
# Only source env.sh when it exists, so a missing file does not emit a
# "No such file" error each time this script is sourced.
if [ -f "$KALDI_ROOT/tools/env.sh" ]; then
  . "$KALDI_ROOT/tools/env.sh"
fi
# Variable needed for proper data sorting
export LC_ALL=C
#!/bin/bash
# Top-level recipe script, first part: environment setup, cleanup of the
# previous run, acoustic data preparation, MFCC/CMVN feature extraction and
# language data preparation. Every preparation step aborts the script on
# failure so that later stages never run on missing or partial data.
. ./path.sh || exit 1
. ./cmd.sh || exit 1
nj=1 # number of parallel jobs - 1 is perfect for such a small dataset
lm_order=1 # language model order (n-gram quantity) - 1 is enough for digits grammar
# Safety mechanism (possible running this script with modified arguments)
. utils/parse_options.sh || exit 1
# No positional arguments are accepted once the options have been parsed.
if [[ $# -ge 1 ]]; then
  echo "Wrong arguments!"
  exit 1
fi
# Removing previously created data (from last run.sh execution)
rm -rf exp mfcc data/train/spk2utt data/train/cmvn.scp data/train/feats.scp data/train/split1 data/test/spk2utt data/test/cmvn.scp data/test/feats.scp data/test/split1 data/local/lang data/lang data/local/tmp data/local/dict/lexiconp.txt
echo
echo "===== PREPARING ACOUSTIC DATA ====="
echo
# Needs to be prepared by hand (or using self written scripts):
#
# spk2gender [<speaker-id> <gender>]
# wav.scp [<utteranceID> <full_path_to_audio_file>]
# text [<utteranceID> <text_transcription>]
# utt2spk [<utteranceID> <speakerID>]
# corpus.txt [<text_transcription>]
# Making spk2utt files
utils/utt2spk_to_spk2utt.pl data/train/utt2spk > data/train/spk2utt || exit 1
utils/utt2spk_to_spk2utt.pl data/test/utt2spk > data/test/spk2utt || exit 1
echo
echo "===== FEATURES EXTRACTION ====="
echo
# Making feats.scp files
mfccdir=mfcc
# Uncomment and modify arguments in scripts below if you have any problems with data sorting
# utils/validate_data_dir.sh data/train # script for checking prepared data - here: for data/train directory
# utils/fix_data_dir.sh data/train # tool for data proper sorting if needed - here: for data/train directory
steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" data/train exp/make_mfcc/train "$mfccdir" || exit 1
steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" data/test exp/make_mfcc/test "$mfccdir" || exit 1
# Making cmvn.scp files
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train "$mfccdir" || exit 1
steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test "$mfccdir" || exit 1
echo
echo "===== PREPARING LANGUAGE DATA ====="
echo
# Needs to be prepared by hand (or using self written scripts):
#
# lexicon.txt [<word> <phone 1> <phone 2> ...]
# nonsilence_phones.txt [<phone>]
# silence_phones.txt [<phone>]
# optional_silence.txt [<phone>]
# Preparing language data
utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1
echo
echo "===== LANGUAGE MODEL CREATION ====="
echo "===== MAKING lm.arpa ====="
echo
# Locate SRILM's ngram-count. `command -v` is a shell builtin that prints
# nothing when the tool is absent, unlike some `which` implementations.
loc=$(command -v ngram-count)
if [ -z "$loc" ]; then
  # Not on PATH: fall back to the SRILM binaries bundled under the Kaldi tools
  # directory, choosing the sub-directory from the `uname -a` output.
  if uname -a | grep 64 >/dev/null; then
    sdir=$KALDI_ROOT/tools/srilm/bin/i686-m64
  else
    sdir=$KALDI_ROOT/tools/srilm/bin/i686
  fi
  if [ -f "$sdir/ngram-count" ]; then
    echo "Using SRILM language modelling tool from $sdir"
    export PATH=$PATH:$sdir
  else
    echo "SRILM toolkit is probably not installed.
Instructions: tools/install_srilm.sh"
    exit 1
  fi
fi
local=data/local
# -p keeps this step from failing when the directory is already present.
mkdir -p "$local/tmp" || exit 1
# Train the n-gram LM from the corpus; stop here on failure, since every
# later stage depends on lm.arpa.
ngram-count -order $lm_order -write-vocab $local/tmp/vocab-full.txt -wbdiscount -text $local/corpus.txt -lm $local/tmp/lm.arpa || exit 1
echo
echo "===== MAKING G.fst ====="
echo
lang=data/lang
# Convert the ARPA model into the grammar FST used for graph compilation.
arpa2fst --disambig-symbol=#0 --read-symbol-table=$lang/words.txt $local/tmp/lm.arpa $lang/G.fst || exit 1
# Prints a stage banner: a blank line, the given title, then another blank line.
section_header() {
  printf '\n%s\n\n' "$1"
}

# --- Monophone system: train, decode the test set, then align the training data ---
section_header "===== MONO TRAINING ====="
steps/train_mono.sh --nj $nj --cmd "$train_cmd" data/train data/lang exp/mono || exit 1

section_header "===== MONO DECODING ====="
utils/mkgraph.sh --mono data/lang exp/mono exp/mono/graph || exit 1
steps/decode.sh --config conf/decode.config --nj $nj --cmd "$decode_cmd" exp/mono/graph data/test exp/mono/decode

section_header "===== MONO ALIGNMENT ====="
steps/align_si.sh --nj $nj --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali || exit 1

# --- First triphone pass, trained on top of the monophone alignments ---
section_header "===== TRI1 (first triphone pass) TRAINING ====="
steps/train_deltas.sh --cmd "$train_cmd" 2000 11000 data/train data/lang exp/mono_ali exp/tri1 || exit 1

section_header "===== TRI1 (first triphone pass) DECODING ====="
utils/mkgraph.sh data/lang exp/tri1 exp/tri1/graph || exit 1
steps/decode.sh --config conf/decode.config --nj $nj --cmd "$decode_cmd" exp/tri1/graph data/test exp/tri1/decode

section_header "===== run.sh script is finished ====="
#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Copyright 2013 GoVivace Inc (Author: Nagendra Goel)
# Apache 2.0
# Computes training alignments; assumes features are (LDA+MLLT or delta+delta-delta)
# + fMLLR (probably with SAT models).
# It first computes an alignment with the final.alimdl (or the final.mdl if final.alimdl
# is not present), then does 2 iterations of fMLLR estimation.
# If you supply the --use-graphs option, it will use the training
# graphs from the source directory (where the model is). In this
# case the number of jobs must match the source directory.
# Begin configuration section.
# Defaults for the job-control options listed in the usage text below.
stage=0 # later steps are gated on this value via "[ $stage -le N ]"
nj=4 # number of parallel jobs
cmd=run.pl # job launcher
use_graphs=false # when true, reuse the training graphs found in <src-dir>
# Begin configuration.
# Option strings and alignment parameters used by the commands further down.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
basis_fmllr_opts="--fmllr-min-count=22 --num-iters=10 --size-scale=0.2 --step-size-iters=3"
beam=10
retry_beam=40
boost_silence=1.5 # factor by which to boost silence during alignment.
fmllr_update_type=full
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
# Exactly four positional arguments must remain after option parsing;
# otherwise print the usage text and exit with an error.
if [ $# != 4 ]; then
echo "usage: steps/align_basis_fmllr.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_basis_fmllr.sh data/train data/lang exp/tri4 exp/tri4_ali"
echo "Note: <src-dir> should ideally have been trained by steps/train_sat_basis.sh, or"
echo "if a non-SAT system (not recommended), the basis should have been computed"
echo "by steps/get_fmllr_basis.sh."
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --use-graphs true # use graphs in src-dir"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --fmllr-update-type (full|diag|offset|none) # default full."
exit 1;
fi
# Positional arguments: data directory, lang directory, source model
# directory, and the output alignment directory.
data=$1
lang=$2
srcdir=$3
dir=$4
# Default location of the training graphs; reassigned further down depending
# on $use_graphs.
graphdir=$dir
# Values read from the lang directory; the script aborts if either file is
# unreadable.
oov=`cat $lang/oov.int` || exit 1;
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
# Per-job split of the data directory.
sdata=$data/split$nj
mkdir -p $dir/log
echo $nj > $dir/num_jobs
# Run split_data.sh unless the split directory already exists and feats.scp is
# older than it.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
# Verify that every required input file is present before doing any work.
for f in $srcdir/tree $srcdir/final.mdl $srcdir/fmllr.basis \
$data/feats.scp $lang/phones.txt; do
if [ ! -f $f ]; then
echo "$0: expected file $f to exist"
exit 1
fi
done
utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1;
# Copy the phone table, tree and model into the output directory. The copy of
# final.occs is not checked for failure.
cp $lang/phones.txt $dir || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
# Optional per-model option files: a missing file yields an empty option
# string and the failed copy is silenced (stderr goes to /dev/null).
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
cp $srcdir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option.
delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
cp $srcdir/delta_opts $dir 2>/dev/null
# The feature type is "lda" when the source directory contains final.mat,
# and "delta" otherwise.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
# Build the speaker-independent feature pipeline string. The literal "JOB" in
# the paths is presumably substituted per job by $cmd -- confirm against the
# launcher in use.
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
## Set up model and alignment model.
mdl=$srcdir/final.mdl
# Use final.alimdl as the alignment model when it exists; otherwise fall back
# to final.mdl.
if [ -f $srcdir/final.alimdl ]; then
alimdl=$srcdir/final.alimdl
else
alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
# Piped-input command strings (note the trailing "|") that run
# gmm-boost-silence with --boost=$boost_silence on the phones listed in
# optional_silence.csl, for the alignment model and the final model.
alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |"
mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |"
## Work out where we're getting the graphs from.
if $use_graphs; then
# Reusing graphs from the source directory requires the same number of jobs
# as recorded in $srcdir/num_jobs, and the graph archives must exist.
[ "$nj" != "`cat $srcdir/num_jobs`" ] && \
echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1;
[ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1;
graphdir=$srcdir
else
graphdir=$dir
# Stage 0: compile the training graphs into $dir/fsts.JOB.gz.
if [ $stage -le 0 ]; then
echo "$0: compiling training graphs"
# Transcripts are mapped to integer ids, with out-of-vocabulary words mapped
# to $oov.
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
fi
if [ $stage -le 1 ]; then
echo "$0: aligning data in $data using $alimdl and speaker-independent features."
$cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
gmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \
"ark:gunzip -c $graphdir/fsts.JOB.gz|" "$sifeats"