123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423 |
# Names exported by a star-import of this module.
# NOTE(review): no `load` name is defined in the visible part of this file
# (the loader function below is called `load_system`), so a star-import would
# fail unless `load` is defined elsewhere -- confirm the intended public name.
__all__= ['load']
- import sys,threading,collections,time
- import TLK
- import lmodel,oepmodel,segmodel
- import logging as Log
# Hypothesis record queued by Recogniser and handed to its consumers.
#   var     -- text of the part of the hypothesis that may still change
#   novar   -- text of the consolidated part (None marks the end of stream)
#   err     -- error flag
#   score   -- score reported together with the hypothesis
#   nframes -- frame count reported together with the hypothesis
#   eos     -- True when the hypothesis closes a segment
Hyp = collections.namedtuple(
    'Hyp',
    'var novar err score nframes eos',
)
class Recogniser:
    """Wrap one ``TLK.ORecogniser`` and queue its results as ``Hyp`` records.

    The instance hands itself to the TLK recogniser as its ``output`` object
    (see ``__init__``); results arrive through ``write`` (presumably called
    by TLK -- confirm) and are consumed by iterating the ``output`` property.

    ``reset`` creates the per-session state and must be called before the
    first ``feed`` (``RecogniserManager.get_reco`` does this).
    """

    def __init__(self, models, params,
                 # uppercase_tbl,uppercase_tbl_bi,
                 sil_word, sil_sym, fea_freq, mng):
        """Build the underlying TLK recogniser.

        Args:
            models: dict as returned by ``load_models``.
            params: ``TLK.OParameters`` instance.
            sil_word: optional silence word (forwarded only when not None).
            sil_sym: optional silence symbol (forwarded only when not None).
            fea_freq: feature frames per second, used to convert the amount
                of audio fed into a frame count.
            mng: the manager that owns this recogniser.
        """
        args = {
            'ifreq': 16000,
            'bsize': 400,
            'amodel': models['amodel'],
            'lm': models['lm'],
            'params': params,
            'output': self,
        }
        self._fea_freq = fea_freq
        if sil_word is not None:
            args['sil_word'] = sil_word
        if sil_sym is not None:
            args['sil_sym'] = sil_sym
        # Optional models are forwarded only when they were configured.
        for key in ('dnn', 'dlm', 'mustd'):
            if models[key] is not None:
                args[key] = models[key]
        if models['oep-factory'] is not None:
            args['oep'] = models['oep-factory'](models['oep-model'])
        self.reco = TLK.ORecogniser(**args)
        if models['seg-factory'] is not None:
            self.seg = models['seg-factory'](models['seg-model'])
        else:
            self.seg = None
        self._mng = mng
        self._cv = threading.Condition()

    def reset(self):
        """Clear all per-session state (error flag, queue, inserted tokens)."""
        self._err = False
        self._hyps = []
        self._prev = None
        if self.seg is not None:
            self.seg.reset()
        # Inserted tokens are kept in a list of lists.  The first list holds
        # the tokens of the segment currently being processed and the last
        # one the tokens of the most recent segment.  Each token is a
        # (word, number of frames at which it was inserted) tuple.
        self._itokens = [[]]
        # Number of frames of the current segment already fed into the
        # recogniser.
        self._inframes = 0.0

    def __register_itoken_nosec(self, token):
        """Record a text token at the current position of the latest segment.

        Blank tokens are ignored.  The position is None when no audio has
        been fed yet for the current segment.
        """
        token = token.strip()
        if token == '':
            return
        nframes = int(self._inframes) if self._inframes > 0.0 else None
        self._itokens[-1].append((token, nframes))

    def __register_itoken(self, token):
        """Best-effort wrapper: a token that cannot be registered is dropped."""
        try:
            self.__register_itoken_nosec(token)
        except Exception:
            # Deliberately best effort; only ordinary errors are swallowed so
            # that KeyboardInterrupt / SystemExit still propagate.
            pass

    def __register_itoken_eos(self):
        """Open a new token list when the current segment received audio."""
        if self._inframes > 0.0:
            self._itokens.append([])
            self._inframes = 0.0

    def __merge_itokens_nosec(self, code, hyp):
        """Interleave the inserted tokens of the oldest segment into *hyp*.

        *hyp* is an ``(entries, pos_var)`` pair: ``entries[:pos_var]`` is the
        consolidated part and the rest is the variable part.  Each entry is
        unpacked as ``(w, b, e)`` and ``b + e`` is compared with the frame at
        which a token was inserted (presumably start frame and length --
        confirm).  Tokens merged into the consolidated part are consumed;
        tokens merged into the variable part are kept for later hypotheses.

        Returns *hyp* unchanged when there is nothing to insert, otherwise a
        new ``(entries, pos_var)`` pair.
        """
        # Initial checks and removal of the finished segment's token list.
        itokens = self._itokens[0]
        if code == TLK.OUT_RES:
            self._itokens.pop(0)
        if not itokens:
            return hyp
        # Insertion.
        hyp, pos_var = hyp[0], hyp[1]
        pref, novar, var = [], list(hyp[:pos_var]), list(hyp[pos_var:])
        # -> Tokens inserted before any audio go at the segment start.
        while len(itokens) > 0 and itokens[0][1] is None:
            tok = itokens.pop(0)
            pref.append((tok[0], 0, 0))
        # -> Consolidated part: merged tokens are consumed.
        if len(itokens) > 0 and len(novar) > 0:
            i, new_novar = 0, []
            while len(itokens) > 0 and i < len(novar):
                w, b, e = novar[i]
                epos = b + e
                while len(itokens) > 0 and itokens[0][1] < epos:
                    tok = itokens.pop(0)
                    new_novar.append((tok[0], b, 0))
                new_novar.append((w, b, e))
                i += 1
            # Copy whatever is left once the tokens ran out.
            new_novar.extend(novar[i:])
        else:
            new_novar = novar
        # -> Variable part: tokens are only read (index j), never consumed.
        if len(itokens) > 0 and len(var) > 0:
            i, j, new_var = 0, 0, []
            while len(itokens) > 0 and i < len(var):
                w, b, e = var[i]
                epos = b + e
                while j < len(itokens) and itokens[j][1] < epos:
                    new_var.append((itokens[j][0], b, 0))
                    j += 1
                new_var.append((w, b, e))
                i += 1
            new_var.extend(var[i:])
        else:
            new_var = var
        # -> On a final result, append every token that is still pending.
        if code == TLK.OUT_RES and len(itokens) > 0:
            # A failure here is caught by __merge_itokens, which then falls
            # back to the unmodified hypothesis.
            assert var == []
            if len(new_novar) > 0:
                pos = new_novar[-1][1] + new_novar[-1][2]
            else:
                pos = -1
            for tok in itokens:
                tmp_pos = pos if pos != -1 else tok[1]
                new_novar.append((tok[0], tmp_pos, 0))
        return pref + new_novar + new_var, len(pref) + len(new_novar)
    # end __merge_itokens_nosec

    def __merge_itokens(self, code, hyp):
        """Best-effort wrapper: on any error the original *hyp* is returned."""
        try:
            return self.__merge_itokens_nosec(code, hyp)
        except Exception:
            return hyp

    def feed(self, data=None):
        """Feed input to the recogniser.

        * ``None``: ``self.reco.feed()`` is called without data (the original
          comment says "None denotes end of segment" -- confirm against TLK).
        * ``str``: registered as a token to insert into the output text.
        * empty buffer: ``self.reco.split()`` is called and a new token list
          is opened.
        * non-empty buffer: forwarded to ``self.reco.feed``.

        Everything except ``None`` is ignored once an error was reported.
        """
        if data is None:
            self.reco.feed()
        elif not self._err:
            if isinstance(data, str):
                self.__register_itoken(data)
            elif len(data) == 0:
                self.reco.split()
                self.__register_itoken_eos()
            else:
                # 32000 is presumably bytes per second of 16 kHz 16-bit mono
                # audio -- confirm.
                self._inframes += (len(data) / 32000) * self._fea_freq
                self.reco.feed(data)

    def __write_hyp(self, novar, var, score, nframes, err, eos):
        """Queue one ``Hyp`` and wake up every consumer waiting in ``output``."""
        h = Hyp(novar=novar, var=var, err=err,
                score=score, nframes=nframes,
                eos=eos)
        with self._cv:
            self._hyps.append(h)
            self._cv.notify_all()

    def process_out(self, code, hyp, stats):
        """Turn an ``(entries, pos_var)`` hypothesis into text and queue it.

        A partial hypothesis (``TLK.OUT_HYP``) with no consolidated text and
        the same variable text as the previous one is dropped.  Upper-casing
        of the output is currently disabled.
        """
        def totxt(rec):
            ret = ' '.join(x[0] for x in rec)
            return ret.replace('[hesitation]', '').replace('<unk>', '')

        hyp, pos_var = hyp[0], hyp[1]
        novar = totxt(hyp[:pos_var])
        var = totxt(hyp[pos_var:])
        if novar == '' and var == self._prev and code == TLK.OUT_HYP:
            return
        self._prev = var
        score, nframes = stats
        self.__write_hyp(novar, var, score, nframes, False,
                         code == TLK.OUT_RES)

    # Output method
    def write(self, code, hyp, stats):
        """Receive one output event from the TLK recogniser.

        * ``TLK.OUT_ERR``: queue the last variable text as consolidated text
          and mark the session as failed.
        * ``TLK.OUT_END``: queue the end-of-stream record (``novar`` None).
        * otherwise: merge inserted tokens, apply the optional segmenter and
          queue the resulting text.
        """
        hyp = self.__merge_itokens(code, hyp)
        if code == TLK.OUT_ERR:
            txt = self._prev if self._prev is not None else ''
            self.__write_hyp(txt, '', 0, 0, False, False)
            self._err = True
            return
        if code == TLK.OUT_END:
            self.__write_hyp(None, None, 0, 0, self._err, False)
            return
        if self.seg is not None:
            hyp = self.seg(hyp)
            if code == TLK.OUT_RES:
                hyp0, pos_var0 = hyp
                hyp1 = self.seg.eos()
                if hyp1 is not None:
                    # Final result: keep only the consolidated parts.
                    hyp1, pos_var1 = hyp1
                    hyp = hyp0[:pos_var0] + hyp1[:pos_var1]
                    hyp = hyp, len(hyp)
                # NOTE(review): the source reviewed had lost its indentation;
                # the reset is assumed to run on every final result, not only
                # when eos() returns something -- confirm.
                self.seg.reset()
        self.process_out(code, hyp, stats)

    @property
    def output(self):
        """Yield queued ``Hyp`` records, blocking until one is available.

        Iteration stops after the record whose ``novar`` is None.
        """
        end = False
        while not end:
            with self._cv:
                self._cv.wait_for(lambda: len(self._hyps) > 0)
                ret = self._hyps.pop(0)
            # Yield outside the lock so a slow consumer never blocks write().
            end = ret.novar is None
            yield ret

# end Recogniser
class RecogniserManager:
    """Lock-protected pool of pre-built ``Recogniser`` instances of one system."""

    def __init__(self, name, tag, date, lang,
                 models, params, nreco,
                 sil_word, sil_sym, fea_freq):
        """Create *nreco* recognisers that share *models* and *params*.

        Args:
            name, tag, date, lang: metadata stored on the manager.
            models: dict as returned by ``load_models``.
            params: ``TLK.OParameters`` instance.
            nreco: number of recognisers to create.
            sil_word, sil_sym, fea_freq: forwarded to every ``Recogniser``.
        """
        self.v = []   # Recognisers currently available
        self.vv = []  # Reference copy: every recogniser created
        for _ in range(nreco):
            reco = Recogniser(models, params,
                              sil_word, sil_sym,
                              fea_freq, self)
            self.v.append(reco)
            self.vv.append(reco)
        self.name = name
        self.tag = tag
        self.date = date
        self.lang = lang
        self._lock = threading.Lock()
        self._models = models
        self._enabled = True

    def __len__(self):
        """Return the total number of recognisers created (busy or free)."""
        return len(self.vv)

    def get_reco(self):
        """Take a freshly reset recogniser from the pool.

        Returns None when the pool is empty or the manager is disabled.
        """
        with self._lock:
            if not self.v or not self._enabled:
                return None
            ret = self.v.pop()
            ret.reset()
            Log.info('Recogniser taken from %s (available: %d of %d)',
                     self.name, len(self.v), len(self.vv))
            return ret

    def append(self, reco):
        """Give *reco* back to the pool."""
        with self._lock:
            self.v.append(reco)
            Log.info('Recogniser from %s released (available: %d of %d)',
                     self.name, len(self.v), len(self.vv))

    def set_enabled(self, value):
        """Enable or disable handing out recognisers."""
        with self._lock:
            self._enabled = value

    @property
    def num_recos_available(self):
        """Number of recognisers currently free."""
        with self._lock:
            return len(self.v)

    @property
    def enabled(self):
        """Whether ``get_reco`` is allowed to hand out recognisers."""
        with self._lock:
            return self._enabled
# end RecogniserManager
def prepare_params(cfg):
    """Build a ``TLK.OParameters`` object from the configuration mapping *cfg*.

    Optional settings are copied only when present (not None), converted to
    the type listed in the table below.  ``gsf`` defaults to 1.0, ``wip`` to
    0.0 and the silence symbol to ``'SP'``.  ``amla`` is enabled when
    ``amla-fs`` is configured.

    Boolean settings also accept the strings ``''``, ``'0'``, ``'false'``,
    ``'no'`` and ``'off'`` (case-insensitive) as False; a plain ``bool()``
    would treat every non-empty string, including ``'false'``, as True.

    Returns:
        The populated ``TLK.OParameters`` instance.
    """
    def as_bool(value):
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)

    # (configuration key, parameter attribute, converter), in assignment order.
    optional = (
        ('hp', 'hp', int),
        ('hp_min', 'hp_min', int),
        ('wep', 'wep', float),
        ('beam', 'beam', float),
        ('dynormthr', 'dynormthr', int),
        ('meannorm', 'meannorm', as_bool),
        ('cmllr', 'cmllr_enabled', as_bool),
        ('cmllr_nframes_step0', 'cmllr_nframes_step0', int),
        ('cmllr_nframes', 'cmllr_nframes', int),
        ('cmllr_niters', 'cmllr_niters', int),
        ('numceps', 'numceps', int),
        ('numchans', 'numchans', int),
        ('accwindow', 'accwindow', int),
        ('deltawindow', 'deltawindow', int),
        ('hp_lm', 'hp_lm', int),
        ('order', 'order', int),
        ('ftype', 'ftype', str),
        ('mustdnorm', 'mustdnorm', as_bool),
    )

    params = TLK.OParameters()
    for key, attr, convert in optional:
        raw = cfg.get(key)
        if raw is not None:
            setattr(params, attr, convert(raw))

    raw = cfg.get('gsf')
    params.gsf = float(raw) if raw is not None else 1.0
    raw = cfg.get('wip')
    params.wip = float(raw) if raw is not None else 0.0
    params.amla = cfg.get('amla-fs') is not None
    params.sil = cfg.get('sil_sym', 'SP')

    # IMPORTANT: these two values disable the segmentation heuristic.
    params.sil_length = 20000
    params.sil_thr = 0.0
    return params
# end prepare_params
def load_models(cfg, name, params, sil_word):
    """Load every model referenced by *cfg* and return them in a dict.

    Args:
        cfg: configuration mapping; ``amodel`` and ``lm`` are mandatory.
        name: system name, used only in the log message.
        params: parameters object; only ``cmllr_enabled`` is read here.
        sil_word: silence word forwarded to the segmenter loader.

    Returns:
        dict with the keys ``amodel``, ``lm``, ``dnn``, ``gsf``, ``wip``,
        ``dlm``, ``oep-factory``, ``oep-model``, ``mustd``, ``seg-factory``
        and ``seg-model``; models that are not configured are None.

    Raises:
        SystemExit: when CMLLR is enabled (not supported), or when ``oep`` is
            configured without ``oep-step`` / ``oep-lookahead``.
        KeyError: when ``amodel`` or ``lm`` is missing from *cfg*.
    """
    Log.info("Loading '%s' models...", name)
    mustd = cfg.get('mustd')
    dnn = cfg.get('dnn')
    if params.cmllr_enabled:
        sys.exit('CMLLR NOT SUPPORTED !!!!')

    amodel = TLK.AModel(cfg['amodel'])
    lexicon = cfg.get('lexicon')
    if lexicon is not None:
        lex = TLK.Lexicon(lexicon, syms=amodel.syms)
        lm = TLK.LM()
        lm.load(cfg['lm'], lexicon=lex)
    else:
        # Without a lexicon the LM file is tried as a search graph first and
        # as static look-ahead tables when that fails.
        try:
            lm = TLK.SearchGraph(cfg['lm'], syms=amodel.syms)
        except Exception:
            lm = TLK.StaticLookaheadTables(cfg['lm'], syms=amodel.syms)

    dlm = cfg.get('dlm')
    if dlm is not None:
        dlm = lmodel.load_model(dlm)(lm.words)

    oep = cfg.get('oep')
    if oep is not None:
        step = cfg.get('oep-step')
        if step is None:
            sys.exit('oep-step not defined')
        step = int(step)
        lookahead = cfg.get('oep-lookahead')
        if lookahead is None:
            sys.exit('oep-lookahead not defined')
        lookahead = int(lookahead)
        priors_fn = cfg.get('oep-priors')
        amla_fs = cfg.get('amla-fs')
        oep_factory, oep_model = oepmodel.load_model(oep, step, lookahead,
                                                     priors_fn, amla_fs)
    else:
        oep_factory = oep_model = None

    seg = cfg.get('seg')
    if seg is not None:
        seg_factory, seg_model = segmodel.load_model(seg, sil_word=sil_word)
    else:
        seg_factory = seg_model = None

    aux = cfg.get('gsf')
    gsf = float(aux) if aux is not None else 1.0
    aux = cfg.get('wip')
    wip = float(aux) if aux is not None else 0.0

    return {
        'amodel': amodel,
        'lm': lm,
        'dnn': dnn,
        'gsf': gsf,
        'wip': wip,
        'dlm': dlm,
        'oep-factory': oep_factory,
        'oep-model': oep_model,
        'mustd': mustd,
        'seg-factory': seg_factory,
        'seg-model': seg_model,
    }
# end load_models
-
def load_system(conf):
    """Create the ``RecogniserManager`` described by the mapping *conf*.

    ``id``, ``lang`` and ``nreco`` are mandatory keys.  ``tag`` defaults to
    the id, ``date`` to ``[1, 1, 1971]`` and ``fea_freq`` to 100;
    ``sil_word`` and ``sil_sym`` are optional.
    """
    silence_word = conf.get('sil_word')
    silence_sym = conf.get('sil_sym')
    frames_per_sec = conf.get('fea_freq', 100)
    parameters = prepare_params(conf)

    system_id = conf['id']
    system_tag = conf.get('tag', system_id)
    system_date = tuple(conf.get('date', [1, 1, 1971]))

    # The models are loaded under the tag; the manager is named by the id.
    loaded = load_models(conf, system_tag, parameters, silence_word)
    Log.info("Creating '%s' recognisers...", system_id)
    language = conf['lang']
    how_many = conf['nreco']
    return RecogniserManager(system_id, system_tag, system_date, language,
                             loaded, parameters, how_many,
                             silence_word, silence_sym, frames_per_sec)
    # Upper-case tables (disabled for now):
    # uppercase_tbl,uppercase_tbl_bi,
    #aux= cfg.get('uppercase-table')
    #uppercase_tbl= load_uppercase_table(aux) if aux!=None else None
    #aux= cfg.get('uppercase-table-bi')
    #uppercase_tbl_bi= load_uppercase_table_bi(aux) if aux!=None else None

# end load_system
-
|