asr_system.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. __all__= ['load']
  2. import sys,threading,collections,time
  3. import TLK
  4. import lmodel,oepmodel,segmodel
  5. import logging as Log
  6. Hyp= collections.namedtuple('Hyp',['var','novar','err',
  7. 'score','nframes',
  8. 'eos'])
  9. class Recogniser:
  10. def __init__(self,models,params,
  11. # uppercase_tbl,uppercase_tbl_bi,
  12. sil_word,sil_sym,fea_freq,mng):
  13. args= {
  14. 'ifreq' : 16000,
  15. 'bsize' : 400,
  16. 'amodel' : models['amodel'],
  17. 'lm' : models['lm'],
  18. 'params' : params,
  19. 'output' : self,
  20. }
  21. self._fea_freq= fea_freq
  22. if sil_word!=None:
  23. args['sil_word']= sil_word
  24. if sil_sym!=None:
  25. args['sil_sym']= sil_sym
  26. if models['dnn']!=None: args['dnn']= models['dnn']
  27. if models['dlm']!=None: args['dlm']= models['dlm']
  28. if models['mustd']!=None: args['mustd']= models['mustd']
  29. if models['oep-factory']!=None:
  30. oep= models['oep-factory'](models['oep-model'])
  31. args['oep']= oep
  32. self.reco= TLK.ORecogniser(**args)
  33. if models['seg-factory']!=None:
  34. self.seg= models['seg-factory'](models['seg-model'])
  35. else: self.seg= None
  36. self._mng= mng
  37. self._cv= threading.Condition()
  38. def reset(self):
  39. self._err= False
  40. self._hyps= []
  41. self._prev= None
  42. if self.seg is not None:
  43. self.seg.reset()
  44. # Per a gestionar tokens insertats el que vaig a fer es tindre
  45. # una llista de llistes. En la primera llista estaran els
  46. # tokens del segment que s'està processant actualment i en
  47. # l'última els tokens de l'últim segment. Cada token serà una
  48. # tupla paraula i número de frames on es va insertar.
  49. self._itokens= [[]]
  50. self._inframes= 0.0 # Numero de frames del segment actual
  51. # insertats en el reconeixedor
  52. def __register_itoken_nosec(self,token):
  53. token= token.strip()
  54. if token=='': return
  55. nframes= int(self._inframes) if self._inframes>0.0 else None
  56. self._itokens[-1].append((token,nframes))
  57. def __register_itoken(self,token):
  58. try:
  59. self.__register_itoken_nosec(token)
  60. except:
  61. pass
  62. def __register_itoken_eos(self):
  63. if self._inframes>0.0:
  64. self._itokens.append([])
  65. self._inframes= 0.0
  66. def __merge_itokens_nosec(self,code,hyp):
  67. # Comprovacions inicials i eliminació
  68. itokens= self._itokens[0]
  69. if code==TLK.OUT_RES: self._itokens.pop(0)
  70. if itokens==[]: return hyp
  71. # Inserta
  72. hyp,pos_var= hyp[0],hyp[1]
  73. pref,novar,var= [],list(hyp[:pos_var]),list(hyp[pos_var:])
  74. # -> Inserta inici segment
  75. while len(itokens)>0 and itokens[0][1] is None:
  76. tok= itokens.pop(0)
  77. pref.append((tok[0],0,0))
  78. # --> Processa novar
  79. if len(itokens)>0 and len(novar)>0:
  80. i,new_novar= 0,[]
  81. while len(itokens)>0 and i<len(novar):
  82. w,b,e= novar[i]
  83. epos= b+e
  84. while len(itokens)>0 and itokens[0][1]<epos:
  85. tok= itokens.pop(0)
  86. new_novar.append((tok[0],b,0))
  87. new_novar.append((w,b,e))
  88. i+= 1
  89. while i<len(novar):
  90. new_novar.append(novar[i])
  91. i+= 1
  92. else: new_novar= novar
  93. # --> Processa var
  94. if len(itokens)>0 and len(var)>0:
  95. i,j,new_var= 0,0,[]
  96. while len(itokens)>0 and i<len(var):
  97. w,b,e= var[i]
  98. epos= b+e
  99. while j<len(itokens) and itokens[j][1]<epos:
  100. new_var.append((itokens[j][0],b,0))
  101. j+= 1
  102. new_var.append((w,b,e))
  103. i+= 1
  104. while i<len(var):
  105. new_var.append(var[i])
  106. i+= 1
  107. else: new_var= var
  108. # --> Afegeix pendents si code==TLK.OUT_RES
  109. if code==TLK.OUT_RES and len(itokens)>0:
  110. assert var==[]
  111. pos= new_novar[-1][1]+new_novar[-1][2] if len(new_novar)>0 else -1
  112. for tok in itokens:
  113. tmp_pos= pos if pos!=-1 else tok[1]
  114. new_novar.append((tok[0],tmp_pos,0))
  115. return pref+new_novar+new_var,len(pref)+len(new_novar)
  116. # end __merge_itokens_nosec
  117. def __merge_itokens(self,code,hyp):
  118. try:
  119. return self.__merge_itokens_nosec(code,hyp)
  120. except:
  121. return hyp
  122. # None denotes end of segment
  123. def feed(self,data=None):
  124. if data==None:
  125. self.reco.feed()
  126. elif not self._err:
  127. if type(data)==str:
  128. self.__register_itoken(data)
  129. elif len(data)==0:
  130. self.reco.split()
  131. self.__register_itoken_eos()
  132. else:
  133. self._inframes+= (len(data)/32000)*self._fea_freq
  134. self.reco.feed(data)
  135. def __write_hyp(self,novar,var,score,nframes,err,eos):
  136. h= Hyp(novar=novar,var=var,err=err,
  137. score=score,nframes=nframes,
  138. eos=eos)
  139. with self._cv:
  140. self._hyps.append(h)
  141. self._cv.notify_all()
  142. def process_out(self,code,hyp,stats):
  143. #def totxt(rec,last_word):
  144. # aux= []
  145. # for x in rec:
  146. # w= x[0].lower()
  147. # aux.append(toupper(w,last_word))
  148. # last_word= w
  149. # ret= ' '.join(aux)
  150. # ret= ret.replace('[hesitation]','').replace('<unk>','')
  151. # return ret
  152. def totxt(rec):
  153. ret= ' '.join([x[0] for x in rec])
  154. ret= ret.replace('[hesitation]','').replace('<unk>','')
  155. return ret
  156. hyp,pos_var= hyp[0],hyp[1]
  157. novar= hyp[:pos_var]
  158. #new_last_word= novar[-1][0] if len(novar)>0 else self.last_word
  159. #novar= totxt(novar,self.last_word)
  160. #var= totxt(hyp[pos_var:],new_last_word)
  161. #self.last_word= new_last_word
  162. novar= totxt(novar)
  163. var= totxt(hyp[pos_var:])
  164. if novar=='' and var==self._prev and code==TLK.OUT_HYP: return
  165. self._prev= var
  166. score,nframes= stats
  167. self.__write_hyp(novar,var,score,nframes,False,
  168. code==TLK.OUT_RES)
  169. # Output method
  170. def write(self,code,hyp,stats):
  171. hyp= self.__merge_itokens(code,hyp)
  172. if code==TLK.OUT_ERR:
  173. txt= self._prev if self._prev!=None else ''
  174. self.__write_hyp(txt,'',0,0,False,False)
  175. self._err= True
  176. return
  177. if code==TLK.OUT_END:
  178. self.__write_hyp(None,None,0,0,self._err,False)
  179. return
  180. if self.seg is not None:
  181. hyp= self.seg(hyp)
  182. if code==TLK.OUT_RES:
  183. hyp0,pos_var0= hyp
  184. hyp1= self.seg.eos()
  185. if hyp1 is not None:
  186. hyp1,pos_var1= hyp1
  187. hyp= hyp0[:pos_var0]+hyp1[:pos_var1]
  188. hyp= hyp,len(hyp)
  189. self.seg.reset()
  190. self.process_out(code,hyp,stats)
  191. @property
  192. def output(self):
  193. end= False
  194. while not end:
  195. with self._cv:
  196. self._cv.wait_for(lambda: len(self._hyps)>0)
  197. ret= self._hyps.pop(0)
  198. end= ret.novar is None
  199. yield ret
  200. # end Recogniser
  201. class RecogniserManager:
  202. def __init__(self,name,tag,date,lang,
  203. models,params,nreco,
  204. sil_word,sil_sym,fea_freq):
  205. self.v= []
  206. self.vv= [] # Reference copy
  207. #self.cmllr= params.cmllr_enabled
  208. for n in range(0,nreco):
  209. reco= Recogniser(models,params,
  210. sil_word,sil_sym,
  211. fea_freq,self)
  212. self.v.append(reco)
  213. self.vv.append(reco)
  214. self.name= name
  215. self.tag= tag
  216. self.date= date
  217. self.lang= lang
  218. self._lock= threading.Lock()
  219. self._models= models
  220. self._enabled= True
  221. def __len__(self):
  222. return len(self.vv)
  223. def get_reco(self):
  224. with self._lock:
  225. if self.v==[]: return None
  226. if not self._enabled: return None
  227. ret= self.v.pop()
  228. ret.reset()
  229. #ret.reset_cmllr()
  230. Log.info(('Recogniser taken from %s '+
  231. '(available: %d of %d)')%(self.name,
  232. len(self.v),
  233. len(self.vv)))
  234. return ret
  235. def append(self,reco):
  236. with self._lock:
  237. self.v.append(reco)
  238. Log.info(('Recogniser from %s realeased'+
  239. '(available: %d of %d)')%(self.name,
  240. len(self.v),
  241. len(self.vv)))
  242. def set_enabled(self,value):
  243. with self._lock:
  244. self._enabled= value
  245. @property
  246. def num_recos_available(self):
  247. with self._lock:
  248. return len(self.v)
  249. @property
  250. def enabled(self):
  251. with self._lock:
  252. return self._enabled
  253. # end RecogniserManager
  254. def prepare_params(cfg):
  255. params= TLK.OParameters()
  256. aux= cfg.get('hp')
  257. if aux!=None: params.hp= int(aux)
  258. aux= cfg.get('hp_min')
  259. if aux!=None: params.hp_min= int(aux)
  260. aux= cfg.get('wep')
  261. if aux!=None: params.wep= float(aux)
  262. aux= cfg.get('beam')
  263. if aux!=None: params.beam= float(aux)
  264. aux= cfg.get('dynormthr')
  265. if aux!=None: params.dynormthr= int(aux)
  266. aux= cfg.get('meannorm')
  267. if aux!=None: params.meannorm= bool(aux)
  268. aux= cfg.get('cmllr')
  269. if aux!=None: params.cmllr_enabled= bool(aux)
  270. aux= cfg.get('cmllr_nframes_step0')
  271. if aux!=None: params.cmllr_nframes_step0= int(aux)
  272. aux= cfg.get('cmllr_nframes')
  273. if aux!=None: params.cmllr_nframes= int(aux)
  274. aux= cfg.get('cmllr_niters')
  275. if aux!=None: params.cmllr_niters= int(aux)
  276. aux= cfg.get('numceps')
  277. if aux!=None: params.numceps= int(aux)
  278. aux= cfg.get('numchans')
  279. if aux!=None: params.numchans= int(aux)
  280. aux= cfg.get('accwindow')
  281. if aux!=None: params.accwindow= int(aux)
  282. aux= cfg.get('deltawindow')
  283. if aux!=None: params.deltawindow= int(aux)
  284. aux= cfg.get('hp_lm')
  285. if aux!=None: params.hp_lm= int(aux)
  286. aux= cfg.get('order')
  287. if aux!=None: params.order= int(aux)
  288. aux= cfg.get('ftype')
  289. if aux!=None: params.ftype= str(aux)
  290. aux= cfg.get('mustdnorm')
  291. if aux!=None: params.mustdnorm= bool(aux)
  292. aux= cfg.get('gsf')
  293. gsf= float(aux) if aux!=None else 1.0
  294. params.gsf= gsf
  295. aux= cfg.get('wip')
  296. wip= float(aux) if aux!=None else 0.0
  297. params.wip= wip
  298. params.amla= cfg.get('amla-fs')!=None
  299. params.sil= cfg.get('sil_sym','SP')
  300. # IMPORTANT !!!!! Deshabilita l'heurístic de segmentació
  301. params.sil_length= 20000
  302. params.sil_thr= 0.0
  303. return params
  304. # end prepare_params
  305. def load_models(cfg,name,params,sil_word):
  306. Log.info("Loading '%s' models..."%name)
  307. mustd= cfg.get('mustd')
  308. dnn= cfg.get('dnn')
  309. if params.cmllr_enabled:
  310. sys.exit('CMLLR NOT SUPPORTED !!!!')
  311. target= cfg.get('target')
  312. dnn_cmllr= cfg.get('dnn_cmllr')
  313. else:
  314. target= dnn_cmllr= None
  315. lexicon= cfg.get('lexicon')
  316. amodel= TLK.AModel(cfg['amodel'])
  317. if lexicon!=None:
  318. lex= TLK.Lexicon(lexicon, syms=amodel.syms)
  319. lm= TLK.LM()
  320. lm.load(cfg['lm'],lexicon=lex)
  321. else:
  322. try:
  323. lm= TLK.SearchGraph(cfg['lm'], syms=amodel.syms)
  324. except:
  325. lm= TLK.StaticLookaheadTables(cfg['lm'], syms=amodel.syms)
  326. dlm= cfg.get('dlm')
  327. if dlm!=None:
  328. dlm= lmodel.load_model(dlm)(lm.words)
  329. oep= cfg.get('oep')
  330. if oep!=None:
  331. step= cfg.get('oep-step')
  332. if step==None:
  333. sys.exit('oep-step not defined')
  334. step= int(step)
  335. lookahead= cfg.get('oep-lookahead')
  336. if lookahead==None:
  337. sys.exit('oep-lookahead not defined')
  338. lookahead= int(lookahead)
  339. priors_fn= cfg.get('oep-priors')
  340. amla_fs= cfg.get('amla-fs')
  341. tmp= oepmodel.load_model(oep,step,lookahead,
  342. priors_fn,amla_fs)
  343. oep_factory,oep_model= tmp
  344. else: oep_factory= oep_model= None
  345. seg= cfg.get('seg')
  346. if seg!=None:
  347. tmp= segmodel.load_model(seg,sil_word=sil_word)
  348. seg_factory,seg_model= tmp
  349. else: seg_factory= seg_model= None
  350. aux= cfg.get('gsf')
  351. gsf= float(aux) if aux!=None else 1.0
  352. aux= cfg.get('wip')
  353. wip= float(aux) if aux!=None else 0.0
  354. models= {}
  355. models["amodel"]=amodel
  356. models["lm"]=lm
  357. models["dnn"]=dnn
  358. models["gsf"]=gsf
  359. models["wip"]=wip
  360. models['dlm']= dlm
  361. models['oep-factory']= oep_factory
  362. models['oep-model']= oep_model
  363. models['mustd']= mustd
  364. models['seg-factory']= seg_factory
  365. models['seg-model']= seg_model
  366. return models
  367. # end load_models
  368. def load_system(conf):
  369. sil_word= conf.get('sil_word')
  370. sil_sym= conf.get('sil_sym')
  371. fea_freq= conf.get('fea_freq',100)
  372. params= prepare_params(conf)
  373. name= conf['id']
  374. tag= conf.get('tag',name)
  375. date= tuple(conf.get('date',[1,1,1971]))
  376. models= load_models(conf,tag,params,sil_word)
  377. Log.info("Creating '%s' recognisers..."%name)
  378. return RecogniserManager(name,tag,date,conf['lang'],
  379. models,params,conf['nreco'],
  380. sil_word,sil_sym,fea_freq)
  381. # uppercase_tbl,uppercase_tbl_bi,
  382. # Uppercase-table (Deshabilitat de moment)
  383. #aux= cfg.get('uppercase-table')
  384. #uppercase_tbl= load_uppercase_table(aux) if aux!=None else None
  385. #aux= cfg.get('uppercase-table-bi')
  386. #uppercase_tbl_bi= load_uppercase_table_bi(aux) if aux!=None else None
  387. # end create_recognisers