PocketSphinx  5prealpha
pocketsphinx.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <stdio.h>
40 #include <assert.h>
41 
42 #ifdef HAVE_UNISTD_H
43 #include <unistd.h>
44 #endif
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/err.h>
48 #include <sphinxbase/strfuncs.h>
49 #include <sphinxbase/filename.h>
50 #include <sphinxbase/pio.h>
51 #include <sphinxbase/jsgf.h>
52 #include <sphinxbase/hash_table.h>
53 
54 /* Local headers. */
55 #include "cmdln_macro.h"
56 #include "pocketsphinx.h"
57 #include "pocketsphinx_internal.h"
58 #include "ps_lattice_internal.h"
59 #include "phone_loop_search.h"
60 #include "kws_search.h"
61 #include "fsg_search_internal.h"
62 #include "ngram_search.h"
63 #include "ngram_search_fwdtree.h"
64 #include "ngram_search_fwdflat.h"
65 #include "allphone_search.h"
66 
67 static const arg_t ps_args_def[] = {
68  POCKETSPHINX_OPTIONS,
69  CMDLN_EMPTY_OPTION
70 };
71 
72 /* I'm not sure what the portable way to do this is. */
/*
 * Report whether `path` can be opened for binary reading.
 *
 * Returns 1 when fopen() succeeds (the stream is closed again right
 * away) and 0 otherwise.
 */
static int
file_exists(const char *path)
{
    FILE *fh = fopen(path, "rb");

    if (fh == NULL)
        return 0;
    fclose(fh);
    return 1;
}
82 
83 #ifdef MODELDIR
/*
 * Report whether `path` looks like an acoustic model directory, i.e.
 * whether "<path>/mdef" can be opened for binary reading.
 *
 * Returns 1 if the file could be opened, 0 otherwise.
 */
static int
hmmdir_exists(const char *path)
{
    char *mdef_path = string_join(path, "/mdef", NULL);
    FILE *fh = fopen(mdef_path, "rb");
    int found = (fh != NULL);

    if (found)
        fclose(fh);
    ckd_free(mdef_path);
    return found;
}
95 #endif
96 
97 static void
98 ps_add_file(ps_decoder_t *ps, const char *arg,
99  const char *hmmdir, const char *file)
100 {
101  char *tmp = string_join(hmmdir, "/", file, NULL);
102 
103  if (cmd_ln_str_r(ps->config, arg) == NULL && file_exists(tmp))
104  cmd_ln_set_str_r(ps->config, arg, tmp);
105  ckd_free(tmp);
106 }
107 
108 /* Feature and front-end parameters that may be in feat.params */
109 static const arg_t feat_defn[] = {
110  waveform_to_cepstral_command_line_macro(),
111  cepstral_to_feature_command_line_macro(),
112  CMDLN_EMPTY_OPTION
113 };
114 
115 static void
116 ps_expand_model_config(ps_decoder_t *ps)
117 {
118  char const *hmmdir, *featparams;
119 
120  /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
121 #ifdef __ADSPBLACKFIN__
122  E_INFO("Will not use mmap() on uClinux/Blackfin.");
123  cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
124 #endif
125 
126  /* Get acoustic model filenames and add them to the command-line */
127  if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
128  ps_add_file(ps, "-mdef", hmmdir, "mdef");
129  ps_add_file(ps, "-mean", hmmdir, "means");
130  ps_add_file(ps, "-var", hmmdir, "variances");
131  ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
132  ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
133  ps_add_file(ps, "-sendump", hmmdir, "sendump");
134  ps_add_file(ps, "-fdict", hmmdir, "noisedict");
135  ps_add_file(ps, "-lda", hmmdir, "feature_transform");
136  ps_add_file(ps, "-featparams", hmmdir, "feat.params");
137  ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
138  }
139 
140  /* Look for feat.params in acoustic model dir. */
141  if ((featparams = cmd_ln_str_r(ps->config, "-featparams"))) {
142  if (NULL !=
143  cmd_ln_parse_file_r(ps->config, feat_defn, featparams, FALSE))
144  E_INFO("Parsed model-specific feature parameters from %s\n",
145  featparams);
146  }
147 
148  /* Print here because acmod_init might load feat.params file */
149  if (err_get_logfp() != NULL) {
150  cmd_ln_print_values_r(ps->config, err_get_logfp(), ps_args());
151  }
152 }
153 
154 static void
155 ps_free_searches(ps_decoder_t *ps)
156 {
157  if (ps->searches) {
158  hash_iter_t *search_it;
159  for (search_it = hash_table_iter(ps->searches); search_it;
160  search_it = hash_table_iter_next(search_it)) {
161  ps_search_free(hash_entry_val(search_it->ent));
162  }
163  hash_table_free(ps->searches);
164  }
165 
166  ps->searches = NULL;
167  ps->search = NULL;
168 }
169 
170 static ps_search_t *
171 ps_find_search(ps_decoder_t *ps, char const *name)
172 {
173  void *search = NULL;
174  hash_table_lookup(ps->searches, name, &search);
175 
176  return (ps_search_t *) search;
177 }
178 
179 void
180 ps_default_search_args(cmd_ln_t *config)
181 {
182 #ifdef MODELDIR
183  /* Set default acoustic and language models. */
184  const char *hmmdir = cmd_ln_str_r(config, "-hmm");
185  if (hmmdir == NULL && hmmdir_exists(MODELDIR "/en-us/en-us")) {
186  hmmdir = MODELDIR "/en-us/en-us";
187  cmd_ln_set_str_r(config, "-hmm", hmmdir);
188  }
189 
190  const char *lmfile = cmd_ln_str_r(config, "-lm");
191 
192  if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg")
193  && !cmd_ln_str_r(config, "-jsgf")
194  && !cmd_ln_str_r(config, "-lmctl")
195  && !cmd_ln_str_r(config, "-kws")
196  && !cmd_ln_str_r(config, "-keyphrase")
197  && file_exists(MODELDIR "/en-us/en-us.lm.bin")) {
198  lmfile = MODELDIR "/en-us/en-us.lm.bin";
199  cmd_ln_set_str_r(config, "-lm", lmfile);
200  }
201 
202  const char *dictfile = cmd_ln_str_r(config, "-dict");
203  if (dictfile == NULL && file_exists(MODELDIR "/en-us/cmudict-en-us.dict")) {
204  dictfile = MODELDIR "/en-us/cmudict-en-us.dict";
205  cmd_ln_set_str_r(config, "-dict", dictfile);
206  }
207 
208  /* Expand acoustic and language model filenames relative to installation
209  * path. */
210  if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
211  char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL);
212  if (hmmdir_exists(tmphmm)) {
213  cmd_ln_set_str_r(config, "-hmm", tmphmm);
214  } else {
215  E_ERROR("Failed to find mdef file inside the model folder "
216  "specified with -hmm `%s'\n", hmmdir);
217  }
218  ckd_free(tmphmm);
219  }
220  if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
221  char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL);
222  cmd_ln_set_str_r(config, "-lm", tmplm);
223  ckd_free(tmplm);
224  }
225  if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
226  char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL);
227  cmd_ln_set_str_r(config, "-dict", tmpdict);
228  ckd_free(tmpdict);
229  }
230 #endif
231 }
232 
233 int
234 ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
235 {
236  const char *path;
237  const char *keyphrase;
238  int32 lw;
239 
240  if (config && config != ps->config) {
241  cmd_ln_free_r(ps->config);
242  ps->config = cmd_ln_retain(config);
243  }
244 
245  err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
246  /* Set up logging. We need to do this earlier because we want to dump
247  * the information to the configured log, not to the stderr. */
248  if (config && cmd_ln_str_r(ps->config, "-logfn")) {
249  if (err_set_logfile(cmd_ln_str_r(ps->config, "-logfn")) < 0) {
250  E_ERROR("Cannot redirect log output\n");
251  return -1;
252  }
253  }
254 
255  ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
256  ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
257  ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");
258 
259  /* Fill in some default arguments. */
260  ps_expand_model_config(ps);
261 
262  /* Free old searches (do this before other reinit) */
263  ps_free_searches(ps);
264  ps->searches = hash_table_new(3, HASH_CASE_YES);
265 
266  /* Free old acmod. */
267  acmod_free(ps->acmod);
268  ps->acmod = NULL;
269 
270  /* Free old dictionary (must be done after the two things above) */
271  dict_free(ps->dict);
272  ps->dict = NULL;
273 
274  /* Free d2p */
275  dict2pid_free(ps->d2p);
276  ps->d2p = NULL;
277 
278  /* Logmath computation (used in acmod and search) */
279  if (ps->lmath == NULL
280  || (logmath_get_base(ps->lmath) !=
281  (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
282  if (ps->lmath)
283  logmath_free(ps->lmath);
284  ps->lmath = logmath_init
285  ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
286  cmd_ln_boolean_r(ps->config, "-bestpath"));
287  }
288 
289  /* Acoustic model (this is basically everything that
290  * uttproc.c, senscr.c, and others used to do) */
291  if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
292  return -1;
293 
294 
295 
296  if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
297  /* Initialize an auxiliary phone loop search, which will run in
298  * "parallel" with FSG or N-Gram search. */
299  if ((ps->phone_loop =
300  phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
301  return -1;
302  hash_table_enter(ps->searches,
303  ps_search_name(ps->phone_loop),
304  ps->phone_loop);
305  }
306 
307  /* Dictionary and triphone mappings (depends on acmod). */
308  /* FIXME: pass config, change arguments, implement LTS, etc. */
309  if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
310  return -1;
311  if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
312  return -1;
313 
314  lw = cmd_ln_float32_r(ps->config, "-lw");
315 
316  /* Determine whether we are starting out in FSG or N-Gram search mode.
317  * If neither is used skip search initialization. */
318 
319  /* Load KWS if one was specified in config */
320  if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
321  if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
322  return -1;
323  ps_set_search(ps, PS_DEFAULT_SEARCH);
324  }
325 
326  if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
327  if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
328  return -1;
329  ps_set_search(ps, PS_DEFAULT_SEARCH);
330  }
331 
332  /* Load an FSG if one was specified in config */
333  if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
334  fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
335  if (!fsg)
336  return -1;
337  if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) {
338  fsg_model_free(fsg);
339  return -1;
340  }
341  fsg_model_free(fsg);
342  ps_set_search(ps, PS_DEFAULT_SEARCH);
343  }
344 
345  /* Or load a JSGF grammar */
346  if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
347  if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
348  || ps_set_search(ps, PS_DEFAULT_SEARCH))
349  return -1;
350  }
351 
352  if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
353  if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
354  || ps_set_search(ps, PS_DEFAULT_SEARCH))
355  return -1;
356  }
357 
358  if ((path = cmd_ln_str_r(ps->config, "-lm")) &&
359  !cmd_ln_boolean_r(ps->config, "-allphone")) {
360  if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
361  || ps_set_search(ps, PS_DEFAULT_SEARCH))
362  return -1;
363  }
364 
365  if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
366  const char *name;
367  ngram_model_t *lmset;
368  ngram_model_set_iter_t *lmset_it;
369 
370  if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
371  E_ERROR("Failed to read language model control file: %s\n", path);
372  return -1;
373  }
374 
375  for(lmset_it = ngram_model_set_iter(lmset);
376  lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
377  ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
378  E_INFO("adding search %s\n", name);
379  if (ps_set_lm(ps, name, lm)) {
380  ngram_model_set_iter_free(lmset_it);
381  ngram_model_free(lmset);
382  return -1;
383  }
384  }
385  ngram_model_free(lmset);
386 
387  name = cmd_ln_str_r(ps->config, "-lmname");
388  if (name)
389  ps_set_search(ps, name);
390  else {
391  E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
392  return -1;
393  }
394  }
395 
396  /* Initialize performance timer. */
397  ps->perf.name = "decode";
398  ptmr_init(&ps->perf);
399 
400  return 0;
401 }
402 
403 ps_decoder_t *
404 ps_init(cmd_ln_t *config)
405 {
406  ps_decoder_t *ps;
407 
408  if (!config) {
409  E_ERROR("No configuration specified");
410  return NULL;
411  }
412 
413  ps = ckd_calloc(1, sizeof(*ps));
414  ps->refcount = 1;
415  if (ps_reinit(ps, config) < 0) {
416  ps_free(ps);
417  return NULL;
418  }
419  return ps;
420 }
421 
422 arg_t const *
423 ps_args(void)
424 {
425  return ps_args_def;
426 }
427 
428 ps_decoder_t *
430 {
431  ++ps->refcount;
432  return ps;
433 }
434 
435 int
437 {
438  if (ps == NULL)
439  return 0;
440  if (--ps->refcount > 0)
441  return ps->refcount;
442  ps_free_searches(ps);
443  dict_free(ps->dict);
444  dict2pid_free(ps->d2p);
445  acmod_free(ps->acmod);
446  logmath_free(ps->lmath);
447  cmd_ln_free_r(ps->config);
448  ckd_free(ps);
449  return 0;
450 }
451 
452 cmd_ln_t *
454 {
455  return ps->config;
456 }
457 
458 logmath_t *
460 {
461  return ps->lmath;
462 }
463 
464 fe_t *
466 {
467  return ps->acmod->fe;
468 }
469 
470 feat_t *
472 {
473  return ps->acmod->fcb;
474 }
475 
476 ps_mllr_t *
478 {
479  return acmod_update_mllr(ps->acmod, mllr);
480 }
481 
482 int
483 ps_set_search(ps_decoder_t *ps, const char *name)
484 {
485  ps_search_t *search = ps_find_search(ps, name);
486  if (!search)
487  return -1;
488 
489  ps->search = search;
490  /* Set pl window depending on the search */
491  if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
492  ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
493  } else {
494  ps->pl_window = 0;
495  }
496 
497  return 0;
498 }
499 
500 const char*
502 {
503  hash_iter_t *search_it;
504  const char* name = NULL;
505  for (search_it = hash_table_iter(ps->searches); search_it;
506  search_it = hash_table_iter_next(search_it)) {
507  if (hash_entry_val(search_it->ent) == ps->search) {
508  name = hash_entry_key(search_it->ent);
509  break;
510  }
511  }
512  return name;
513 }
514 
515 int
516 ps_unset_search(ps_decoder_t *ps, const char *name)
517 {
518  ps_search_t *search = hash_table_delete(ps->searches, name);
519  if (!search)
520  return -1;
521  if (ps->search == search)
522  ps->search = NULL;
523  ps_search_free(search);
524  return 0;
525 }
526 
529 {
530  return (ps_search_iter_t *)hash_table_iter(ps->searches);
531 }
532 
535 {
536  return (ps_search_iter_t *)hash_table_iter_next((hash_iter_t *)itor);
537 }
538 
539 const char*
541 {
542  return (const char*)(((hash_iter_t *)itor)->ent->key);
543 }
544 
545 void
547 {
548  hash_table_iter_free((hash_iter_t *)itor);
549 }
550 
551 ngram_model_t *
552 ps_get_lm(ps_decoder_t *ps, const char *name)
553 {
554  ps_search_t *search = ps_find_search(ps, name);
555  if (search && strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search)))
556  return NULL;
557  return search ? ((ngram_search_t *) search)->lmset : NULL;
558 }
559 
560 fsg_model_t *
561 ps_get_fsg(ps_decoder_t *ps, const char *name)
562 {
563  ps_search_t *search = ps_find_search(ps, name);
564  if (search && strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search)))
565  return NULL;
566  return search ? ((fsg_search_t *) search)->fsg : NULL;
567 }
568 
569 const char*
570 ps_get_kws(ps_decoder_t *ps, const char* name)
571 {
572  ps_search_t *search = ps_find_search(ps, name);
573  if (search && strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search)))
574  return NULL;
575  return search ? kws_search_get_keywords(search) : NULL;
576 }
577 
578 static int
579 set_search_internal(ps_decoder_t *ps, ps_search_t *search)
580 {
581  ps_search_t *old_search;
582 
583  if (!search)
584  return -1;
585 
586  search->pls = ps->phone_loop;
587  old_search = (ps_search_t *) hash_table_replace(ps->searches, ps_search_name(search), search);
588  if (old_search != search)
589  ps_search_free(old_search);
590 
591  return 0;
592 }
593 
594 int
595 ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
596 {
597  ps_search_t *search;
598  search = ngram_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p);
599  return set_search_internal(ps, search);
600 }
601 
602 int
603 ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
604 {
605  ngram_model_t *lm;
606  int result;
607 
608  lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
609  if (!lm)
610  return -1;
611 
612  result = ps_set_lm(ps, name, lm);
613  ngram_model_free(lm);
614  return result;
615 }
616 
617 int
618 ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
619 {
620  ps_search_t *search;
621  search = allphone_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p);
622  return set_search_internal(ps, search);
623 }
624 
625 int
626 ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
627 {
628  ngram_model_t *lm;
629  int result;
630 
631  lm = NULL;
632  if (path)
633  lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
634  result = ps_set_allphone(ps, name, lm);
635  if (lm)
636  ngram_model_free(lm);
637  return result;
638 }
639 
640 int
641 ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
642 {
643  ps_search_t *search;
644  search = kws_search_init(name, NULL, keyfile, ps->config, ps->acmod, ps->dict, ps->d2p);
645  return set_search_internal(ps, search);
646 }
647 
648 int
649 ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase)
650 {
651  ps_search_t *search;
652  search = kws_search_init(name, keyphrase, NULL, ps->config, ps->acmod, ps->dict, ps->d2p);
653  return set_search_internal(ps, search);
654 }
655 
656 int
657 ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg)
658 {
659  ps_search_t *search;
660  search = fsg_search_init(name, fsg, ps->config, ps->acmod, ps->dict, ps->d2p);
661  return set_search_internal(ps, search);
662 }
663 
664 int
665 ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
666 {
667  fsg_model_t *fsg;
668  jsgf_rule_t *rule;
669  char const *toprule;
670  jsgf_t *jsgf = jsgf_parse_file(path, NULL);
671  float lw;
672  int result;
673 
674  if (!jsgf)
675  return -1;
676 
677  rule = NULL;
678  /* Take the -toprule if specified. */
679  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
680  rule = jsgf_get_rule(jsgf, toprule);
681  if (rule == NULL) {
682  E_ERROR("Start rule %s not found\n", toprule);
683  jsgf_grammar_free(jsgf);
684  return -1;
685  }
686  } else {
687  rule = jsgf_get_public_rule(jsgf);
688  if (rule == NULL) {
689  E_ERROR("No public rules found in %s\n", path);
690  jsgf_grammar_free(jsgf);
691  return -1;
692  }
693  }
694 
695  lw = cmd_ln_float32_r(ps->config, "-lw");
696  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
697  result = ps_set_fsg(ps, name, fsg);
698  fsg_model_free(fsg);
699  jsgf_grammar_free(jsgf);
700  return result;
701 }
702 
703 int
704 ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
705 {
706  fsg_model_t *fsg;
707  jsgf_rule_t *rule;
708  char const *toprule;
709  jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL);
710  float lw;
711  int result;
712 
713  if (!jsgf)
714  return -1;
715 
716  rule = NULL;
717  /* Take the -toprule if specified. */
718  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
719  rule = jsgf_get_rule(jsgf, toprule);
720  if (rule == NULL) {
721  E_ERROR("Start rule %s not found\n", toprule);
722  return -1;
723  }
724  } else {
725  rule = jsgf_get_public_rule(jsgf);
726  if (rule == NULL) {
727  E_ERROR("No public rules found in input string\n");
728  return -1;
729  }
730  }
731 
732  lw = cmd_ln_float32_r(ps->config, "-lw");
733  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
734  result = ps_set_fsg(ps, name, fsg);
735  fsg_model_free(fsg);
736  return result;
737 }
738 
739 
740 int
741 ps_load_dict(ps_decoder_t *ps, char const *dictfile,
742  char const *fdictfile, char const *format)
743 {
744  cmd_ln_t *newconfig;
745  dict2pid_t *d2p;
746  dict_t *dict;
747  hash_iter_t *search_it;
748 
749  /* Create a new scratch config to load this dict (so existing one
750  * won't be affected if it fails) */
751  newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
752  cmd_ln_set_boolean_r(newconfig, "-dictcase",
753  cmd_ln_boolean_r(ps->config, "-dictcase"));
754  cmd_ln_set_str_r(newconfig, "-dict", dictfile);
755  if (fdictfile)
756  cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
757  else
758  cmd_ln_set_str_r(newconfig, "-fdict",
759  cmd_ln_str_r(ps->config, "-fdict"));
760 
761  /* Try to load it. */
762  if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
763  cmd_ln_free_r(newconfig);
764  return -1;
765  }
766 
767  /* Reinit the dict2pid. */
768  if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
769  cmd_ln_free_r(newconfig);
770  return -1;
771  }
772 
773  /* Success! Update the existing config to reflect new dicts and
774  * drop everything into place. */
775  cmd_ln_free_r(newconfig);
776  cmd_ln_set_str_r(ps->config, "-dict", dictfile);
777  if (fdictfile)
778  cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
779  dict_free(ps->dict);
780  ps->dict = dict;
781  dict2pid_free(ps->d2p);
782  ps->d2p = d2p;
783 
784  /* And tell all searches to reconfigure themselves. */
785  for (search_it = hash_table_iter(ps->searches); search_it;
786  search_it = hash_table_iter_next(search_it)) {
787  if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
788  hash_table_iter_free(search_it);
789  return -1;
790  }
791  }
792 
793  return 0;
794 }
795 
796 int
797 ps_save_dict(ps_decoder_t *ps, char const *dictfile,
798  char const *format)
799 {
800  return dict_write(ps->dict, dictfile, format);
801 }
802 
803 int
805  char const *word,
806  char const *phones,
807  int update)
808 {
809  int32 wid;
810  s3cipid_t *pron;
811  hash_iter_t *search_it;
812  char **phonestr, *tmp;
813  int np, i, rv;
814 
815  /* Parse phones into an array of phone IDs. */
816  tmp = ckd_salloc(phones);
817  np = str2words(tmp, NULL, 0);
818  phonestr = ckd_calloc(np, sizeof(*phonestr));
819  str2words(tmp, phonestr, np);
820  pron = ckd_calloc(np, sizeof(*pron));
821  for (i = 0; i < np; ++i) {
822  pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
823  if (pron[i] == -1) {
824  E_ERROR("Unknown phone %s in phone string %s\n",
825  phonestr[i], tmp);
826  ckd_free(phonestr);
827  ckd_free(tmp);
828  ckd_free(pron);
829  return -1;
830  }
831  }
832  /* No longer needed. */
833  ckd_free(phonestr);
834  ckd_free(tmp);
835 
836  /* Add it to the dictionary. */
837  if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
838  ckd_free(pron);
839  return -1;
840  }
841  /* No longer needed. */
842  ckd_free(pron);
843 
844  /* Now we also have to add it to dict2pid. */
845  dict2pid_add_word(ps->d2p, wid);
846 
847  /* TODO: we definitely need to refactor this */
848  for (search_it = hash_table_iter(ps->searches); search_it;
849  search_it = hash_table_iter_next(search_it)) {
850  ps_search_t *search = hash_entry_val(search_it->ent);
851  if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
852  ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;
853  if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
854  hash_table_iter_free(search_it);
855  return -1;
856  }
857  }
858 
859  if (update) {
860  if ((rv = ps_search_reinit(search, ps->dict, ps->d2p) < 0)) {
861  hash_table_iter_free(search_it);
862  return rv;
863  }
864  }
865  }
866 
867  /* Rebuild the widmap and search tree if requested. */
868  return wid;
869 }
870 
871 char *
872 ps_lookup_word(ps_decoder_t *ps, const char *word)
873 {
874  s3wid_t wid;
875  int32 phlen, j;
876  char *phones;
877  dict_t *dict = ps->dict;
878 
879  wid = dict_wordid(dict, word);
880  if (wid == BAD_S3WID)
881  return NULL;
882 
883  for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j)
884  phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1;
885  phones = ckd_calloc(1, phlen);
886  for (j = 0; j < dict_pronlen(dict, wid); ++j) {
887  strcat(phones, dict_ciphone_str(dict, wid, j));
888  if (j != dict_pronlen(dict, wid) - 1)
889  strcat(phones, " ");
890  }
891  return phones;
892 }
893 
894 long
895 ps_decode_raw(ps_decoder_t *ps, FILE *rawfh,
896  long maxsamps)
897 {
898  int16 *data;
899  long total;
900  OFF_T pos;
901  OFF_T endpos;
902 
903  ps_start_stream(ps);
904  ps_start_utt(ps);
905 
906  /* If this file is seekable or maxsamps is specified, then decode
907  * the whole thing at once. */
908  if (maxsamps != -1) {
909  data = ckd_calloc(maxsamps, sizeof(*data));
910  total = fread(data, sizeof(*data), maxsamps, rawfh);
911  ps_process_raw(ps, data, total, FALSE, TRUE);
912  ckd_free(data);
913  } else if ((pos = FTELL(rawfh)) >= 0) {
914  FSEEK(rawfh, 0, SEEK_END);
915  endpos = FTELL(rawfh);
916  FSEEK(rawfh, pos, SEEK_SET);
917  maxsamps = endpos - pos;
918 
919  data = ckd_calloc(maxsamps, sizeof(*data));
920  total = fread(data, sizeof(*data), maxsamps, rawfh);
921  ps_process_raw(ps, data, total, FALSE, TRUE);
922  ckd_free(data);
923  } else {
924  /* Otherwise decode it in a stream. */
925  total = 0;
926  while (!feof(rawfh)) {
927  int16 data[256];
928  size_t nread;
929 
930  nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh);
931  ps_process_raw(ps, data, nread, FALSE, FALSE);
932  total += nread;
933  }
934  }
935  ps_end_utt(ps);
936  return total;
937 }
938 
939 int
941 {
943  return 0;
944 }
945 
946 int
948 {
949  int rv;
950  char uttid[16];
951 
952  if (ps->acmod->state == ACMOD_STARTED || ps->acmod->state == ACMOD_PROCESSING) {
953  E_ERROR("Utterance already started\n");
954  return -1;
955  }
956 
957  if (ps->search == NULL) {
958  E_ERROR("No search module is selected, did you forget to "
959  "specify a language model or grammar?\n");
960  return -1;
961  }
962 
963  ptmr_reset(&ps->perf);
964  ptmr_start(&ps->perf);
965 
966  sprintf(uttid, "%09u", ps->uttno);
967  ++ps->uttno;
968 
969  /* Remove any residual word lattice and hypothesis. */
970  ps_lattice_free(ps->search->dag);
971  ps->search->dag = NULL;
972  ps->search->last_link = NULL;
973  ps->search->post = 0;
974  ckd_free(ps->search->hyp_str);
975  ps->search->hyp_str = NULL;
976  if ((rv = acmod_start_utt(ps->acmod)) < 0)
977  return rv;
978 
979  /* Start logging features and audio if requested. */
980  if (ps->mfclogdir) {
981  char *logfn = string_join(ps->mfclogdir, "/",
982  uttid, ".mfc", NULL);
983  FILE *mfcfh;
984  E_INFO("Writing MFCC log file: %s\n", logfn);
985  if ((mfcfh = fopen(logfn, "wb")) == NULL) {
986  E_ERROR_SYSTEM("Failed to open MFCC log file %s", logfn);
987  ckd_free(logfn);
988  return -1;
989  }
990  ckd_free(logfn);
991  acmod_set_mfcfh(ps->acmod, mfcfh);
992  }
993  if (ps->rawlogdir) {
994  char *logfn = string_join(ps->rawlogdir, "/",
995  uttid, ".raw", NULL);
996  FILE *rawfh;
997  E_INFO("Writing raw audio log file: %s\n", logfn);
998  if ((rawfh = fopen(logfn, "wb")) == NULL) {
999  E_ERROR_SYSTEM("Failed to open raw audio log file %s", logfn);
1000  ckd_free(logfn);
1001  return -1;
1002  }
1003  ckd_free(logfn);
1004  acmod_set_rawfh(ps->acmod, rawfh);
1005  }
1006  if (ps->senlogdir) {
1007  char *logfn = string_join(ps->senlogdir, "/",
1008  uttid, ".sen", NULL);
1009  FILE *senfh;
1010  E_INFO("Writing senone score log file: %s\n", logfn);
1011  if ((senfh = fopen(logfn, "wb")) == NULL) {
1012  E_ERROR_SYSTEM("Failed to open senone score log file %s", logfn);
1013  ckd_free(logfn);
1014  return -1;
1015  }
1016  ckd_free(logfn);
1017  acmod_set_senfh(ps->acmod, senfh);
1018  }
1019 
1020  /* Start auxiliary phone loop search. */
1021  if (ps->phone_loop)
1022  ps_search_start(ps->phone_loop);
1023 
1024  return ps_search_start(ps->search);
1025 }
1026 
1027 static int
1028 ps_search_forward(ps_decoder_t *ps)
1029 {
1030  int nfr;
1031 
1032  nfr = 0;
1033  while (ps->acmod->n_feat_frame > 0) {
1034  int k;
1035  if (ps->pl_window > 0)
1036  if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0)
1037  return k;
1038  if (ps->acmod->output_frame >= ps->pl_window)
1039  if ((k = ps_search_step(ps->search,
1040  ps->acmod->output_frame - ps->pl_window)) < 0)
1041  return k;
1042  acmod_advance(ps->acmod);
1043  ++ps->n_frame;
1044  ++nfr;
1045  }
1046  return nfr;
1047 }
1048 
1049 int
1051 {
1052  int nfr, n_searchfr;
1053 
1054  ps_start_utt(ps);
1055  n_searchfr = 0;
1056  acmod_set_insenfh(ps->acmod, senfh);
1057  while ((nfr = acmod_read_scores(ps->acmod)) > 0) {
1058  if ((nfr = ps_search_forward(ps)) < 0) {
1059  ps_end_utt(ps);
1060  return nfr;
1061  }
1062  n_searchfr += nfr;
1063  }
1064  ps_end_utt(ps);
1065  acmod_set_insenfh(ps->acmod, NULL);
1066 
1067  return n_searchfr;
1068 }
1069 
1070 int
1072  int16 const *data,
1073  size_t n_samples,
1074  int no_search,
1075  int full_utt)
1076 {
1077  int n_searchfr = 0;
1078 
1079  if (ps->acmod->state == ACMOD_IDLE) {
1080  E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n");
1081  return 0;
1082  }
1083 
1084  if (no_search)
1085  acmod_set_grow(ps->acmod, TRUE);
1086 
1087  while (n_samples) {
1088  int nfr;
1089 
1090  /* Process some data into features. */
1091  if ((nfr = acmod_process_raw(ps->acmod, &data,
1092  &n_samples, full_utt)) < 0)
1093  return nfr;
1094 
1095  /* Score and search as much data as possible */
1096  if (no_search)
1097  continue;
1098  if ((nfr = ps_search_forward(ps)) < 0)
1099  return nfr;
1100  n_searchfr += nfr;
1101  }
1102 
1103  return n_searchfr;
1104 }
1105 
1106 int
1108  mfcc_t **data,
1109  int32 n_frames,
1110  int no_search,
1111  int full_utt)
1112 {
1113  int n_searchfr = 0;
1114 
1115  if (no_search)
1116  acmod_set_grow(ps->acmod, TRUE);
1117 
1118  while (n_frames) {
1119  int nfr;
1120 
1121  /* Process some data into features. */
1122  if ((nfr = acmod_process_cep(ps->acmod, &data,
1123  &n_frames, full_utt)) < 0)
1124  return nfr;
1125 
1126  /* Score and search as much data as possible */
1127  if (no_search)
1128  continue;
1129  if ((nfr = ps_search_forward(ps)) < 0)
1130  return nfr;
1131  n_searchfr += nfr;
1132  }
1133 
1134  return n_searchfr;
1135 }
1136 
1137 int
1139 {
1140  int rv, i;
1141 
1142  if (ps->acmod->state == ACMOD_ENDED || ps->acmod->state == ACMOD_IDLE) {
1143  E_ERROR("Utterance is not started\n");
1144  return -1;
1145  }
1146  acmod_end_utt(ps->acmod);
1147 
1148  /* Search any remaining frames. */
1149  if ((rv = ps_search_forward(ps)) < 0) {
1150  ptmr_stop(&ps->perf);
1151  return rv;
1152  }
1153  /* Finish phone loop search. */
1154  if (ps->phone_loop) {
1155  if ((rv = ps_search_finish(ps->phone_loop)) < 0) {
1156  ptmr_stop(&ps->perf);
1157  return rv;
1158  }
1159  }
1160  /* Search any frames remaining in the lookahead window. */
1161  if (ps->acmod->output_frame >= ps->pl_window) {
1162  for (i = ps->acmod->output_frame - ps->pl_window;
1163  i < ps->acmod->output_frame; ++i)
1164  ps_search_step(ps->search, i);
1165  }
1166  /* Finish main search. */
1167  if ((rv = ps_search_finish(ps->search)) < 0) {
1168  ptmr_stop(&ps->perf);
1169  return rv;
1170  }
1171  ptmr_stop(&ps->perf);
1172 
1173  /* Log a backtrace if requested. */
1174  if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
1175  const char* hyp;
1176  ps_seg_t *seg;
1177  int32 score;
1178 
1179  hyp = ps_get_hyp(ps, &score);
1180 
1181  if (hyp != NULL) {
1182  E_INFO("%s (%d)\n", hyp, score);
1183  E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
1184  "word", "start", "end", "pprob", "ascr", "lscr", "lback");
1185  for (seg = ps_seg_iter(ps, &score); seg;
1186  seg = ps_seg_next(seg)) {
1187  char const *word;
1188  int sf, ef;
1189  int32 post, lscr, ascr, lback;
1190 
1191  word = ps_seg_word(seg);
1192  ps_seg_frames(seg, &sf, &ef);
1193  post = ps_seg_prob(seg, &ascr, &lscr, &lback);
1194  E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
1195  word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
1196  ascr, lscr, lback);
1197  }
1198  }
1199  }
1200  return rv;
1201 }
1202 
1203 char const *
1204 ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
1205 {
1206  char const *hyp;
1207 
1208  ptmr_start(&ps->perf);
1209  hyp = ps_search_hyp(ps->search, out_best_score, NULL);
1210  ptmr_stop(&ps->perf);
1211  return hyp;
1212 }
1213 
1214 char const *
1215 ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
1216 {
1217  char const *hyp;
1218 
1219  ptmr_start(&ps->perf);
1220  hyp = ps_search_hyp(ps->search, NULL, out_is_final);
1221  ptmr_stop(&ps->perf);
1222  return hyp;
1223 }
1224 
1225 
1226 int32
1228 {
1229  int32 prob;
1230 
1231  ptmr_start(&ps->perf);
1232  prob = ps_search_prob(ps->search);
1233  ptmr_stop(&ps->perf);
1234  return prob;
1235 }
1236 
1237 ps_seg_t *
1238 ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
1239 {
1240  ps_seg_t *itor;
1241 
1242  ptmr_start(&ps->perf);
1243  itor = ps_search_seg_iter(ps->search, out_best_score);
1244  ptmr_stop(&ps->perf);
1245  return itor;
1246 }
1247 
1248 ps_seg_t *
1250 {
1251  return ps_search_seg_next(seg);
1252 }
1253 
1254 char const *
1256 {
1257  return seg->word;
1258 }
1259 
1260 void
1261 ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
1262 {
1263  int uf;
1264  uf = acmod_stream_offset(seg->search->acmod);
1265  if (out_sf) *out_sf = seg->sf + uf;
1266  if (out_ef) *out_ef = seg->ef + uf;
1267 }
1268 
1269 int32
1270 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
1271 {
1272  if (out_ascr) *out_ascr = seg->ascr;
1273  if (out_lscr) *out_lscr = seg->lscr;
1274  if (out_lback) *out_lback = seg->lback;
1275  return seg->prob;
1276 }
1277 
1278 void
1280 {
1281  ps_search_seg_free(seg);
1282 }
1283 
1284 ps_lattice_t *
1286 {
1287  return ps_search_lattice(ps->search);
1288 }
1289 
1290 ps_nbest_t *
1291 ps_nbest(ps_decoder_t *ps, int sf, int ef,
1292  char const *ctx1, char const *ctx2)
1293 {
1294  ps_lattice_t *dag;
1295  ngram_model_t *lmset;
1296  ps_astar_t *nbest;
1297  float32 lwf;
1298  int32 w1, w2;
1299 
1300  if (ps->search == NULL)
1301  return NULL;
1302  if ((dag = ps_get_lattice(ps)) == NULL)
1303  return NULL;
1304 
1305  /* FIXME: This is all quite specific to N-Gram search. Either we
1306  * should make N-best a method for each search module or it needs
1307  * to be abstracted to work for N-Gram and FSG. */
1308  if (0 != strcmp(ps_search_type(ps->search), PS_SEARCH_TYPE_NGRAM)) {
1309  lmset = NULL;
1310  lwf = 1.0f;
1311  } else {
1312  lmset = ((ngram_search_t *)ps->search)->lmset;
1313  lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio;
1314  }
1315 
1316  w1 = ctx1 ? dict_wordid(ps_search_dict(ps->search), ctx1) : -1;
1317  w2 = ctx2 ? dict_wordid(ps_search_dict(ps->search), ctx2) : -1;
1318  nbest = ps_astar_start(dag, lmset, lwf, sf, ef, w1, w2);
1319 
1320  return (ps_nbest_t *)nbest;
1321 }
1322 
1323 void
1325 {
1326  ps_astar_finish(nbest);
1327 }
1328 
1329 ps_nbest_t *
1331 {
1332  ps_latpath_t *next;
1333 
1334  next = ps_astar_next(nbest);
1335  if (next == NULL) {
1336  ps_nbest_free(nbest);
1337  return NULL;
1338  }
1339  return nbest;
1340 }
1341 
1342 char const *
1343 ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
1344 {
1345  assert(nbest != NULL);
1346 
1347  if (nbest->top == NULL)
1348  return NULL;
1349  if (out_score) *out_score = nbest->top->score;
1350  return ps_astar_hyp(nbest, nbest->top);
1351 }
1352 
1353 ps_seg_t *
1354 ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
1355 {
1356  if (nbest->top == NULL)
1357  return NULL;
1358  if (out_score) *out_score = nbest->top->score;
1359  return ps_astar_seg_iter(nbest, nbest->top, 1.0);
1360 }
1361 
1362 int
1364 {
1365  return ps->acmod->output_frame + 1;
1366 }
1367 
1368 void
1369 ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech,
1370  double *out_ncpu, double *out_nwall)
1371 {
1372  int32 frate;
1373 
1374  frate = cmd_ln_int32_r(ps->config, "-frate");
1375  *out_nspeech = (double)ps->acmod->output_frame / frate;
1376  *out_ncpu = ps->perf.t_cpu;
1377  *out_nwall = ps->perf.t_elapsed;
1378 }
1379 
1380 void
1381 ps_get_all_time(ps_decoder_t *ps, double *out_nspeech,
1382  double *out_ncpu, double *out_nwall)
1383 {
1384  int32 frate;
1385 
1386  frate = cmd_ln_int32_r(ps->config, "-frate");
1387  *out_nspeech = (double)ps->n_frame / frate;
1388  *out_ncpu = ps->perf.t_tot_cpu;
1389  *out_nwall = ps->perf.t_tot_elapsed;
1390 }
1391 
1392 uint8
1394 {
1395  return fe_get_vad_state(ps->acmod->fe);
1396 }
1397 
1398 void
1400  const char *type,
1401  const char *name,
1402  cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
1403  dict2pid_t *d2p)
1404 {
1405  search->vt = vt;
1406  search->name = ckd_salloc(name);
1407  search->type = ckd_salloc(type);
1408 
1409  search->config = config;
1410  search->acmod = acmod;
1411  if (d2p)
1412  search->d2p = dict2pid_retain(d2p);
1413  else
1414  search->d2p = NULL;
1415  if (dict) {
1416  search->dict = dict_retain(dict);
1417  search->start_wid = dict_startwid(dict);
1418  search->finish_wid = dict_finishwid(dict);
1419  search->silence_wid = dict_silwid(dict);
1420  search->n_words = dict_size(dict);
1421  }
1422  else {
1423  search->dict = NULL;
1424  search->start_wid = search->finish_wid = search->silence_wid = -1;
1425  search->n_words = 0;
1426  }
1427 }
1428 
1429 void
1431 {
1432  /* FIXME: We will have refcounting on acmod, config, etc, at which
1433  * point we will free them here too. */
1434  ckd_free(search->name);
1435  ckd_free(search->type);
1436  dict_free(search->dict);
1437  dict2pid_free(search->d2p);
1438  ckd_free(search->hyp_str);
1439  ps_lattice_free(search->dag);
1440 }
1441 
1442 void
1444  dict2pid_t *d2p)
1445 {
1446  dict_free(search->dict);
1447  dict2pid_free(search->d2p);
1448  /* FIXME: _retain() should just return NULL if passed NULL. */
1449  if (dict) {
1450  search->dict = dict_retain(dict);
1451  search->start_wid = dict_startwid(dict);
1452  search->finish_wid = dict_finishwid(dict);
1453  search->silence_wid = dict_silwid(dict);
1454  search->n_words = dict_size(dict);
1455  }
1456  else {
1457  search->dict = NULL;
1458  search->start_wid = search->finish_wid = search->silence_wid = -1;
1459  search->n_words = 0;
1460  }
1461  if (d2p)
1462  search->d2p = dict2pid_retain(d2p);
1463  else
1464  search->d2p = NULL;
1465 }
1466 
1467 void
1469 {
1470  acmod_set_rawdata_size(ps->acmod, size);
1471 }
1472 
1473 void
1474 ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
1475 {
1476  acmod_get_rawdata(ps->acmod, buffer, size);
1477 }
ps_mllr_s
Feature space linear transform structure.
Definition: acmod.h:82
ps_search_iter_s
Definition: pocketsphinx_internal.h:230
ps_start_stream
POCKETSPHINX_EXPORT int ps_start_stream(ps_decoder_t *ps)
Start processing of the stream of speech.
Definition: pocketsphinx.c:940
ps_decoder_s::uttno
uint32 uttno
Utterance counter.
Definition: pocketsphinx_internal.h:221
ps_nbest
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest(ps_decoder_t *ps, int sf, int ef, char const *ctx1, char const *ctx2)
Get an iterator over the best hypotheses, optionally within a selected region of the utterance.
Definition: pocketsphinx.c:1291
ps_search_init
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
Definition: pocketsphinx.c:1399
ps_astar_s
A* search structure.
Definition: ps_lattice_internal.h:163
ps_search_s::silence_wid
int32 silence_wid
Silence word ID.
Definition: pocketsphinx_internal.h:118
ps_search_s::dag
ps_lattice_t * dag
Current hypothesis word graph.
Definition: pocketsphinx_internal.h:110
ps_astar_finish
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
Definition: ps_lattice.c:1925
ps_nbest_next
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest_next(ps_nbest_t *nbest)
Move an N-best list iterator forward.
Definition: pocketsphinx.c:1330
ps_seg_frames
POCKETSPHINX_EXPORT void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
Get inclusive start and end frames from a segmentation iterator.
Definition: pocketsphinx.c:1261
ps_decoder_s::searches
hash_table_t * searches
Set of search modules.
Definition: pocketsphinx_internal.h:213
ps_decoder_s::mfclogdir
const char * mfclogdir
Log directory for MFCC files.
Definition: pocketsphinx_internal.h:224
acmod_set_senfh
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
Definition: acmod.c:366
ps_seg_prob
POCKETSPHINX_EXPORT int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
Get language, acoustic, and posterior probabilities from a segmentation iterator.
Definition: pocketsphinx.c:1270
ps_get_utt_time
POCKETSPHINX_EXPORT void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get performance information for the current utterance.
Definition: pocketsphinx.c:1369
acmod_read_scores
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition: acmod.c:1032
ps_seg_s::sf
frame_idx_t sf
Start frame.
Definition: pocketsphinx_internal.h:182
ps_decoder_s::pl_window
int pl_window
Window size for phoneme lookahead.
Definition: pocketsphinx_internal.h:218
pocketsphinx_internal.h
ps_set_jsgf_file
POCKETSPHINX_EXPORT int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search using JSGF model.
Definition: pocketsphinx.c:665
ps_default_search_args
POCKETSPHINX_EXPORT void ps_default_search_args(cmd_ln_t *)
Sets default grammar and language model if they are not set explicitly and are present in the default...
Definition: pocketsphinx.c:180
ps_get_lattice
POCKETSPHINX_EXPORT ps_lattice_t * ps_get_lattice(ps_decoder_t *ps)
Get word lattice.
Definition: pocketsphinx.c:1285
ps_search_s::finish_wid
int32 finish_wid
Finish word ID.
Definition: pocketsphinx_internal.h:119
ps_latpath_s::score
int32 score
Exact score from start node up to node->sf.
Definition: ps_lattice_internal.h:157
acmod_end_utt
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition: acmod.c:443
ps_decode_raw
POCKETSPHINX_EXPORT long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, long maxsamps)
Decode a raw audio stream.
Definition: pocketsphinx.c:895
ps_search_s
Base structure for search module.
Definition: pocketsphinx_internal.h:98
ps_search_s::n_words
int32 n_words
Number of words known to search (may be less than in the dictionary)
Definition: pocketsphinx_internal.h:113
ps_get_lm
POCKETSPHINX_EXPORT ngram_model_t * ps_get_lm(ps_decoder_t *ps, const char *name)
Get the language model set object for this decoder.
Definition: pocketsphinx.c:552
ps_add_word
POCKETSPHINX_EXPORT int ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
Add a word to the pronunciation dictionary.
Definition: pocketsphinx.c:804
ps_search_s::pls
ps_search_t * pls
Phoneme loop for lookahead.
Definition: pocketsphinx_internal.h:104
dict2pid_retain
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
Definition: dict2pid.c:500
ps_seg_s
Base structure for hypothesis segmentation iterator.
Definition: pocketsphinx_internal.h:178
ps_set_lm
POCKETSPHINX_EXPORT int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on N-gram language model.
Definition: pocketsphinx.c:595
ACMOD_IDLE
Not in an utterance.
Definition: acmod.h:68
ps_search_iter_free
POCKETSPHINX_EXPORT void ps_search_iter_free(ps_search_iter_t *itor)
Delete an unfinished search iterator.
Definition: pocketsphinx.c:546
ps_set_lm_file
POCKETSPHINX_EXPORT int ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on N-gram language model.
Definition: pocketsphinx.c:603
ps_search_iter_next
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter_next(ps_search_iter_t *itor)
Updates search iterator to point to the next position.
Definition: pocketsphinx.c:534
ps_decode_senscr
POCKETSPHINX_EXPORT int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
Decode a senone score dump file.
Definition: pocketsphinx.c:1050
acmod_set_mfcfh
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition: acmod.c:377
ps_get_rawdata
POCKETSPHINX_EXPORT void ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Definition: pocketsphinx.c:1474
ps_get_prob
POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps)
Get posterior probability.
Definition: pocketsphinx.c:1227
ps_get_feat
POCKETSPHINX_EXPORT feat_t * ps_get_feat(ps_decoder_t *ps)
Get the dynamic feature computation object for this decoder.
Definition: pocketsphinx.c:471
ps_get_hyp
POCKETSPHINX_EXPORT const char * ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
Get hypothesis string and path score.
Definition: pocketsphinx.c:1204
ps_decoder_s::n_frame
uint32 n_frame
Total number of frames processed.
Definition: pocketsphinx_internal.h:223
ps_search_s::d2p
dict2pid_t * d2p
Dictionary to senone mappings.
Definition: pocketsphinx_internal.h:108
ps_load_dict
POCKETSPHINX_EXPORT int ps_load_dict(ps_decoder_t *ps, char const *dictfile, char const *fdictfile, char const *format)
Reload the pronunciation dictionary from a file.
Definition: pocketsphinx.c:741
ps_unset_search
POCKETSPHINX_EXPORT int ps_unset_search(ps_decoder_t *ps, const char *name)
Unsets the search and releases related resources.
Definition: pocketsphinx.c:516
ps_save_dict
POCKETSPHINX_EXPORT int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format)
Dump the current pronunciation dictionary to a file.
Definition: pocketsphinx.c:797
ps_process_cep
POCKETSPHINX_EXPORT int ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int n_frames, int no_search, int full_utt)
Decode acoustic feature data.
ps_seg_free
POCKETSPHINX_EXPORT void ps_seg_free(ps_seg_t *seg)
Finish iterating over a word segmentation early, freeing resources.
Definition: pocketsphinx.c:1279
ps_search_base_reinit
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
Definition: pocketsphinx.c:1443
ps_seg_iter
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
Get an iterator over the word segmentation for the best hypothesis.
Definition: pocketsphinx.c:1238
ps_decoder_s::dict
dict_t * dict
Pronunciation dictionary.
Definition: pocketsphinx_internal.h:208
ps_decoder_s::d2p
dict2pid_t * d2p
Dictionary to senone mapping.
Definition: pocketsphinx_internal.h:209
ps_set_rawdata_size
POCKETSPHINX_EXPORT void ps_set_rawdata_size(ps_decoder_t *ps, int32 size)
Sets the limit of the raw audio data to store in decoder to retrieve it later on ps_get_rawdata.
Definition: pocketsphinx.c:1468
BAD_S3WID
#define BAD_S3WID
Dictionary word id.
Definition: s3types.h:90
dict_add_word
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
Definition: dict.c:80
acmod_s::n_feat_frame
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition: acmod.h:199
ps_update_mllr
POCKETSPHINX_EXPORT ps_mllr_t * ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
Adapt current acoustic model using a linear transform.
Definition: pocketsphinx.c:477
ngram_search_fwdtree.h
ps_decoder_s::lmath
logmath_t * lmath
Log math computation.
Definition: pocketsphinx_internal.h:210
ps_set_keyphrase
POCKETSPHINX_EXPORT int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase)
Adds new keyword to spot.
Definition: pocketsphinx.c:649
ps_seg_s::word
const char * word
Word string (pointer into dictionary hash)
Definition: pocketsphinx_internal.h:181
ps_decoder_s::senlogdir
const char * senlogdir
Log directory for senone score files.
Definition: pocketsphinx_internal.h:226
ps_search_s::post
int32 post
Utterance posterior probability.
Definition: pocketsphinx_internal.h:112
ps_astar_start
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
Definition: ps_lattice.c:1712
ps_free
POCKETSPHINX_EXPORT int ps_free(ps_decoder_t *ps)
Finalize the decoder.
Definition: pocketsphinx.c:436
dict2pid_t
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
ps_seg_s::lscr
int32 lscr
Language model score.
Definition: pocketsphinx_internal.h:185
acmod_s::mdef
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
ps_nbest_seg
POCKETSPHINX_EXPORT ps_seg_t * ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
Get the word segmentation from an N-best list iterator.
Definition: pocketsphinx.c:1354
ps_get_config
POCKETSPHINX_EXPORT cmd_ln_t * ps_get_config(ps_decoder_t *ps)
Get the configuration object for this decoder.
Definition: pocketsphinx.c:453
dict2pid_add_word
int dict2pid_add_word(dict2pid_t *d2p, int32 wid)
Add a word to the dict2pid structure (after adding it to dict).
Definition: dict2pid.c:298
dict_free
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Definition: dict.c:468
ps_get_kws
const POCKETSPHINX_EXPORT char * ps_get_kws(ps_decoder_t *ps, const char *name)
Get the current Key phrase to spot.
Definition: pocketsphinx.c:570
acmod_set_rawdata_size
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
Definition: acmod.c:1341
s3cipid_t
int16 s3cipid_t
Size definitions for more semantically meaningful units.
Definition: s3types.h:63
ps_seg_s::search
ps_search_t * search
Search object from whence this came.
Definition: pocketsphinx_internal.h:180
ps_search_iter_val
const POCKETSPHINX_EXPORT char * ps_search_iter_val(ps_search_iter_t *itor)
Retrieves the name of the search the iterator points to.
Definition: pocketsphinx.c:540
acmod_init
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition: acmod.c:230
ps_seg_next
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_next(ps_seg_t *seg)
Get the next segment in a word segmentation.
Definition: pocketsphinx.c:1249
ACMOD_STARTED
Utterance started, no data yet.
Definition: acmod.h:69
ps_nbest_hyp
POCKETSPHINX_EXPORT const char * ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
Get the hypothesis string from an N-best list iterator.
Definition: pocketsphinx.c:1343
ps_reinit
POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
Reinitialize the decoder with updated configuration.
Definition: pocketsphinx.c:234
ps_lattice_internal.h
ps_end_utt
POCKETSPHINX_EXPORT int ps_end_utt(ps_decoder_t *ps)
End utterance processing.
Definition: pocketsphinx.c:1138
ps_search_s::hyp_str
char * hyp_str
Current hypothesis string.
Definition: pocketsphinx_internal.h:109
acmod_process_raw
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Definition: acmod.c:627
ps_retain
POCKETSPHINX_EXPORT ps_decoder_t * ps_retain(ps_decoder_t *ps)
Retain a pointer to the decoder.
Definition: pocketsphinx.c:429
ps_seg_s::prob
int32 prob
Log posterior probability.
Definition: pocketsphinx_internal.h:186
ps_get_fe
POCKETSPHINX_EXPORT fe_t * ps_get_fe(ps_decoder_t *ps)
Get the feature extraction object for this decoder.
Definition: pocketsphinx.c:465
dict_init
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Definition: dict.c:252
ps_decoder_s::phone_loop
ps_search_t * phone_loop
Phone loop search for lookahead.
Definition: pocketsphinx_internal.h:217
bin_mdef_ciphone_id
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
Definition: bin_mdef.c:690
ps_astar_next
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
Definition: ps_lattice.c:1771
ps_search_iter
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter(ps_decoder_t *ps)
Returns iterator over current searches.
Definition: pocketsphinx.c:528
ps_seg_s::ef
frame_idx_t ef
End frame.
Definition: pocketsphinx_internal.h:183
dict_retain
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
Definition: dict.c:461
dict_size
#define dict_size(d)
Packaged macro access to dictionary members.
Definition: dict.h:151
ps_get_logmath
POCKETSPHINX_EXPORT logmath_t * ps_get_logmath(ps_decoder_t *ps)
Get the log-math computation object for this decoder.
Definition: pocketsphinx.c:459
acmod_free
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition: acmod.c:301
acmod_set_grow
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition: acmod.c:412
ps_astar_hyp
const char * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
Definition: ps_lattice.c:1804
ps_decoder_s::acmod
acmod_t * acmod
Acoustic model.
Definition: pocketsphinx_internal.h:207
ps_astar_seg_iter
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
Definition: ps_lattice.c:1898
ps_args
POCKETSPHINX_EXPORT const arg_t * ps_args(void)
Returns the argument definitions used in ps_init().
Definition: pocketsphinx.c:423
ps_seg_word
POCKETSPHINX_EXPORT const char * ps_seg_word(ps_seg_t *seg)
Get word string from a segmentation iterator.
Definition: pocketsphinx.c:1255
ps_search_base_free
void ps_search_base_free(ps_search_t *search)
Free search.
Definition: pocketsphinx.c:1430
ps_decoder_s
Decoder object.
Definition: pocketsphinx_internal.h:201
ps_process_raw
POCKETSPHINX_EXPORT int ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples, int no_search, int full_utt)
Decode raw audio data.
Definition: pocketsphinx.c:1071
ACMOD_PROCESSING
Utterance in progress.
Definition: acmod.h:70
pocketsphinx.h
dict_write
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
Definition: dict.c:221
ps_searchfuncs_s
V-table for search algorithm.
Definition: pocketsphinx_internal.h:82
ps_search_s::last_link
ps_latlink_t * last_link
Final link in best path.
Definition: pocketsphinx_internal.h:111
acmod_stream_offset
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing.
Definition: acmod.c:1328
ps_seg_s::ascr
int32 ascr
Acoustic score.
Definition: pocketsphinx_internal.h:184
ps_nbest_free
POCKETSPHINX_EXPORT void ps_nbest_free(ps_nbest_t *nbest)
Finish N-best search early, releasing resources.
Definition: pocketsphinx.c:1324
dict_ciphone_str
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
Definition: dict.c:69
ps_lattice_s
Word graph structure used in bestpath/nbest search.
Definition: ps_lattice_internal.h:61
acmod_s::fcb
feat_t * fcb
Dynamic feature computation.
Definition: acmod.h:156
ps_search_s::config
cmd_ln_t * config
Configuration.
Definition: pocketsphinx_internal.h:105
dict2pid_build
dict2pid_t * dict2pid_build(bin_mdef_t *mdef, dict_t *dict)
Build the dict2pid structure for the given model/dictionary.
Definition: dict2pid.c:388
ps_seg_s::lback
int32 lback
Language model backoff.
Definition: pocketsphinx_internal.h:189
ps_search_s::vt
ps_searchfuncs_t * vt
V-table of search methods.
Definition: pocketsphinx_internal.h:99
ngram_search_s
N-Gram search module structure.
Definition: ngram_search.h:197
ps_start_utt
POCKETSPHINX_EXPORT int ps_start_utt(ps_decoder_t *ps)
Start utterance processing.
Definition: pocketsphinx.c:947
acmod_advance
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition: acmod.c:919
ps_decoder_s::search
ps_search_t * search
Currently active search module.
Definition: pocketsphinx_internal.h:216
dict_t
a structure for a dictionary.
Definition: dict.h:76
ps_get_hyp_final
POCKETSPHINX_EXPORT const char * ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
Get hypothesis string and final flag.
Definition: pocketsphinx.c:1215
ps_search_s::dict
dict_t * dict
Pronunciation dictionary.
Definition: pocketsphinx_internal.h:107
ps_latpath_s
Partial path structure used in N-best (A*) search.
Definition: ps_lattice_internal.h:153
ngram_search_init
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Definition: ngram_search.c:140
ngram_search_fwdflat.h
ps_decoder_s::refcount
int refcount
Reference count.
Definition: pocketsphinx_internal.h:204
ps_get_fsg
POCKETSPHINX_EXPORT fsg_model_t * ps_get_fsg(ps_decoder_t *ps, const char *name)
Get the finite-state grammar set object for this decoder.
Definition: pocketsphinx.c:561
ps_lattice_free
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
Definition: ps_lattice.c:665
ps_init
POCKETSPHINX_EXPORT ps_decoder_t * ps_init(cmd_ln_t *config)
Initialize the decoder from a configuration object.
Definition: pocketsphinx.c:404
ps_decoder_s::perf
ptmr_t perf
Performance counter for all of decoding.
Definition: pocketsphinx_internal.h:222
phone_loop_search.h
dict_wordid
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
Definition: dict.c:399
acmod_process_cep
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition: acmod.c:719
dict2pid_free
int dict2pid_free(dict2pid_t *d2p)
Free the memory dict2pid structure.
Definition: dict2pid.c:507
ps_set_fsg
POCKETSPHINX_EXPORT int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg)
Adds new search based on finite state grammar.
Definition: pocketsphinx.c:657
ps_get_all_time
POCKETSPHINX_EXPORT void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get overall performance information.
Definition: pocketsphinx.c:1381
acmod_update_mllr
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition: acmod.c:341
acmod_get_rawdata
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Definition: acmod.c:1352
ACMOD_ENDED
Utterance ended, still buffering.
Definition: acmod.h:71
ps_get_search
const POCKETSPHINX_EXPORT char * ps_get_search(ps_decoder_t *ps)
Returns the name of the current search in the decoder.
Definition: pocketsphinx.c:501
ps_set_search
POCKETSPHINX_EXPORT int ps_set_search(ps_decoder_t *ps, const char *name)
Activates the search with the provided name.
Definition: pocketsphinx.c:483
acmod_s::fe
fe_t * fe
Acoustic feature computation.
Definition: acmod.h:155
ps_decoder_s::rawlogdir
const char * rawlogdir
Log directory for audio files.
Definition: pocketsphinx_internal.h:225
ps_set_allphone_file
POCKETSPHINX_EXPORT int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on phone N-gram language model.
Definition: pocketsphinx.c:626
acmod_s
Acoustic model structure.
Definition: acmod.h:148
ps_decoder_s::config
cmd_ln_t * config
Configuration.
Definition: pocketsphinx_internal.h:203
ngram_search.h
acmod_s::state
uint8 state
State of utterance processing.
Definition: acmod.h:187
fsg_search_s
Implementation of FSG search (and "FSG set") structure.
Definition: fsg_search_internal.h:68
ps_search_s::start_wid
int32 start_wid
Start word ID.
Definition: pocketsphinx_internal.h:117
ps_lookup_word
POCKETSPHINX_EXPORT char * ps_lookup_word(ps_decoder_t *ps, const char *word)
Look up the word in the dictionary and return its phone transcription.
Definition: pocketsphinx.c:872
acmod_start_utt
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition: acmod.c:425
acmod_s::output_frame
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition: acmod.h:194
ps_set_kws
POCKETSPHINX_EXPORT int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
Adds keywords from a file to spotting.
Definition: pocketsphinx.c:641
acmod_set_rawfh
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition: acmod.c:389
ps_get_in_speech
POCKETSPHINX_EXPORT uint8 ps_get_in_speech(ps_decoder_t *ps)
Checks if the last fed audio buffer contained speech.
Definition: pocketsphinx.c:1393
ps_set_jsgf_string
POCKETSPHINX_EXPORT int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
Adds new search using JSGF model.
Definition: pocketsphinx.c:704
ps_get_n_frames
POCKETSPHINX_EXPORT int ps_get_n_frames(ps_decoder_t *ps)
Get the number of frames of data searched.
Definition: pocketsphinx.c:1363
ps_search_s::acmod
acmod_t * acmod
Acoustic model.
Definition: pocketsphinx_internal.h:106
acmod_start_stream
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
Definition: acmod.c:1334
ps_set_allphone
POCKETSPHINX_EXPORT int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on phone N-gram language model.
Definition: pocketsphinx.c:618
acmod_set_insenfh
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
Definition: acmod.c:884