SphinxBase
0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2007 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * \file ngram_model_internal.h Internal structures for N-Gram models 00039 * 00040 * Author: David Huggins-Daines <dhuggins@cs.cmu.edu> 00041 */ 00042 00043 #ifndef __NGRAM_MODEL_INTERNAL_H__ 00044 #define __NGRAM_MODEL_INTERNAL_H__ 00045 00046 #include "sphinxbase/ngram_model.h" 00047 #include "sphinxbase/hash_table.h" 00048 00055 struct ngram_model_s { 00056 int refcount; 00057 int32 *n_counts; 00058 int32 n_1g_alloc; 00059 int32 n_words; 00061 uint8 n; 00062 uint8 n_classes; 00063 uint8 writable; 00064 uint8 flags; 00066 logmath_t *lmath; 00067 float32 lw; 00068 int32 log_wip; 00069 int32 log_uw; 00070 int32 log_uniform; 00071 int32 log_uniform_weight; 00072 int32 log_zero; 00073 char **word_str; 00074 hash_table_t *wid; 00075 int32 *tmp_wids; 00076 struct ngram_class_s **classes; 00077 struct ngram_funcs_s *funcs; 00078 }; 00079 00083 struct ngram_class_s { 00084 int32 tag_wid; 00085 int32 start_wid; 00086 int32 n_words; 00087 int32 *prob1; 00091 struct ngram_hash_s { 00092 int32 wid; 00093 int32 prob1; 00094 int32 next; 00095 } *nword_hash; 00096 int32 n_hash; 00097 int32 n_hash_inuse; 00098 }; 00099 00100 #define NGRAM_HASH_SIZE 128 00101 00102 #define NGRAM_BASEWID(wid) ((wid)&0xffffff) 00103 #define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f) 00104 #define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid)) 00105 #define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000) 00106 00107 #define UG_ALLOC_STEP 10 00108 00110 typedef struct ngram_funcs_s { 00114 void (*free)(ngram_model_t *model); 00118 int (*apply_weights)(ngram_model_t *model, 00119 float32 lw, 00120 float32 wip, 00121 float32 uw); 00125 int32 (*score)(ngram_model_t *model, 00126 int32 wid, 00127 int32 *history, 00128 int32 n_hist, 00129 int32 *n_used); 00134 int32 (*raw_score)(ngram_model_t *model, 00135 int32 wid, 00136 int32 *history, 00137 int32 n_hist, 00138 int32 *n_used); 00150 int32 (*add_ug)(ngram_model_t *model, 00151 int32 wid, int32 lweight); 00155 void (*flush)(ngram_model_t *model); 00156 00160 ngram_iter_t * (*iter)(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist); 00161 00165 ngram_iter_t * (*mgrams)(ngram_model_t *model, int32 m); 00166 00170 ngram_iter_t * (*successors)(ngram_iter_t *itor); 00171 00175 int32 const * (*iter_get)(ngram_iter_t *itor, 00176 int32 *out_score, 00177 int32 *out_bowt); 00178 00182 ngram_iter_t * (*iter_next)(ngram_iter_t *itor); 00183 00187 void (*iter_free)(ngram_iter_t *itor); 00188 } ngram_funcs_t; 00189 00193 struct ngram_iter_s { 00194 ngram_model_t *model; 00195 int32 *wids; 00196 int16 m; 00197 int16 successor; 00198 }; 00199 00203 typedef struct classdef_s { 00204 char **words; 00205 float32 *weights; 00206 int32 n_words; 00207 } classdef_t; 00208 00212 int32 00213 ngram_model_init(ngram_model_t *model, 00214 ngram_funcs_t *funcs, 00215 logmath_t *lmath, 00216 int32 n, int32 n_unigram); 00217 00221 ngram_model_t *ngram_model_arpa_read(cmd_ln_t *config, 00222 const char *file_name, 00223 logmath_t *lmath); 00227 ngram_model_t *ngram_model_dmp_read(cmd_ln_t *config, 00228 const char *file_name, 00229 logmath_t *lmath); 00233 ngram_model_t *ngram_model_dmp32_read(cmd_ln_t *config, 00234 const char *file_name, 00235 logmath_t *lmath); 00236 00240 int ngram_model_arpa_write(ngram_model_t *model, 00241 const char *file_name); 00245 int ngram_model_dmp_write(ngram_model_t *model, 00246 const char *file_name); 00247 00251 int32 read_classdef_file(hash_table_t *classes, const char *classdef_file); 00252 00256 void classdef_free(classdef_t *classdef); 00257 00261 ngram_class_t *ngram_class_new(ngram_model_t *model, int32 tag_wid, 00262 int32 start_wid, glist_t classwords); 00263 00267 void ngram_class_free(ngram_class_t *lmclass); 00268 00274 int32 ngram_class_prob(ngram_class_t *lmclass, int32 wid); 00275 00279 void ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model, 00280 int m, int successor); 00281 00282 #endif /* __NGRAM_MODEL_INTERNAL_H__ */