/* SphinxBase 0.6 */
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 #ifndef __FE_INTERNAL_H__ 00039 #define __FE_INTERNAL_H__ 00040 00041 #ifdef HAVE_CONFIG_H 00042 #include <config.h> 00043 #endif 00044 00045 #include "sphinxbase/fe.h" 00046 #include "sphinxbase/fixpoint.h" 00047 00048 #ifdef __cplusplus 00049 extern "C" { 00050 #endif 00051 #if 0 00052 /* Fool Emacs. */ 00053 } 00054 #endif 00055 00056 #ifdef FIXED16 00057 /* Q15 format */ 00058 typedef int16 frame_t; 00059 typedef int16 window_t; 00060 typedef int32 powspec_t; 00061 typedef struct { int16 r, i; } complex; 00062 #elif defined(FIXED_POINT) 00063 typedef fixed32 frame_t; 00064 typedef int32 powspec_t; 00065 typedef fixed32 window_t; 00066 typedef struct { fixed32 r, i; } complex; 00067 #else /* FIXED_POINT */ 00068 typedef float64 frame_t; 00069 typedef float64 powspec_t; 00070 typedef float64 window_t; 00071 typedef struct { float64 r, i; } complex; 00072 #endif /* FIXED_POINT */ 00073 00074 /* Values for the 'logspec' field. */ 00075 enum { 00076 RAW_LOG_SPEC = 1, 00077 SMOOTH_LOG_SPEC = 2 00078 }; 00079 00080 /* Values for the 'transform' field. */ 00081 enum { 00082 LEGACY_DCT = 0, 00083 DCT_II = 1, 00084 DCT_HTK = 2 00085 }; 00086 00087 typedef struct melfb_s melfb_t; 00089 struct melfb_s { 00090 float32 sampling_rate; 00091 int32 num_cepstra; 00092 int32 num_filters; 00093 int32 fft_size; 00094 float32 lower_filt_freq; 00095 float32 upper_filt_freq; 00096 /* DCT coefficients. */ 00097 mfcc_t **mel_cosine; 00098 /* Filter coefficients. */ 00099 mfcc_t *filt_coeffs; 00100 int16 *spec_start; 00101 int16 *filt_start; 00102 int16 *filt_width; 00103 /* Luxury mobile home. 
*/ 00104 int32 doublewide; 00105 char const *warp_type; 00106 char const *warp_params; 00107 uint32 warp_id; 00108 /* Precomputed normalization constants for unitary DCT-II/DCT-III */ 00109 mfcc_t sqrt_inv_n, sqrt_inv_2n; 00110 /* Value and coefficients for HTK-style liftering */ 00111 int32 lifter_val; 00112 mfcc_t *lifter; 00113 /* Normalize filters to unit area */ 00114 int32 unit_area; 00115 /* Round filter frequencies to DFT points (hurts accuracy, but is 00116 useful for legacy purposes) */ 00117 int32 round_filters; 00118 }; 00119 00120 /* sqrt(1/2), also used for unitary DCT-II/DCT-III */ 00121 #define SQRT_HALF FLOAT2MFCC(0.707106781186548) 00122 00124 struct fe_s { 00125 cmd_ln_t *config; 00126 int refcount; 00127 00128 float32 sampling_rate; 00129 int16 frame_rate; 00130 int16 frame_shift; 00131 00132 float32 window_length; 00133 int16 frame_size; 00134 int16 fft_size; 00135 00136 uint8 fft_order; 00137 uint8 feature_dimension; 00138 uint8 num_cepstra; 00139 uint8 remove_dc; 00140 uint8 log_spec; 00141 uint8 swap; 00142 uint8 dither; 00143 uint8 transform; 00144 00145 float32 pre_emphasis_alpha; 00146 int32 seed; 00147 00148 int16 frame_counter; 00149 uint8 start_flag; 00150 uint8 reserved; 00151 00152 /* Twiddle factors for FFT. */ 00153 frame_t *ccc, *sss; 00154 /* Mel filter parameters. */ 00155 melfb_t *mel_fb; 00156 /* Half of a Hamming Window. */ 00157 window_t *hamming_window; 00158 00159 /* Temporary buffers for processing. */ 00160 /* FIXME: too many of these. 
*/ 00161 int16 *spch; 00162 frame_t *frame; 00163 powspec_t *spec, *mfspec; 00164 int16 *overflow_samps; 00165 int16 num_overflow_samps; 00166 int16 prior; 00167 }; 00168 00169 #define BB_SAMPLING_RATE 16000 00170 #define DEFAULT_BB_FFT_SIZE 512 00171 #define DEFAULT_BB_FRAME_SHIFT 160 00172 #define DEFAULT_BB_NUM_FILTERS 40 00173 #define DEFAULT_BB_LOWER_FILT_FREQ 133.33334 00174 #define DEFAULT_BB_UPPER_FILT_FREQ 6855.4976 00175 00176 #define NB_SAMPLING_RATE 8000 00177 #define DEFAULT_NB_FFT_SIZE 256 00178 #define DEFAULT_NB_FRAME_SHIFT 80 00179 #define DEFAULT_NB_NUM_FILTERS 31 00180 #define DEFAULT_NB_LOWER_FILT_FREQ 200 00181 #define DEFAULT_NB_UPPER_FILT_FREQ 3500 00182 00183 void fe_init_dither(int32 seed); 00184 00185 /* Apply 1/2 bit noise to a buffer of audio. */ 00186 int32 fe_dither(int16 *buffer, int32 nsamps); 00187 00188 /* Load a frame of data into the fe. */ 00189 int fe_read_frame(fe_t *fe, int16 const *in, int32 len); 00190 00191 /* Shift the input buffer back and read more data. */ 00192 int fe_shift_frame(fe_t *fe, int16 const *in, int32 len); 00193 00194 /* Process a frame of data into features. */ 00195 int32 fe_write_frame(fe_t *fe, mfcc_t *fea); 00196 00197 /* Initialization functions. */ 00198 int32 fe_build_melfilters(melfb_t *MEL_FB); 00199 int32 fe_compute_melcosine(melfb_t *MEL_FB); 00200 void fe_create_hamming(window_t *in, int32 in_len); 00201 void fe_create_twiddle(fe_t *fe); 00202 00203 /* Miscellaneous processing functions. */ 00204 void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep); 00205 void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk); 00206 void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec); 00207 00208 #ifdef __cplusplus 00209 } 00210 #endif 00211 00212 #endif /* __FE_INTERNAL_H__ */