00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #if !defined(WIN32) || defined(GNUWINCE)
00040 #include <unistd.h>
00041 #include <sys/file.h>
00042 #include <sys/fcntl.h>
00043 #if !defined(O_BINARY)
00044 #define O_BINARY 0
00045 #endif
00046 #endif
00047 #include <string.h>
00048 #include <time.h>
00049 #include <sys/types.h>
00050 #include <sys/stat.h>
00051 #include <fcntl.h>
00052 #include <assert.h>
00053
00054 #ifdef _WIN32
00055 #pragma warning (disable: 4996 4018)
00056 #endif
00057
00058 #if defined(WIN32) && !defined(GNUWINCE)
00059 #include <io.h>
00060 #include <errno.h>
00061 #endif
00062
00063 #ifdef HAVE_CONFIG_H
00064 #include <config.h>
00065 #endif
00066
00067 #include "fe.h"
00068 #include "strfuncs.h"
00069 #include "cmd_ln.h"
00070 #include "err.h"
00071 #include "ckd_alloc.h"
00072 #include "byteorder.h"
00073
00074 #include "wave2feat.h"
00075 #include "cmd_ln_defn.h"
00076
00077 struct globals_s {
00078 cmd_ln_t *config;
00079 int32 nskip;
00080 int32 runlen;
00081 char const *wavfile;
00082 char const *cepfile;
00083 char const *ctlfile;
00084 char const *wavdir;
00085 char const *cepdir;
00086 char const *wavext;
00087 char const *cepext;
00088 int32 input_format;
00089 int32 is_batch;
00090 int32 is_single;
00091 int32 blocksize;
00092 int32 machine_endian;
00093 int32 input_endian;
00094 int32 output_endian;
00095 int32 nchans;
00096 int32 whichchan;
00097 int32 convert;
00098 int32 verbose;
00099 int32 logspec;
00100 };
00101 typedef struct globals_s globals_t;
00102
00103 globals_t *fe_parse_options(int argc, char **argv);
00104 int32 fe_convert_files(globals_t * P);
00105 int32 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00106 char **outfilename);
00107 int32 fe_openfiles(globals_t * P, fe_t * FE, char *infile, int32 * fp_in,
00108 int32 * nsamps, int32 * nframes, int32 * nblocks,
00109 char *outfile, int32 * fp_out);
00110 int32 fe_readblock_spch(globals_t * P, int32 fp, int32 nsamps,
00111 int16 * buf);
00112 int32 fe_writeblock_feat(globals_t * P, fe_t * FE, int32 fp, int32 nframes,
00113 mfcc_t ** feat);
00114 int32 fe_closefiles(int32 fp_in, int32 fp_out);
00115 int32 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile);
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138 int32
00139 main(int32 argc, char **argv)
00140 {
00141 globals_t *P;
00142
00143 P = fe_parse_options(argc, argv);
00144 if (fe_convert_files(P) != FE_SUCCESS) {
00145 E_FATAL("error converting files...exiting\n");
00146 }
00147 free(P);
00148 return (0);
00149 }
00150
00151
00152 int32
00153 fe_convert_files(globals_t * P)
00154 {
00155
00156 fe_t *FE;
00157 char *infile, *outfile, fileroot[MAXCHARS];
00158 FILE *ctlfile;
00159 int16 *spdata = NULL;
00160 int32 splen =
00161 0, total_samps, frames_proc, nframes, nblocks, last_frame;
00162 int32 fp_in, fp_out, last_blocksize = 0, curr_block, total_frames;
00163 mfcc_t **cep = NULL, **last_frame_cep;
00164 int32 return_value;
00165 int32 warn_zero_energy = 0;
00166 int32 process_utt_return_value;
00167
00168 if ((FE = fe_init_auto_r(P->config)) == NULL) {
00169 E_ERROR("memory alloc failed...exiting\n");
00170 return (FE_MEM_ALLOC_ERROR);
00171 }
00172
00173 if (P->is_batch) {
00174 int32 nskip = P->nskip;
00175 int32 runlen = P->runlen;
00176
00177 if ((ctlfile = fopen(P->ctlfile, "r")) == NULL) {
00178 E_ERROR("Unable to open control file %s\n", P->ctlfile);
00179 fe_free(FE);
00180 return (FE_CONTROL_FILE_ERROR);
00181 }
00182 while (fscanf(ctlfile, "%s", fileroot) != EOF) {
00183 if (nskip > 0) {
00184 --nskip;
00185 continue;
00186 }
00187 if (runlen > 0) {
00188 --runlen;
00189 }
00190 else if (runlen == 0) {
00191 break;
00192 }
00193
00194 fe_build_filenames(P, fileroot, &infile, &outfile);
00195
00196 if (P->verbose)
00197 E_INFO("%s\n", infile);
00198
00199 if (P->convert) {
00200
00201 return_value = fe_convert_with_dct(P, FE, infile, outfile);
00202 ckd_free(infile);
00203 ckd_free(outfile);
00204 infile = outfile = NULL;
00205 if (return_value != FE_SUCCESS) {
00206 fe_free(FE);
00207 return return_value;
00208 }
00209 continue;
00210 }
00211 return_value =
00212 fe_openfiles(P, FE, infile, &fp_in,
00213 &total_samps, &nframes, &nblocks,
00214 outfile, &fp_out);
00215 ckd_free(infile);
00216 ckd_free(outfile);
00217 infile = outfile = NULL;
00218 if (return_value != FE_SUCCESS) {
00219 fe_free(FE);
00220 return (return_value);
00221 }
00222
00223 warn_zero_energy = 0;
00224
00225 if (nblocks * P->blocksize >= total_samps)
00226 last_blocksize =
00227 total_samps - (nblocks - 1) * P->blocksize;
00228
00229 if (!fe_start_utt(FE)) {
00230 curr_block = 1;
00231 total_frames = frames_proc = 0;
00232
00233
00234 while (curr_block < nblocks) {
00235 splen = P->blocksize;
00236 if ((spdata =
00237 (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00238 E_ERROR
00239 ("Unable to allocate memory block of %d shorts for input speech\n",
00240 splen);
00241 fe_free(FE);
00242 return (FE_MEM_ALLOC_ERROR);
00243 }
00244 if (fe_readblock_spch
00245 (P, fp_in, splen, spdata) != splen) {
00246 E_ERROR("error reading speech data\n");
00247 fe_free(FE);
00248 return (FE_INPUT_FILE_READ_ERROR);
00249 }
00250 process_utt_return_value =
00251 fe_process_utt(FE, spdata,
00252 splen, &cep, &frames_proc);
00253 if (process_utt_return_value != FE_SUCCESS) {
00254 if (FE_ZERO_ENERGY_ERROR ==
00255 process_utt_return_value) {
00256 warn_zero_energy = 1;
00257 }
00258 else {
00259 fe_free(FE);
00260 return (process_utt_return_value);
00261 }
00262 }
00263 if (frames_proc > 0)
00264 fe_writeblock_feat(P, FE,
00265 fp_out, frames_proc, cep);
00266 if (cep != NULL) {
00267 ckd_free_2d((void **) cep);
00268 cep = NULL;
00269 }
00270 curr_block++;
00271 total_frames += frames_proc;
00272 free(spdata);
00273 spdata = NULL;
00274 }
00275
00276 free(spdata);
00277 spdata = NULL;
00278 splen = last_blocksize;
00279
00280 if ((spdata =
00281 (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00282 E_ERROR
00283 ("Unable to allocate memory block of %d shorts for input speech\n",
00284 splen);
00285 fe_free(FE);
00286 return (FE_MEM_ALLOC_ERROR);
00287 }
00288
00289 if (fe_readblock_spch(P, fp_in, splen, spdata) != splen) {
00290 E_ERROR("error reading speech data\n");
00291 fe_free(FE);
00292 return (FE_INPUT_FILE_READ_ERROR);
00293 }
00294
00295 process_utt_return_value =
00296 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00297 if (process_utt_return_value != FE_SUCCESS) {
00298 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00299 warn_zero_energy = 1;
00300 }
00301 else {
00302 fe_free(FE);
00303 return (process_utt_return_value);
00304 }
00305 }
00306 if (frames_proc > 0)
00307 fe_writeblock_feat(P, FE, fp_out, frames_proc, cep);
00308 if (cep != NULL) {
00309 ckd_free_2d((void **) cep);
00310 cep = NULL;
00311 }
00312 curr_block++;
00313 last_frame_cep =
00314 (mfcc_t **) ckd_calloc_2d(1,
00315 fe_get_output_size(FE),
00316 sizeof(float32));
00317 process_utt_return_value =
00318 fe_end_utt(FE, last_frame_cep[0], &last_frame);
00319 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00320 warn_zero_energy = 1;
00321 }
00322 else {
00323 assert(process_utt_return_value == FE_SUCCESS);
00324 }
00325 if (last_frame > 0) {
00326 fe_writeblock_feat(P, FE, fp_out,
00327 last_frame, last_frame_cep);
00328 frames_proc++;
00329 }
00330 total_frames += frames_proc;
00331
00332 fe_closefiles(fp_in, fp_out);
00333 free(spdata);
00334 spdata = NULL;
00335 if (last_frame_cep != NULL) {
00336 ckd_free_2d((void **)
00337 last_frame_cep);
00338 last_frame_cep = NULL;
00339 }
00340 if (warn_zero_energy) {
00341 E_WARN
00342 ("File %s has some frames with zero energy. Consider using dither\n",
00343 infile);
00344 }
00345 }
00346 else {
00347 E_ERROR("fe_start_utt() failed\n");
00348 return (FE_START_ERROR);
00349 }
00350 }
00351 }
00352 else if (P->is_single) {
00353
00354 fe_build_filenames(P, fileroot, &infile, &outfile);
00355 if (P->verbose)
00356 printf("%s\n", infile);
00357
00358
00359 if (P->convert != WAV2FEAT) {
00360 int rv;
00361
00362 rv = fe_convert_with_dct(P, FE, infile, outfile);
00363 ckd_free(infile);
00364 ckd_free(outfile);
00365 infile = outfile = NULL;
00366 fe_free(FE);
00367 return rv;
00368 }
00369
00370 return_value =
00371 fe_openfiles(P, FE, infile, &fp_in, &total_samps,
00372 &nframes, &nblocks, outfile, &fp_out);
00373 ckd_free(infile);
00374 ckd_free(outfile);
00375 infile = outfile = NULL;
00376 if (return_value != FE_SUCCESS) {
00377 fe_free(FE);
00378 return (return_value);
00379 }
00380
00381 warn_zero_energy = 0;
00382
00383 if (nblocks * P->blocksize >= total_samps)
00384 last_blocksize = total_samps - (nblocks - 1) * P->blocksize;
00385
00386 if (!fe_start_utt(FE)) {
00387 curr_block = 1;
00388 total_frames = frames_proc = 0;
00389
00390
00391 while (curr_block < nblocks) {
00392 splen = P->blocksize;
00393 if ((spdata =
00394 (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00395 E_ERROR
00396 ("Unable to allocate memory block of %d shorts for input speech\n",
00397 splen);
00398 fe_free(FE);
00399 return (FE_MEM_ALLOC_ERROR);
00400 }
00401 if (fe_readblock_spch(P, fp_in, splen, spdata) != splen) {
00402 E_ERROR("Error reading speech data\n");
00403 fe_free(FE);
00404 return (FE_INPUT_FILE_READ_ERROR);
00405 }
00406 process_utt_return_value =
00407 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00408 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00409 warn_zero_energy = 1;
00410 }
00411 else {
00412 assert(process_utt_return_value == FE_SUCCESS);
00413 }
00414 if (frames_proc > 0)
00415 fe_writeblock_feat(P, FE, fp_out, frames_proc, cep);
00416 if (cep != NULL) {
00417 ckd_free_2d((void **) cep);
00418 cep = NULL;
00419 }
00420 curr_block++;
00421 total_frames += frames_proc;
00422 if (spdata != NULL) {
00423 free(spdata);
00424 spdata = NULL;
00425 }
00426 }
00427
00428 if (spdata != NULL) {
00429 free(spdata);
00430 spdata = NULL;
00431 }
00432 splen = last_blocksize;
00433 if ((spdata = (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00434 E_ERROR
00435 ("Unable to allocate memory block of %d shorts for input speech\n",
00436 splen);
00437 fe_free(FE);
00438 return (FE_MEM_ALLOC_ERROR);
00439 }
00440 if (fe_readblock_spch(P, fp_in, splen, spdata) != splen) {
00441 E_ERROR("Error reading speech data\n");
00442 fe_free(FE);
00443 return (FE_INPUT_FILE_READ_ERROR);
00444 }
00445 process_utt_return_value =
00446 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00447 free(spdata);
00448 spdata = NULL;
00449 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00450 warn_zero_energy = 1;
00451 }
00452 else {
00453 assert(process_utt_return_value == FE_SUCCESS);
00454 }
00455 if (frames_proc > 0)
00456 fe_writeblock_feat(P, FE, fp_out, frames_proc, cep);
00457 if (cep != NULL) {
00458 ckd_free_2d((void **) cep);
00459 cep = NULL;
00460 }
00461
00462 curr_block++;
00463 last_frame_cep =
00464 (mfcc_t **) ckd_calloc_2d(1,
00465 fe_get_output_size(FE),
00466 sizeof(float32));
00467 process_utt_return_value =
00468 fe_end_utt(FE, last_frame_cep[0], &last_frame);
00469 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00470 warn_zero_energy = 1;
00471 }
00472 else {
00473 assert(process_utt_return_value == FE_SUCCESS);
00474 }
00475 if (last_frame > 0) {
00476 fe_writeblock_feat(P, FE, fp_out,
00477 last_frame, last_frame_cep);
00478 frames_proc++;
00479 }
00480 total_frames += frames_proc;
00481
00482 fe_closefiles(fp_in, fp_out);
00483 if (last_frame_cep != NULL) {
00484 ckd_free_2d((void **) last_frame_cep);
00485 last_frame_cep = NULL;
00486 }
00487 }
00488 else {
00489 E_ERROR("fe_start_utt() failed\n");
00490 fe_free(FE);
00491 return (FE_START_ERROR);
00492 }
00493
00494 if (warn_zero_energy) {
00495 E_WARN
00496 ("File %s has some frames with zero energy. Consider using dither\n",
00497 infile);
00498 }
00499 }
00500 else {
00501 E_ERROR("Unknown mode - single or batch?\n");
00502 fe_free(FE);
00503 return (FE_UNKNOWN_SINGLE_OR_BATCH);
00504 }
00505
00506 fe_free(FE);
00507 return (FE_SUCCESS);
00508 }
00509
00510 void
00511 fe_validate_parameters(globals_t * P)
00512 {
00513
00514 if ((P->is_batch) && (P->is_single)) {
00515 E_FATAL("You cannot define an input file and a control file\n");
00516 }
00517
00518 if (P->wavfile == NULL && P->wavdir == NULL) {
00519 E_FATAL("No input file or file directory given\n");
00520 }
00521
00522 if (P->cepfile == NULL && P->cepdir == NULL) {
00523 E_FATAL("No cepstra file or file directory given\n");
00524 }
00525
00526 if (P->ctlfile == NULL && P->cepfile == NULL && P->wavfile == NULL) {
00527 E_FATAL("No control file given\n");
00528 }
00529
00530 if (P->nchans > 1) {
00531 E_INFO("Files have %d channels of data\n", P->nchans);
00532 E_INFO("Will extract features for channel %d\n", P->whichchan);
00533 }
00534
00535 if (P->whichchan > P->nchans) {
00536 E_FATAL("You cannot select channel %d out of %d\n",
00537 P->whichchan, P->nchans);
00538 }
00539
00540 if ((cmd_ln_float32_r(P->config, "-upperf") * 2)
00541 > cmd_ln_float32_r(P->config, "-samprate")) {
00542 E_WARN("Upper frequency higher than Nyquist frequency\n");
00543 }
00544
00545 if (cmd_ln_boolean_r(P->config, "-doublebw")) {
00546 E_INFO("Will use double bandwidth filters\n");
00547 }
00548
00549 }
00550
00551
00552 globals_t *
00553 fe_parse_options(int32 argc, char **argv)
00554 {
00555 globals_t *P;
00556 int32 format;
00557 char const *endian;
00558
00559 P = ckd_calloc(1, sizeof(*P));
00560 P->config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE);
00561
00562
00563 if (cmd_ln_str_r(P->config, "-argfile")) {
00564 P->config = cmd_ln_parse_file_r(P->config, defn,
00565 cmd_ln_str_r(P->config, "-argfile"),
00566 FALSE);
00567 }
00568
00569 P->nskip = P->runlen = -1;
00570 P->wavfile = cmd_ln_str_r(P->config, "-i");
00571 if (P->wavfile != NULL) {
00572 P->is_single = 1;
00573 }
00574 P->cepfile = cmd_ln_str_r(P->config, "-o");
00575 P->ctlfile = cmd_ln_str_r(P->config, "-c");
00576 if (P->ctlfile != NULL) {
00577 char const *nskip;
00578 char const *runlen;
00579
00580 P->is_batch = 1;
00581
00582 nskip = cmd_ln_str_r(P->config, "-nskip");
00583 runlen = cmd_ln_str_r(P->config, "-runlen");
00584 if (nskip != NULL) {
00585 P->nskip = atoi(nskip);
00586 }
00587 if (runlen != NULL) {
00588 P->runlen = atoi(runlen);
00589 }
00590 }
00591 P->wavdir = cmd_ln_str_r(P->config, "-di");
00592 P->cepdir = cmd_ln_str_r(P->config, "-do");
00593 P->wavext = cmd_ln_str_r(P->config, "-ei");
00594 P->cepext = cmd_ln_str_r(P->config, "-eo");
00595 format = cmd_ln_int32_r(P->config, "-raw");
00596 if (format) {
00597 P->input_format = RAW;
00598 }
00599 format = cmd_ln_int32_r(P->config, "-nist");
00600 if (format) {
00601 P->input_format = NIST;
00602 }
00603 format = cmd_ln_int32_r(P->config, "-mswav");
00604 if (format) {
00605 P->input_format = MSWAV;
00606 }
00607
00608 P->nchans = cmd_ln_int32_r(P->config, "-nchans");
00609 P->whichchan = cmd_ln_int32_r(P->config, "-whichchan");
00610 P->output_endian = BIG;
00611 P->blocksize = cmd_ln_int32_r(P->config, "-blocksize");
00612 endian = cmd_ln_str_r(P->config, "-mach_endian");
00613 if (!strcmp("big", endian)) {
00614 P->machine_endian = BIG;
00615 }
00616 else {
00617 if (!strcmp("little", endian)) {
00618 P->machine_endian = LITTLE;
00619 }
00620 else {
00621 E_FATAL("Machine must be big or little Endian\n");
00622 }
00623 }
00624 endian = cmd_ln_str_r(P->config, "-input_endian");
00625 if (!strcmp("big", endian)) {
00626 P->input_endian = BIG;
00627 }
00628 else {
00629 if (!strcmp("little", endian)) {
00630 P->input_endian = LITTLE;
00631 }
00632 else {
00633 E_FATAL("Input must be big or little Endian\n");
00634 }
00635 }
00636
00637 if (cmd_ln_boolean_r(P->config, "-logspec")
00638 || cmd_ln_boolean_r(P->config, "-smoothspec"))
00639 P->logspec = TRUE;
00640 if (cmd_ln_boolean_r(P->config, "-spec2cep"))
00641 P->convert = SPEC2CEP;
00642 if (cmd_ln_boolean_r(P->config, "-cep2spec"))
00643 P->convert = CEP2SPEC;
00644
00645 fe_validate_parameters(P);
00646
00647 return (P);
00648
00649 }
00650
00651 int32
00652 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00653 char **outfilename)
00654 {
00655 char chanlabel[32];
00656
00657 if (P->nchans > 1)
00658 sprintf(chanlabel, ".ch%d", P->whichchan);
00659
00660 if (P->is_batch) {
00661 if (infilename != NULL) {
00662 *infilename = string_join(P->wavdir, "/",
00663 fileroot, ".",
00664 P->wavext, NULL);
00665 }
00666
00667 if (outfilename != NULL) {
00668 if (P->nchans > 1)
00669 *outfilename = string_join(P->cepdir, "/",
00670 fileroot, chanlabel,
00671 ".", P->cepext, NULL);
00672 else
00673 *outfilename = string_join(P->cepdir, "/",
00674 fileroot, ".",
00675 P->cepext, NULL);
00676 }
00677 }
00678 else if (P->is_single) {
00679 if (infilename != NULL) {
00680 *infilename = ckd_salloc(P->wavfile);
00681 }
00682 if (outfilename != NULL) {
00683 *outfilename = ckd_salloc(P->cepfile);
00684 }
00685 }
00686 else {
00687 E_FATAL("Unspecified Batch or Single Mode\n");
00688 }
00689
00690 return 0;
00691 }
00692
00693 int32
00694 fe_openfiles(globals_t * P, fe_t * FE, char *infile, int32 * fp_in,
00695 int32 * nsamps, int32 * nframes, int32 * nblocks,
00696 char *outfile, int32 * fp_out)
00697 {
00698 struct stat filestats;
00699 int fp = 0, len = 0, outlen, numframes, numblocks;
00700 FILE *fp2;
00701 char line[MAXCHARS];
00702 int got_it = 0;
00703
00704
00705
00706
00707 if (P->input_format == NIST) {
00708 if ((fp2 = fopen(infile, "rb")) == NULL) {
00709 E_ERROR_SYSTEM("Cannot read %s", infile);
00710 return (FE_INPUT_FILE_READ_ERROR);
00711 }
00712 *line = 0;
00713 got_it = 0;
00714 while (strcmp(line, "end_head") && !got_it) {
00715 fscanf(fp2, "%s", line);
00716 if (!strcmp(line, "sample_byte_format")) {
00717 fscanf(fp2, "%s", line);
00718 if (!strcmp(line, "-s2")) {
00719 fscanf(fp2, "%s", line);
00720 if (!strcmp(line, "01")) {
00721 P->input_endian = LITTLE;
00722 got_it = 1;
00723 }
00724 else if (!strcmp(line, "10")) {
00725 P->input_endian = BIG;
00726 got_it = 1;
00727 }
00728 else
00729 E_ERROR("Unknown/unsupported byte order\n");
00730 }
00731 else
00732 E_ERROR("Error determining byte format\n");
00733 }
00734 }
00735 if (!got_it) {
00736 E_WARN
00737 ("Can't find byte format in header, setting to machine's endian\n");
00738 P->input_endian = P->machine_endian;
00739 }
00740 fclose(fp2);
00741 }
00742 else if (P->input_format == RAW) {
00743
00744
00745
00746 }
00747 else if (P->input_format == MSWAV) {
00748 P->input_endian = LITTLE;
00749 }
00750
00751
00752 if ((fp = open(infile, O_RDONLY | O_BINARY, 0644)) < 0) {
00753 fprintf(stderr, "Cannot open %s\n", infile);
00754 return (FE_INPUT_FILE_OPEN_ERROR);
00755 }
00756 else {
00757 if (fstat(fp, &filestats) != 0)
00758 printf("fstat failed\n");
00759
00760 if (P->input_format == NIST) {
00761 short *hdr_buf;
00762
00763 len = (filestats.st_size - HEADER_BYTES) / sizeof(short);
00764
00765 hdr_buf =
00766 (short *) calloc(HEADER_BYTES / sizeof(short),
00767 sizeof(short));
00768 if (read(fp, hdr_buf, HEADER_BYTES) != HEADER_BYTES) {
00769 E_ERROR("Cannot read %s\n", infile);
00770 return (FE_INPUT_FILE_READ_ERROR);
00771 }
00772 free(hdr_buf);
00773 }
00774 else if (P->input_format == RAW) {
00775 len = filestats.st_size / sizeof(int16);
00776 }
00777 else if (P->input_format == MSWAV) {
00778
00779 MSWAV_hdr *hdr_buf = NULL;
00780
00781 const int hdr_len_to_read = ((char *) (&hdr_buf->datatag))
00782 - (char *) hdr_buf;
00783 int data_start;
00784
00785 if ((hdr_buf =
00786 (MSWAV_hdr *) calloc(1, sizeof(MSWAV_hdr))) == NULL) {
00787 E_ERROR("Cannot allocate for input file header\n");
00788 return (FE_INPUT_FILE_READ_ERROR);
00789 }
00790 if (read(fp,hdr_buf,hdr_len_to_read) != hdr_len_to_read){
00791 E_ERROR("Cannot allocate for input file header\n");
00792 return (FE_INPUT_FILE_READ_ERROR);
00793 }
00794
00795 if (strncmp(hdr_buf->rifftag, "RIFF", 4) != 0 ||
00796 strncmp(hdr_buf->wavefmttag, "WAVEfmt", 7) != 0) {
00797 E_ERROR("Error in mswav file header\n");
00798 return (FE_INPUT_FILE_READ_ERROR);
00799 }
00800 {
00801
00802
00803
00804
00805
00806 int16 found = 0;
00807 char readChar;
00808 char *dataString = "data";
00809 int16 charPointer = 0;
00810 while (!found) {
00811 if (read(fp, &readChar, sizeof(char)) != sizeof(char)) {
00812 E_ERROR("Failed reading wav file.\n");
00813 return (FE_INPUT_FILE_READ_ERROR);
00814 }
00815 if (readChar == dataString[charPointer]) {
00816 charPointer++;
00817 }
00818 if (charPointer == (int) strlen(dataString)) {
00819 found = 1;
00820 strcpy(hdr_buf->datatag, dataString);
00821 if (read
00822 (fp,
00823 &(hdr_buf->
00824 datalength),
00825 sizeof(int32)) != sizeof(int32)) {
00826 E_ERROR("Failed reading wav file.\n");
00827 return (FE_INPUT_FILE_READ_ERROR);
00828 }
00829 }
00830 }
00831 }
00832 data_start = lseek(fp, 0, SEEK_CUR);
00833 if (P->input_endian != P->machine_endian) {
00834 hdr_buf->datalength = SWAP_INT32(&(hdr_buf->datalength));
00835 hdr_buf->data_format = SWAP_INT16(&(hdr_buf->data_format));
00836 hdr_buf->numchannels = SWAP_INT16(&(hdr_buf->numchannels));
00837 hdr_buf->BitsPerSample =
00838 SWAP_INT16(&(hdr_buf->BitsPerSample));
00839 hdr_buf->SamplingFreq =
00840 SWAP_INT32(&(hdr_buf->SamplingFreq));
00841 hdr_buf->BytesPerSec = SWAP_INT32(&(hdr_buf->BytesPerSec));
00842 }
00843
00844 if (hdr_buf->data_format != 1 || hdr_buf->BitsPerSample != 16) {
00845 E_ERROR("MS WAV file not in 16-bit PCM format\n");
00846 return (FE_INPUT_FILE_READ_ERROR);
00847 }
00848
00849 len = hdr_buf->datalength / sizeof(short);
00850 if (len > (filestats.st_size - data_start) / sizeof(short))
00851 len = (filestats.st_size - data_start) / sizeof(short);
00852
00853 P->nchans = hdr_buf->numchannels;
00854
00855 if (P->verbose) {
00856 E_INFO("Reading MS Wav file %s:\n", infile);
00857 E_INFO
00858 ("\t16 bit PCM data, %d channels %d samples\n",
00859 P->nchans, len);
00860 E_INFO("\tSampled at %d\n", hdr_buf->SamplingFreq);
00861 }
00862 free(hdr_buf);
00863 }
00864 else {
00865 E_ERROR("Unknown input file format\n");
00866 return (FE_INPUT_FILE_OPEN_ERROR);
00867 }
00868 }
00869
00870
00871 len = len / P->nchans;
00872 *nsamps = len;
00873 *fp_in = fp;
00874
00875 numblocks = (int) ((float) len / (float) P->blocksize);
00876 if (numblocks * P->blocksize < len)
00877 numblocks++;
00878
00879 *nblocks = numblocks;
00880
00881 if ((fp =
00882 open(outfile, O_CREAT | O_WRONLY | O_TRUNC | O_BINARY,
00883 0644)) < 0) {
00884 E_ERROR("Unable to open %s for writing features\n", outfile);
00885 return (FE_OUTPUT_FILE_OPEN_ERROR);
00886 }
00887 else {
00888 size_t nsamps = len;
00889 int frame_shift, frame_size;
00890
00891
00892 fe_process_frames(FE, NULL, &nsamps, NULL, &numframes);
00893
00894
00895 fe_get_input_size(FE, &frame_shift, &frame_size);
00896
00897 if (frame_size + (numframes - 1) * frame_shift <= len)
00898 ++numframes;
00899
00900 outlen = numframes * fe_get_output_size(FE);
00901 if (P->output_endian != P->machine_endian)
00902 SWAP_INT32(&outlen);
00903 if (write(fp, &outlen, 4) != 4) {
00904 E_ERROR("Data write error on %s\n", outfile);
00905 close(fp);
00906 return (FE_OUTPUT_FILE_WRITE_ERROR);
00907 }
00908 if (P->output_endian != P->machine_endian)
00909 SWAP_INT32(&outlen);
00910 }
00911
00912 *nframes = numframes;
00913 *fp_out = fp;
00914
00915 return 0;
00916 }
00917
00918 int32
00919 fe_readblock_spch(globals_t * P, int32 fp, int32 nsamps, int16 * buf)
00920 {
00921 int32 bytes_read, cum_bytes_read, nreadbytes, actsamps, offset, i,
00922 j, k;
00923 int16 *tmpbuf;
00924 int32 nchans, whichchan;
00925
00926 nchans = P->nchans;
00927 whichchan = P->whichchan;
00928
00929 if (nchans == 1) {
00930 if (P->input_format == RAW
00931 || P->input_format == NIST
00932 || P->input_format == MSWAV) {
00933 nreadbytes = nsamps * sizeof(int16);
00934 if ((bytes_read = read(fp, buf, nreadbytes)) != nreadbytes) {
00935 E_ERROR_SYSTEM("error reading block: %ld != %d",
00936 bytes_read, nreadbytes);
00937 return (0);
00938 }
00939 }
00940 else {
00941 E_ERROR("unknown input file format\n");
00942 return (0);
00943 }
00944 cum_bytes_read = bytes_read;
00945 }
00946 else if (nchans > 1) {
00947
00948 if (nsamps < P->blocksize) {
00949 actsamps = nsamps * nchans;
00950 tmpbuf = (int16 *) calloc(nsamps * nchans, sizeof(int16));
00951 cum_bytes_read = 0;
00952 if (P->input_format == RAW
00953 || P->input_format == MSWAV || P->input_format == NIST) {
00954
00955 k = 0;
00956 nreadbytes = actsamps * sizeof(int16);
00957
00958 if ((bytes_read =
00959 read(fp, tmpbuf, nreadbytes)) != nreadbytes) {
00960 E_ERROR
00961 ("error reading block (got %d not %d)\n",
00962 bytes_read, nreadbytes);
00963 return (0);
00964 }
00965
00966 for (j = whichchan - 1; j < actsamps; j = j + nchans) {
00967 buf[k] = tmpbuf[j];
00968 k++;
00969 }
00970 cum_bytes_read += bytes_read / nchans;
00971 }
00972 else {
00973 E_ERROR("unknown input file format\n");
00974 return (0);
00975 }
00976 free(tmpbuf);
00977 }
00978 else {
00979 tmpbuf = (int16 *) calloc(nsamps, sizeof(int16));
00980 actsamps = nsamps / nchans;
00981 cum_bytes_read = 0;
00982
00983 if (actsamps * nchans != nsamps) {
00984 E_WARN
00985 ("Blocksize %d is not an integer multiple of Number of channels %d\n",
00986 nsamps, nchans);
00987 }
00988
00989 if (P->input_format == RAW
00990 || P->input_format == MSWAV || P->input_format == NIST) {
00991 for (i = 0; i < nchans; i++) {
00992
00993 offset = i * actsamps;
00994 k = 0;
00995 nreadbytes = nsamps * sizeof(int16);
00996
00997 if ((bytes_read =
00998 read(fp, tmpbuf, nreadbytes)) != nreadbytes) {
00999 E_ERROR
01000 ("error reading block (got %d not %d)\n",
01001 bytes_read, nreadbytes);
01002 return (0);
01003 }
01004
01005 for (j = whichchan - 1; j < nsamps; j = j + nchans) {
01006 buf[offset + k] = tmpbuf[j];
01007 k++;
01008 }
01009 cum_bytes_read += bytes_read / nchans;
01010 }
01011 }
01012 else {
01013 E_ERROR("unknown input file format\n");
01014 return (0);
01015 }
01016 free(tmpbuf);
01017 }
01018 }
01019
01020 else {
01021 E_ERROR("unknown number of channels!\n");
01022 return (0);
01023 }
01024
01025 if (P->input_endian != P->machine_endian) {
01026 for (i = 0; i < nsamps; i++)
01027 SWAP_INT16(&buf[i]);
01028 }
01029
01030 return (cum_bytes_read / sizeof(int16));
01031
01032 }
01033
01034 int32
01035 fe_writeblock_feat(globals_t * P, fe_t * FE, int32 fp, int32 nframes,
01036 mfcc_t ** feat)
01037 {
01038
01039 int32 i, length, nwritebytes;
01040 float32 **ffeat;
01041
01042 length = nframes * fe_get_output_size(FE);
01043
01044 ffeat = (float32 **) feat;
01045 fe_mfcc_to_float(FE, feat, ffeat, nframes);
01046 if (P->output_endian != P->machine_endian) {
01047 for (i = 0; i < length; ++i)
01048 SWAP_FLOAT32(ffeat[0] + i);
01049 }
01050
01051 nwritebytes = length * sizeof(float32);
01052 if (write(fp, ffeat[0], nwritebytes) != nwritebytes) {
01053 close(fp);
01054 E_FATAL("Error writing block of features\n");
01055 }
01056
01057 return (length);
01058 }
01059
01060
01061 int32
01062 fe_closefiles(int32 fp_in, int32 fp_out)
01063 {
01064 close(fp_in);
01065 close(fp_out);
01066 return 0;
01067 }
01068
01069 int32
01070 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile)
01071 {
01072 FILE *ifh, *ofh;
01073 int32 ifsize, nfloats, swap = 0;
01074 int32 input_ncoeffs, output_ncoeffs;
01075 float32 *logspec;
01076
01077 if ((ifh = fopen(infile, "rb")) == NULL) {
01078 E_ERROR_SYSTEM("Cannot read %s", infile);
01079 return (FE_INPUT_FILE_READ_ERROR);
01080 }
01081 if ((ofh = fopen(outfile, "wb")) == NULL) {
01082 E_ERROR_SYSTEM("Unable to open %s for writing features", outfile);
01083 return (FE_OUTPUT_FILE_OPEN_ERROR);
01084 }
01085
01086 fseek(ifh, 0, SEEK_END);
01087 ifsize = ftell(ifh);
01088 fseek(ifh, 0, SEEK_SET);
01089 fread(&nfloats, 4, 1, ifh);
01090 if (nfloats != ifsize / 4 - 1) {
01091 E_INFO("Will byteswap %s (%x != %x)\n",
01092 infile, nfloats, ifsize / 4 - 1);
01093 SWAP_INT32(&nfloats);
01094 swap = 1;
01095 }
01096 if (nfloats != ifsize / 4 - 1) {
01097 E_ERROR("Size of file doesn't match header: %d != %d\n",
01098 nfloats, ifsize / 4 - 1);
01099 return (FE_INPUT_FILE_READ_ERROR);
01100 }
01101 if (P->convert == CEP2SPEC) {
01102 input_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01103 output_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01104 }
01105 else {
01106 input_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01107 output_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01108 }
01109 nfloats = nfloats * output_ncoeffs / input_ncoeffs;
01110
01111 if (swap)
01112 SWAP_INT32(&nfloats);
01113 fwrite(&nfloats, 4, 1, ofh);
01114
01115 logspec = ckd_calloc(cmd_ln_int32_r(P->config, "-nfilt"),
01116 sizeof(*logspec));
01117
01118 while (fread(logspec, 4, input_ncoeffs, ifh) == input_ncoeffs) {
01119 int32 i;
01120 if (swap) {
01121 for (i = 0; i < input_ncoeffs; ++i) {
01122 SWAP_FLOAT32(logspec + i);
01123 }
01124 }
01125 fe_float_to_mfcc(FE, &logspec, (mfcc_t **)&logspec, 1);
01126 if (P->convert == CEP2SPEC) {
01127 fe_mfcc_dct3(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01128 }
01129 else {
01130 if (0 == strcmp(cmd_ln_str_r(P->config, "-transform"), "legacy"))
01131 fe_logspec_to_mfcc(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01132 else
01133 fe_logspec_dct2(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01134 }
01135 fe_mfcc_to_float(FE, (mfcc_t **)&logspec, &logspec, 1);
01136 if (swap) {
01137 for (i = 0; i < output_ncoeffs; ++i) {
01138 SWAP_FLOAT32(logspec + i);
01139 }
01140 }
01141 if (fwrite(logspec, 4, output_ncoeffs, ofh) < output_ncoeffs) {
01142 E_ERROR_SYSTEM("Failed to write %d coeffs to %s",
01143 output_ncoeffs, outfile);
01144 ckd_free(logspec);
01145 return (FE_OUTPUT_FILE_WRITE_ERROR);
01146 }
01147 }
01148 if (!feof(ifh)) {
01149 E_ERROR("Short read in input file %s\n", infile);
01150 ckd_free(logspec);
01151 return (FE_INPUT_FILE_READ_ERROR);
01152 }
01153 fclose(ifh);
01154 fclose(ofh);
01155 ckd_free(logspec);
01156
01157 return FE_SUCCESS;
01158 }
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214