43 #include "sphinxbase/byteorder.h"
63 uint32 nbits, codeword;
85 huff_node_new_int(int32 val)
93 huff_node_new_str(
char const *val)
107 if (r->nbits > l->nbits)
108 hn->nbits = r->nbits + 1;
110 hn->nbits = l->nbits + 1;
118 huff_node_free_int(root->l);
119 huff_node_free_int(root->r.r);
128 huff_node_free_str(root->l, freestr);
129 huff_node_free_str(root->r.r, freestr);
139 huff_code_build_tree(
heap_t *q)
150 p = huff_node_new_parent(l, r);
164 hc->firstcode =
ckd_calloc(hc->maxbits+1,
sizeof(*hc->firstcode));
165 hc->syms =
ckd_calloc(hc->maxbits+1,
sizeof(*hc->syms));
166 hc->numl =
ckd_calloc(hc->maxbits+1,
sizeof(*nextcode));
167 nextcode =
ckd_calloc(hc->maxbits+1,
sizeof(*nextcode));
178 node->l->nbits = node->nbits + 1;
180 node->r.r->nbits = node->nbits + 1;
184 hc->numl[node->nbits]++;
189 hc->syms[hc->maxbits] =
ckd_calloc(hc->numl[hc->maxbits],
sizeof(**hc->syms));
190 for (i = hc->maxbits - 1; i > 0; --i) {
191 hc->firstcode[i] = (hc->firstcode[i+1] + hc->numl[i+1]) / 2;
192 hc->syms[i] =
ckd_calloc(hc->numl[i],
sizeof(**hc->syms));
194 memcpy(nextcode, hc->firstcode, (hc->maxbits + 1) *
sizeof(*nextcode));
208 uint32 codeword = nextcode[node->nbits] & ((1 << node->nbits) - 1);
209 cw = hc->syms[node->nbits] + (codeword - hc->firstcode[node->nbits]);
210 cw->nbits = node->nbits;
211 cw->r.sval = node->r.sval;
212 cw->codeword = codeword;
213 if (hc->type == HUFF_CODE_INT) {
215 (
char const *)&cw->r.ival,
222 ++nextcode[node->nbits];
238 hc->type = HUFF_CODE_INT;
242 for (i = 0; i < nvals; ++i) {
244 huff_node_new_int(values[i]),
249 root = huff_code_build_tree(q);
251 if (root == NULL || root->nbits > 32) {
252 E_ERROR(
"Huffman trees currently limited to 32 bits\n");
253 huff_node_free_int(root);
259 hc->maxbits = root->nbits;
260 huff_code_canonicalize(hc, root);
263 huff_node_free_int(root);
278 hc->type = HUFF_CODE_STR;
282 for (i = 0; i < nvals; ++i) {
284 huff_node_new_str(values[i]),
289 root = huff_code_build_tree(q);
291 if (root == NULL || root->nbits > 32) {
292 E_ERROR(
"Huffman trees currently limited to 32 bits\n");
293 huff_node_free_str(root, TRUE);
299 hc->maxbits = root->nbits;
300 huff_code_canonicalize(hc, root);
303 huff_node_free_str(root, FALSE);
317 hc->maxbits = fgetc(infh);
318 hc->type = fgetc(infh);
325 hc->firstcode =
ckd_calloc(hc->maxbits + 1,
sizeof(*hc->firstcode));
326 hc->numl =
ckd_calloc(hc->maxbits + 1,
sizeof(*hc->numl));
327 hc->syms =
ckd_calloc(hc->maxbits + 1,
sizeof(*hc->syms));
331 for (i = 1; i <= hc->maxbits; ++i) {
332 if (fread(&hc->firstcode[i], 4, 1, infh) != 1)
334 SWAP_BE_32(&hc->firstcode[i]);
335 if (fread(&hc->numl[i], 4, 1, infh) != 1)
337 SWAP_BE_32(&hc->numl[i]);
338 hc->syms[i] =
ckd_calloc(hc->numl[i],
sizeof(**hc->syms));
339 for (j = 0; j < hc->numl[i]; ++j) {
342 cw->codeword = hc->firstcode[i] + j;
343 if (hc->type == HUFF_CODE_INT) {
344 if (fread(&cw->r.ival, 4, 1, infh) != 1)
346 SWAP_BE_32(&cw->r.ival);
348 (
char const *)&cw->r.ival,
355 cw->r.sval[len-1] =
'\0';
373 fputc(hc->maxbits, outfh);
375 fputc(hc->type, outfh);
380 for (i = 1; i <= hc->maxbits; ++i) {
384 val = hc->firstcode[i];
387 fwrite(&val, 4, 1, outfh);
390 fwrite(&val, 4, 1, outfh);
393 for (j = 0; j < hc->numl[i]; ++j) {
394 if (hc->type == HUFF_CODE_INT) {
395 int32 val = hc->syms[i][j].r.ival;
397 fwrite(&val, 4, 1, outfh);
402 fprintf(outfh,
"%s\n", hc->syms[i][j].r.sval);
410 huff_code_dump_codebits(FILE *dumpfh, uint32 nbits, uint32 codeword)
414 for (i = 0; i < nbits; ++i)
415 fputc((codeword & (1<<(nbits-i-1))) ?
'1' :
'0', dumpfh);
425 fprintf(dumpfh,
"Maximum codeword length: %d\n", hc->maxbits);
426 fprintf(dumpfh,
"Symbols are %s\n", (hc->type == HUFF_CODE_STR) ?
"strings" :
"ints");
427 fprintf(dumpfh,
"Codewords:\n");
428 for (i = 1; i <= hc->maxbits; ++i) {
429 for (j = 0; j < hc->numl[i]; ++j) {
430 if (hc->type == HUFF_CODE_STR)
431 fprintf(dumpfh,
"%-30s", hc->syms[i][j].r.sval);
433 fprintf(dumpfh,
"%-30d", hc->syms[i][j].r.ival);
434 huff_code_dump_codebits(dumpfh, hc->syms[i][j].nbits,
435 hc->syms[i][j].codeword);
436 fprintf(dumpfh,
"\n");
456 if (--hc->refcount > 0)
458 for (i = 0; i <= hc->maxbits; ++i) {
460 for (j = 0; j < hc->numl[i]; ++j) {
461 if (hc->type == HUFF_CODE_STR)
488 FILE *oldfh = hc->fh;
511 if (outcw) *outcw = cw->codeword;
526 if (outcw) *outcw = cw->codeword;
531 huff_code_decode_data(
huff_code_t *hc,
char const **inout_data,
532 size_t *inout_data_len,
int *inout_offset)
534 char const *data = *inout_data;
535 char const *end = data + *inout_data_len;
536 int offset = *inout_offset;
544 cw = !!(byte & (1 << (7-offset++)));
547 while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
556 cw |= !!(byte & (1 << (7-offset++)));
559 if (cwlen > hc->maxbits)
569 *inout_data_len = end - data;
571 *inout_offset = offset;
572 return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
582 if ((byte = fgetc(hc->fh)) == EOF)
584 cw = !!(byte & (1 << (7-hc->boff++)));
587 while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
591 if ((byte = fgetc(hc->fh)) == EOF)
595 cw |= !!(byte & (1 << (7-hc->boff++)));
598 if (cwlen > hc->maxbits)
603 ungetc(byte, hc->fh);
608 return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
613 char const **inout_data,
614 size_t *inout_data_len,
int *inout_offset)
619 cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
621 cw = huff_code_decode_fh(hc);
628 *outval = cw->r.ival;
635 char const **inout_data,
636 size_t *inout_data_len,
int *inout_offset)
641 cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
643 cw = huff_code_decode_fh(hc);