media/pocketsphinx/src/phone_loop_search.c
author Andrea Marchesini <amarchesini@mozilla.com>
Thu, 04 Jun 2015 19:51:57 +0100
changeset 247941 fc4444e384ba699a67da8c1b1c286b7fdba3d919
parent 246938 72d3499011ff584d99b67fef64c5bb231f4d1a78
permissions -rw-r--r--
Bug 1155153 - about:serviceworkers should work in e10s mode, r=nsm, r=bholley

/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 2008 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced 
 * Research Projects Agency and the National Science Foundation of the 
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */

/**
 * @file phone_loop_search.h Fast and rough context-independent phoneme loop search.
 */

#include <sphinxbase/err.h>

#include "phone_loop_search.h"

static int phone_loop_search_start(ps_search_t *search);
static int phone_loop_search_step(ps_search_t *search, int frame_idx);
static int phone_loop_search_finish(ps_search_t *search);
static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
static void phone_loop_search_free(ps_search_t *search);
static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final);
static int32 phone_loop_search_prob(ps_search_t *search);
static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score);

static ps_searchfuncs_t phone_loop_search_funcs = {
    /* name: */   "phone_loop",
    /* start: */  phone_loop_search_start,
    /* step: */   phone_loop_search_step,
    /* finish: */ phone_loop_search_finish,
    /* reinit: */ phone_loop_search_reinit,
    /* free: */   phone_loop_search_free,
    /* lattice: */  NULL,
    /* hyp: */      phone_loop_search_hyp,
    /* prob: */     phone_loop_search_prob,
    /* seg_iter: */ phone_loop_search_seg_iter,
};

static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    cmd_ln_t *config = ps_search_config(search);
    acmod_t *acmod = ps_search_acmod(search);
    int i;

    /* Free old dict2pid, dict, if necessary. */
    ps_search_base_reinit(search, dict, d2p);

    /* Initialize HMM context. */
    if (pls->hmmctx)
        hmm_context_free(pls->hmmctx);
    pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL, acmod->mdef->sseq);
    if (pls->hmmctx == NULL)
        return -1;

    /* Initialize penalty storage */
    pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
    pls->window = cmd_ln_int32_r(config, "-pl_window");
    if (pls->penalties)
        ckd_free(pls->penalties);
    pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties));
    if (pls->pen_buf)
        ckd_free_2d(pls->pen_buf);
    pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf));

    /* Initialize phone HMMs. */
    if (pls->hmms) {
        for (i = 0; i < pls->n_phones; ++i)
            hmm_deinit((hmm_t *)&pls->hmms[i]);
        ckd_free(pls->hmms);
    }
    pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i],
                 FALSE,
                 bin_mdef_pid2ssid(acmod->mdef, i),
                 bin_mdef_pid2tmatid(acmod->mdef, i));
    }
    pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
    pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
    pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
    pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;
    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           pls->beam, pls->pbeam, pls->pip);

    return 0;
}

ps_search_t *
phone_loop_search_init(cmd_ln_t *config,
               acmod_t *acmod,
               dict_t *dict)
{
    phone_loop_search_t *pls;

    /* Allocate and initialize. */
    pls = (phone_loop_search_t *)ckd_calloc(1, sizeof(*pls));
    ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
                   config, acmod, dict, NULL);
    phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls),
                             ps_search_dict2pid(pls));

    return ps_search_base(pls);
}

static void
phone_loop_search_free_renorm(phone_loop_search_t *pls)
{
    gnode_t *gn;
    for (gn = pls->renorm; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(pls->renorm);
    pls->renorm = NULL;
}

static void
phone_loop_search_free(ps_search_t *search)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    int i;

    ps_search_deinit(search);
    for (i = 0; i < pls->n_phones; ++i)
        hmm_deinit((hmm_t *)&pls->hmms[i]);
    phone_loop_search_free_renorm(pls);
    ckd_free_2d(pls->pen_buf);
    ckd_free(pls->hmms);
    ckd_free(pls->penalties);
    hmm_context_free(pls->hmmctx);
    ckd_free(pls);
}

static int
phone_loop_search_start(ps_search_t *search)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    int i;

    /* Reset and enter all phone HMMs. */
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->hmms[i];
        hmm_clear(hmm);
        hmm_enter(hmm, 0, -1, 0);
    }
    memset(pls->penalties, 0, pls->n_phones * sizeof(*pls->penalties));
    for (i = 0; i < pls->window; i++)
        memset(pls->pen_buf[i], 0, pls->n_phones * sizeof(*pls->pen_buf[i]));
    phone_loop_search_free_renorm(pls);
    pls->best_score = 0;
    pls->pen_buf_ptr = 0;

    return 0;
}

static void
renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
{
    phone_loop_renorm_t *rn = (phone_loop_renorm_t *)ckd_calloc(1, sizeof(*rn));
    int i;

    pls->renorm = glist_add_ptr(pls->renorm, rn);
    rn->frame_idx = frame_idx;
    rn->norm = norm;

    for (i = 0; i < pls->n_phones; ++i) {
        hmm_normalize((hmm_t *)&pls->hmms[i], norm);
    }
}

static void
evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
{
    int32 bs = WORST_SCORE;
    int i;

    hmm_context_set_senscore(pls->hmmctx, senscr);

    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->hmms[i];
        int32 score;

        if (hmm_frame(hmm) < frame_idx)
            continue;
        score = hmm_vit_eval(hmm);
        if (score BETTER_THAN bs) {
            bs = score;
        }
    }
    pls->best_score = bs;
}

static void
store_scores(phone_loop_search_t *pls, int frame_idx)
{
    int i, j, itr;

    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->hmms[i];
        pls->pen_buf[pls->pen_buf_ptr][i] = (hmm_bestscore(hmm) - pls->best_score) * pls->penalty_weight;
    }
    pls->pen_buf_ptr++;
    pls->pen_buf_ptr = pls->pen_buf_ptr % pls->window;

    //update penalties
    for (i = 0; i < pls->n_phones; ++i) {
        pls->penalties[i] = WORST_SCORE;
        for (j = 0, itr = pls->pen_buf_ptr + 1; j < pls->window; j++, itr++) {
            itr = itr % pls->window;
            if (pls->pen_buf[itr][i] > pls->penalties[i])
                pls->penalties[i] = pls->pen_buf[itr][i];
        }
    }
}

static void
prune_hmms(phone_loop_search_t *pls, int frame_idx)
{
    int32 thresh = pls->best_score + pls->beam;
    int nf = frame_idx + 1;
    int i;

    /* Check all phones to see if they remain active in the next frame. */
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->hmms[i];

        if (hmm_frame(hmm) < frame_idx)
            continue;
        /* Retain if score better than threshold. */
        if (hmm_bestscore(hmm) BETTER_THAN thresh) {
            hmm_frame(hmm) = nf;
        }
        else
            hmm_clear_scores(hmm);
    }
}

static void
phone_transition(phone_loop_search_t *pls, int frame_idx)
{
    int32 thresh = pls->best_score + pls->pbeam;
    int nf = frame_idx + 1;
    int i;

    /* Now transition out of phones whose last states are inside the
     * phone transition beam. */
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->hmms[i];
        int32 newphone_score;
        int j;

        if (hmm_frame(hmm) != nf)
            continue;

        newphone_score = hmm_out_score(hmm) + pls->pip;
        if (newphone_score BETTER_THAN thresh) {
            /* Transition into all phones using the usual Viterbi rule. */
            for (j = 0; j < pls->n_phones; ++j) {
                hmm_t *nhmm = (hmm_t *)&pls->hmms[j];

                if (hmm_frame(nhmm) < frame_idx
                    || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
                    hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
                }
            }
        }
    }
}

static int
phone_loop_search_step(ps_search_t *search, int frame_idx)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    acmod_t *acmod = ps_search_acmod(search);
    int16 const *senscr;
    int i;

    /* All CI senones are active all the time. */
    if (!ps_search_acmod(pls)->compallsen) {
        acmod_clear_active(ps_search_acmod(pls));
        for (i = 0; i < pls->n_phones; ++i)
            acmod_activate_hmm(acmod, (hmm_t *)&pls->hmms[i]);
    }

    /* Calculate senone scores for current frame. */
    senscr = acmod_score(acmod, &frame_idx);

    /* Renormalize, if necessary. */
    if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
        E_INFO("Renormalizing Scores at frame %d, best score %d\n",
               frame_idx, pls->best_score);
        renormalize_hmms(pls, frame_idx, pls->best_score);
    }

    /* Evaluate phone HMMs for current frame. */
    evaluate_hmms(pls, senscr, frame_idx);

    /* Store hmm scores for senone penaly calculation */
    store_scores(pls, frame_idx);

    /* Prune phone HMMs. */
    prune_hmms(pls, frame_idx);

    /* Do phone transitions. */
    phone_transition(pls, frame_idx);

    return 0;
}

static int
phone_loop_search_finish(ps_search_t *search)
{
    /* Actually nothing to do here really. */
    return 0;
}

static char const *
phone_loop_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
{
    E_WARN("Hypotheses are not returned from phone loop search");
    return NULL;
}

static int32
phone_loop_search_prob(ps_search_t *search)
{
    /* FIXME: Actually... they ought to be. */
    E_WARN("Posterior probabilities are not returned from phone loop search");
    return 0;
}

static ps_seg_t *
phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
{
    E_WARN("Hypotheses are not returned from phone loop search");
    return NULL;
}