android13/external/libwebsockets/minimal-examples/secure-streams/minimal-secure-streams-alexa/alexa.c

676 lines
15 KiB
C

/*
* lws-minimal-secure-streams-alexa
*
* This file is made available under the Creative Commons CC0 1.0
* Universal Public Domain Dedication.
*/
#include <libwebsockets.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <mpg123.h>
#include "private.h"
struct lws_ss_handle *hss_avs_event, *hss_avs_sync;
/* this is the type for the long poll event channel */
typedef struct ss_avs_event {
struct lws_ss_handle *ss;
void *opaque_data;
/* ... application specific state ... */
struct lejp_ctx jctx;
} ss_avs_event_t;
enum {
LAMP3STATE_IDLE,
LAMP3STATE_SPOOLING,
LAMP3STATE_DRAINING,
};
/* this is the type for the utterance metadata (and audio rideshares) */
typedef struct ss_avs_metadata {
struct lws_ss_handle *ss;
void *opaque_data;
/* ... application specific state ... */
struct lws_buflist *dribble; /* next mp3 data while draining last */
struct lejp_ctx jctx;
size_t pos;
size_t mp3_in;
mpg123_handle *mh;
lws_sorted_usec_list_t sul;
uint8_t stash_eom[16];
uint8_t se_head;
uint8_t se_tail;
char mp3_state;
char first_mp3;
uint8_t mp3_mime_match;
uint8_t seen;
uint8_t inside_mp3;
} ss_avs_metadata_t;
/*
* The remote server only seems to give us a budget of 10s to consume the
* results, after that it doesn't drop the stream, but doesn't send us anything
* further on it.
*
* This makes it impossible to optimize buffering for incoming mp3 since we
* have to go ahead and take it before the 10s is up.
*/
#define MAX_MP3_IN_BUFFERING_BYTES 32768
/*
* Structure of JSON metadata for utterance handling
*/
static const char *metadata = "{"
"\"event\": {"
"\"header\": {"
"\"namespace\": \"SpeechRecognizer\","
"\"name\": \"Recognize\","
"\"messageId\": \"message-123\","
"\"dialogRequestId\": \"dialog-request-321\""
"},"
"\"payload\": {"
"\"profile\":" "\"CLOSE_TALK\","
"\"format\":" "\"AUDIO_L16_RATE_16000_CHANNELS_1\""
"}"
"}"
"}";
/*
* avs metadata
*/
static void
use_buffer_250ms(lws_sorted_usec_list_t *sul)
{
ss_avs_metadata_t *m = lws_container_of(sul, ss_avs_metadata_t, sul);
struct lws_context *context = (struct lws_context *)m->opaque_data;
int est = lws_ss_get_est_peer_tx_credit(m->ss);
lwsl_notice("%s: est txcr %d\n", __func__, est);
if (est < MAX_MP3_IN_BUFFERING_BYTES - (MAX_MP3_IN_BUFFERING_BYTES / 4)) {
lwsl_notice(" adding %d\n", MAX_MP3_IN_BUFFERING_BYTES / 4);
lws_ss_add_peer_tx_credit(m->ss, MAX_MP3_IN_BUFFERING_BYTES / 4);
}
lws_sul_schedule(context, 0, &m->sul, use_buffer_250ms,
250 * LWS_US_PER_MS);
}
static const char *mp3_mimetype = "application/octet-stream",
*match2 = "\x0d\x0a\x0d\x0a";
static int
ss_avs_mp3_open(ss_avs_metadata_t *m)
{
int r;
lwsl_notice("%s\n", __func__);
m->first_mp3 = 1;
m->mh = mpg123_new(NULL, NULL);
if (!m->mh) {
lwsl_err("%s: unable to make new mp3\n",
__func__);
goto bail;
}
mpg123_format_none(m->mh);
r = mpg123_format(m->mh, 16000, MPG123_M_MONO,
MPG123_ENC_SIGNED_16);
if (r) {
lwsl_err("%s: mpg123 format failed %d\n",
__func__, r);
goto bail1;
}
r = mpg123_open_feed(m->mh);
if (r) {
lwsl_err("%s: mpg123 open feed failed %d\n",
__func__, r);
goto bail1;
}
return 0;
bail1:
mpg123_delete(m->mh);
m->mh = NULL;
bail:
return 1;
}
static lws_ss_state_return_t
ss_avs_metadata_rx(void *userobj, const uint8_t *buf, size_t len, int flags);
/*
* This is called when the mp3 has drained it's input buffer and destroyed
* itself.
*/
static int
drain_end_cb(void *v)
{
ss_avs_metadata_t *m = (ss_avs_metadata_t *)v;
struct lws_context *context = (struct lws_context *)m->opaque_data;
int tot = 0;
lwsl_err("%s\n", __func__);
/*
* We have drained and destroyed the existing mp3 session. Is there
* a new one pending?
*/
m->first_mp3 = 1;
m->mp3_state = LAMP3STATE_IDLE;
if (lws_buflist_total_len(&m->dribble)) {
/* we started another one */
/* resume tx credit top up */
lws_sul_schedule(context, 0, &m->sul, use_buffer_250ms, 1);
if (ss_avs_mp3_open(m))
return 1;
m->mp3_state = LAMP3STATE_SPOOLING;
/*
* Dump what we stashed from draining into the new mp3
*/
while (lws_buflist_total_len(&m->dribble)) {
size_t s;
uint8_t *u, t;
s = lws_buflist_next_segment_len(&m->dribble, &u);
t = m->stash_eom[m->se_tail];
lwsl_notice("%s: preload %d: %d\n", __func__, (int)s, t);
mpg123_feed(m->mh, u, s);
lws_buflist_use_segment(&m->dribble, s);
if (m->first_mp3) {
play_mp3(m->mh, NULL, NULL);
m->first_mp3 = 0;
}
tot += s;
m->se_tail = (m->se_tail + 1) % sizeof(m->stash_eom);
if (t) {
lwsl_notice("%s: preloaded EOM\n", __func__);
/*
* We stashed the whole of the message, we need
* to also do the EOM processing. We will come
* back here if there's another message in the
* stash.
*/
m->mp3_state = LAMP3STATE_DRAINING;
if (m->mh)
play_mp3(NULL, drain_end_cb, m);
lws_ss_add_peer_tx_credit(m->ss, tot);
#if 0
/*
* Put a hold on bringing in any more data
*/
lws_sul_cancel(&m->sul);
#endif
/* destroy our copy of the handle */
m->mh = NULL;
break;
}
}
lws_ss_add_peer_tx_credit(m->ss, tot);
}
return 0;
}
static lws_ss_state_return_t
ss_avs_metadata_rx(void *userobj, const uint8_t *buf, size_t len, int flags)
{
ss_avs_metadata_t *m = (ss_avs_metadata_t *)userobj;
struct lws_context *context = (struct lws_context *)m->opaque_data;
int n = 0, hit = 0;
lwsl_notice("%s: len %d, flags %d (est peer txcr %d)\n", __func__,
(int)len, flags, lws_ss_get_est_peer_tx_credit(m->ss));
// lwsl_hexdump_warn(buf, len);
if ((flags & LWSSS_FLAG_SOM) && !m->mh && !m->seen) {
m->mp3_mime_match = 0;
m->seen = 0;
m->inside_mp3 = 0;
}
if (!m->inside_mp3) {
/*
* Identify the part with the mp3 in, if any
*/
while (n < (int)len - 24) {
if (!m->seen) {
if (buf[n] == mp3_mimetype[m->mp3_mime_match]) {
m->mp3_mime_match++;
if (m->mp3_mime_match == 24) {
m->mp3_mime_match = 0;
m->seen = 1;
n++;
continue;
}
} else
m->mp3_mime_match = 0;
} else {
if (buf[n] == match2[m->mp3_mime_match]) {
m->mp3_mime_match++;
if (m->mp3_mime_match == 4) {
m->seen = 0;
m->mp3_mime_match = 0;
hit = 1;
n++;
buf += n;
len -= n;
lwsl_notice("identified reply...\n");
m->inside_mp3 = 1;
break;
}
} else
m->mp3_mime_match = 0;
}
n++;
}
if (!hit) {
lws_ss_add_peer_tx_credit(m->ss, len);
return 0;
}
}
// lwsl_notice("%s: state %d\n", __func__, m->mp3_state);
switch (m->mp3_state) {
case LAMP3STATE_IDLE:
if (hit) {
lws_ss_add_peer_tx_credit(m->ss, n);
if (ss_avs_mp3_open(m))
goto bail;
lws_sul_schedule(context, 0, &m->sul, use_buffer_250ms, 1);
m->mp3_state = LAMP3STATE_SPOOLING;
break;
}
lws_ss_add_peer_tx_credit(m->ss, len);
if (!m->inside_mp3)
break;
/* fallthru */
case LAMP3STATE_SPOOLING:
if (m->dribble)
goto draining;
if (len) {
/*
* We are shoving encoded mp3 into mpg123-allocated heap
* buffers... unfortunately mpg123 doesn't seem to
* expose where it is in its allocated input so we can
* track how much is stashed. Instead while in playback
* mode, we assume 64kbps mp3 encoding, ie, 8KB/s, and
* run a sul that allows an additional 2KB tx credit
* every 250ms, with 4KB initial credit.
*/
lwsl_notice("%s: SPOOL %d\n", __func__, (int)len);
mpg123_feed(m->mh, buf, len);
if (m->first_mp3) {
lws_sul_schedule(context, 0, &m->sul,
use_buffer_250ms, 1);
// lws_ss_add_peer_tx_credit(m->ss,
// len + (MAX_MP3_IN_BUFFERING_BYTES / 2));
play_mp3(m->mh, NULL, NULL);
} //else
// lws_ss_add_peer_tx_credit(m->ss, len);
m->first_mp3 = 0;
}
if (flags & LWSSS_FLAG_EOM) {
/*
* This means one "message" / mime part with mp3 data
* has finished coming in. But there may be whole other
* parts with other mp3s following, with potentially
* different mp3 parameters. So we want to tell this
* one to drain and finish and destroy the current mp3
* object before we go on.
*
* But not knowing the length of the current one, there
* will already be outstanding tx credit at the server,
* so it's going to spam us with the next part before we
* have the new mp3 sink for it.
*/
lwsl_notice("%s: EOM\n", __func__);
m->mp3_mime_match = 0;
m->seen = 0;
m->mp3_state = LAMP3STATE_DRAINING;
/* from input POV, we're no longer inside an mp3 */
m->inside_mp3 = 0;
if (m->mh)
play_mp3(NULL, drain_end_cb, m);
#if 0
/*
* Put a hold on bringing in any more data
*/
lws_sul_cancel(&m->sul);
#endif
/* destroy our copy of the handle */
m->mh = NULL;
}
break;
case LAMP3STATE_DRAINING:
draining:
if (buf && len && m->inside_mp3) {
lwsl_notice("%s: DRAINING: stashing %d: %d %d %d\n",
__func__, (int)len, !!(flags & LWSSS_FLAG_EOM),
m->se_head, m->se_tail);
lwsl_hexdump_notice(buf, len);
if (lws_buflist_append_segment(&m->dribble, buf, len) < 0)
goto bail;
m->stash_eom[m->se_head] = !!(flags & LWSSS_FLAG_EOM);
m->se_head = (m->se_head + 1) % sizeof(m->stash_eom);
lwsl_notice("%s: next head %d\n", __func__, m->se_head);
lws_ss_add_peer_tx_credit(m->ss, len);
}
if (flags & LWSSS_FLAG_EOM) {
if (!len && m->se_head != m->se_tail) {
/* 0-len EOM... retrospectively mark last stash */
lwsl_notice("%s: retro EOM\n", __func__);
m->stash_eom[(m->se_head - 1) % sizeof(m->stash_eom)] = 1;
}
lwsl_notice("%s: Draining EOM\n", __func__);
m->inside_mp3 = 0;
}
/*
* Don't provide any additional tx credit... we're just
* mopping up the overspill from the previous mp3 credit
*/
break;
}
return 0;
bail:
return -1;
}
/*
* Because this is multipart mime in h2 currently, use a "rideshare" to handle
* first the native metadata on this secure stream, then the "rideshare" audio
* stream mentioned in the policy.
*
* Lws takes care of interleaving the multipart mime pieces since the policy
* calls for it.
*/
static lws_ss_state_return_t
ss_avs_metadata_tx(void *userobj, lws_ss_tx_ordinal_t ord, uint8_t *buf,
size_t *len, int *flags)
{
ss_avs_metadata_t *m = (ss_avs_metadata_t *)userobj;
size_t tot;
int n;
// lwsl_notice("%s %d\n", __func__, (int)m->pos);
if ((long)m->pos < 0) {
*len = 0;
lwsl_info("%s: skip\n", __func__);
return 1;
}
if (!strcmp(lws_ss_rideshare(m->ss), "avs_audio")) {
/* audio rideshare part */
if (!m->pos)
*flags |= LWSSS_FLAG_SOM;
n = spool_capture(buf, *len);
if (n > 0)
*len = n;
else
*len = 0;
if (!n) {
lwsl_info("%s: trying to skip tx\n", __func__);
return 1;
}
m->pos += *len;
if (n < 0) {
*flags |= LWSSS_FLAG_EOM;
m->pos = (long)-1l; /* ban subsequent until new stream */
}
lwsl_notice("%s: tx audio %d\n", __func__, (int)*len);
#if 0
{
int ff = open("/tmp/z1", O_RDWR | O_CREAT | O_APPEND, 0666);
if (ff == -1)
lwsl_err("%s: errno %d\n", __func__, errno);
write(ff, buf, *len);
close(ff);
}
#endif
return 0;
}
/* metadata part */
tot = strlen(metadata);
if (!m->pos)
*flags |= LWSSS_FLAG_SOM;
if (*len > tot - m->pos)
*len = tot - m->pos;
memcpy(buf, metadata + m->pos, *len);
m->pos += *len;
if (m->pos == tot) {
lwsl_notice("metadata done\n");
*flags |= LWSSS_FLAG_EOM;
m->pos = 0; /* for next time */
}
return 0;
}
static lws_ss_state_return_t
ss_avs_metadata_state(void *userobj, void *sh,
lws_ss_constate_t state, lws_ss_tx_ordinal_t ack)
{
ss_avs_metadata_t *m = (ss_avs_metadata_t *)userobj;
struct lws_context *context = (struct lws_context *)m->opaque_data;
lwsl_notice("%s: %p: %s, ord 0x%x\n", __func__, m->ss,
lws_ss_state_name(state), (unsigned int)ack);
switch (state) {
case LWSSSCS_CREATING:
return lws_ss_client_connect(m->ss);
case LWSSSCS_CONNECTING:
m->pos = 0;
break;
case LWSSSCS_CONNECTED:
lwsl_info("%s: CONNECTED\n", __func__);
return lws_ss_request_tx(m->ss);
case LWSSSCS_DISCONNECTED:
lws_sul_cancel(&m->sul);
//if (m->mh) {
play_mp3(NULL, NULL, NULL);
m->mh = NULL;
//}
/*
* For this stream encapsulating an alexa exchange, dropping
* is the end of its life
*/
return 1;
case LWSSSCS_DESTROYING:
lws_buflist_destroy_all_segments(&m->dribble);
break;
default:
break;
}
return 0;
}
/*
* avs event
*/
static lws_ss_state_return_t
ss_avs_event_rx(void *userobj, const uint8_t *buf, size_t len, int flags)
{
return 0;
}
static lws_ss_state_return_t
ss_avs_event_tx(void *userobj, lws_ss_tx_ordinal_t ord, uint8_t *buf,
size_t *len, int *flags)
{
return 1; /* don't transmit anything */
}
static lws_ss_state_return_t
ss_avs_event_state(void *userobj, void *sh,
lws_ss_constate_t state, lws_ss_tx_ordinal_t ack)
{
lwsl_info("%s: %s, ord 0x%x\n", __func__, lws_ss_state_name(state),
(unsigned int)ack);
switch (state) {
case LWSSSCS_CREATING:
mpg123_init();
break;
case LWSSSCS_CONNECTING:
break;
case LWSSSCS_CONNECTED:
lwsl_user("Connected to Alexa... speak \"Alexa, ...\"\n");
break;
case LWSSSCS_DISCONNECTED:
lwsl_user("Disconnected from Alexa\n");
break;
case LWSSSCS_DESTROYING:
mpg123_exit();
break;
default:
break;
}
return 0;
}
int
avs_query_start(struct lws_context *context)
{
lws_ss_info_t ssi;
lwsl_notice("%s:\n", __func__);
memset(&ssi, 0, sizeof(ssi));
ssi.handle_offset = offsetof(ss_avs_metadata_t, ss);
ssi.opaque_user_data_offset = offsetof(ss_avs_metadata_t, opaque_data);
ssi.rx = ss_avs_metadata_rx;
ssi.tx = ss_avs_metadata_tx;
ssi.state = ss_avs_metadata_state;
ssi.user_alloc = sizeof(ss_avs_metadata_t);
ssi.streamtype = "avs_metadata";
ssi.manual_initial_tx_credit = 8192;
if (lws_ss_create(context, 0, &ssi, context, &hss_avs_sync, NULL, NULL)) {
lwsl_err("%s: failed to create avs metadata secstream\n",
__func__);
return 1;
}
lwsl_user("%s: created query stream %p\n", __func__, hss_avs_sync);
return 0;
}
int
avs_example_start(struct lws_context *context)
{
lws_ss_info_t ssi;
if (hss_avs_event)
return 0;
lwsl_info("%s: Starting AVS stream\n", __func__);
/* AVS wants us to establish the long poll event stream first */
memset(&ssi, 0, sizeof(ssi));
ssi.handle_offset = offsetof(ss_avs_event_t, ss);
ssi.opaque_user_data_offset = offsetof(ss_avs_event_t, opaque_data);
ssi.rx = ss_avs_event_rx;
ssi.tx = ss_avs_event_tx;
ssi.state = ss_avs_event_state;
ssi.user_alloc = sizeof(ss_avs_event_t);
ssi.streamtype = "avs_event";
if (lws_ss_create(context, 0, &ssi, context, &hss_avs_event, NULL, NULL)) {
lwsl_err("%s: failed to create avs event secure stream\n",
__func__);
return 1;
}
return 0;
}