1577 lines
64 KiB
C
1577 lines
64 KiB
C
/*
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
/**
 * @file picotok.c
 *
 * tokenizer
 *
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 * All rights reserved.
 *
 * History:
 * - 2009-04-20 -- initial version
 *
 */
|
|
|
|
|
|
/* ************************************************************/
|
|
/* tokenisation and markup handling */
|
|
/* ************************************************************/
|
|
|
|
/** @addtogroup picotok

  @b tokenisation_overview

  markup handling overview:

  The following markups are recognized
     - ignore
     - speed
     - pitch
     - volume
     - voice
     - preproccontext
     - mark
     - play
     - usesig
     - genfile
     - sentence
     - s
     - paragraph
     - p
     - break
     - spell            (pauses between letters)
     - phoneme

  All markups which are recognized but are not yet implemented in the pico
  system have the mark.
*/
|
|
|
|
|
|
#include "picodefs.h"
|
|
#include "picoos.h"
|
|
#include "picobase.h"
|
|
#include "picodbg.h"
|
|
#include "picodata.h"
|
|
#include "picotok.h"
|
|
#include "picoktab.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
#if 0
|
|
}
|
|
#endif
|
|
|
|
/* *****************************************************************************/
|
|
|
|
/* Capacity in bytes of the input-side string buffers
   (tokenStr, markupTagName, saveFile). */
#define IN_BUF_SIZE 255
/* Capacity in bytes of the internal output buffer (outBuf).
   The replacement list is parenthesized so that the macro always expands
   as a single operand, e.g. in "2 * OUT_BUF_SIZE". */
#define OUT_BUF_SIZE (IN_BUF_SIZE + 3 * PICODATA_ITEM_HEADSIZE + 3)

/* Capacity in bytes of a markup string buffer (Word, markupStr). */
#define MARKUP_STRING_BUF_SIZE (IN_BUF_SIZE*5)
/* Maximum number of attributes kept per markup tag (MarkupParams). */
#define MAX_NR_MARKUP_PARAMS 6
#define MARKUP_HANDLING_DISABLED 0
#define MARKUP_HANDLING_ENABLED 1
#define EOL '\n'
|
|
|
|
|
|
typedef picoos_int8 pico_tokenSubType;
|
|
typedef picoos_uint8 pico_tokenType;
|
|
|
|
/** @todo : consider adding these specialized exception codes: */

/* Until dedicated codes exist, all three map to the generic PICO_ERR_OTHER. */
#define PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE PICO_ERR_OTHER
#define PICO_ERR_INVALID_MARKUP_TAG PICO_ERR_OTHER
#define PICO_ERR_INTERNAL_LIMIT PICO_ERR_OTHER
|
|
|
|
/* Identifiers of the recognized markup tags. MIDummyStart and MIDummyEnd
   bracket the real ids; MIDummyEnd is also what tok_markupTagId returns
   for an unknown tag name. The enumerator order (and thus the numeric
   values) must not change: markupLevel[] is sized by MIDummyEnd+1. */
typedef enum {
    MIDummyStart,
    MIIgnore,
    MIPitch,
    MISpeed,
    MIVolume,
    MIVoice,
    MIPreprocContext,
    MIMarker,
    MIPlay,
    MIUseSig,
    MIGenFile,
    MIParagraph,
    MISentence,
    MIBreak,
    MISpell,
    MIPhoneme,
    MIItem,
    MISpeaker,
    MIDummyEnd
} MarkupId;
|
|
/* States of the markup tag parser (stored in tok_subobj.markupState). */
typedef enum {
    MSNotInMarkup,
    MSGotStart,
    MSExpectingmarkupTagName,
    MSInmarkupTagName,
    MSGotmarkupTagName,
    MSInAttrName,
    MSGotAttrName,
    MSGotEqual,
    MSInAttrValue,
    MSInAttrValueEscaped,
    MSGotAttrValue,
    MSGotEndSlash,
    MSGotEnd,
    MSError,
    MSErrorTooLong,
    MSErrorSyntax
} MarkupState;
|
|
/* Error kinds recorded while parsing a markup tag
   (stored in tok_subobj.markupTagErr). */
typedef enum {
    MENone,
    MEMissingStart,
    MEUnknownTag,
    MEIdent,
    MEMissingEqual,
    MEMissingQuote,
    MEMissingEnd,
    MEUnexpectedChar,
    MEInterprete
} MarkupParseError;
|
|
|
|
/* Kind of a markup tag (stored in tok_subobj.markupTagType). */
typedef enum {
    MTNone,
    MTStart,
    MTEnd,
    MTEmpty
} MarkupTagType;
|
|
|
|
/* Result codes returned by tok_putToUtf after each appended byte. */
#define UTF_CHAR_COMPLETE   2
#define UTF_CHAR_INCOMPLETE 1
#define UTF_CHAR_MALFORMED  0
|
|
|
|
/* Markup tag names as matched by tok_markupTagId. Each replacement list is
   parenthesized so the cast applies to the string literal even when the
   macro is followed by a postfix operator such as [] . */
#define TOK_MARKUP_KW_IGNORE     ((picoos_uchar*)"ignore")
#define TOK_MARKUP_KW_SPEED      ((picoos_uchar*)"speed")
#define TOK_MARKUP_KW_PITCH      ((picoos_uchar*)"pitch")
#define TOK_MARKUP_KW_VOLUME     ((picoos_uchar*)"volume")
#define TOK_MARKUP_KW_VOICE      ((picoos_uchar*)"voice")
#define TOK_MARKUP_KW_CONTEXT    ((picoos_uchar*)"preproccontext")
#define TOK_MARKUP_KW_MARK       ((picoos_uchar*)"mark")
#define TOK_MARKUP_KW_PLAY       ((picoos_uchar*)"play")
#define TOK_MARKUP_KW_USESIG     ((picoos_uchar*)"usesig")
#define TOK_MARKUP_KW_GENFILE    ((picoos_uchar*)"genfile")
#define TOK_MARKUP_KW_SENTENCE   ((picoos_uchar*)"sentence")
#define TOK_MARKUP_KW_S          ((picoos_uchar*)"s")
#define TOK_MARKUP_KW_PARAGRAPH  ((picoos_uchar*)"paragraph")
#define TOK_MARKUP_KW_P          ((picoos_uchar*)"p")
#define TOK_MARKUP_KW_BREAK      ((picoos_uchar*)"break")
#define TOK_MARKUP_KW_SPELL      ((picoos_uchar*)"spell")
#define TOK_MARKUP_KW_PHONEME    ((picoos_uchar*)"phoneme")
#define TOK_MARKUP_KW_ITEM       ((picoos_uchar*)"item")
#define TOK_MARKUP_KW_SPEAKER    ((picoos_uchar*)"speaker")
|
|
|
|
/* Markup attribute names. Each replacement list is parenthesized so the
   cast applies to the string literal even when the macro is followed by a
   postfix operator such as [] . */
#define KWLevel        ((picoos_uchar *)"level")
#define KWName         ((picoos_uchar *)"name")
#define KWProsDomain   ((picoos_uchar *)"prosodydomain")
#define KWTime         ((picoos_uchar *)"time")
#define KWMode         ((picoos_uchar *)"mode")
#define KWSB           ((picoos_uchar *)"sb")
#define KWPB           ((picoos_uchar *)"pb")
#define KWFile         ((picoos_uchar *)"file")
#define KWType         ((picoos_uchar *)"type")
#define KWF0Beg        ((picoos_uchar *)"f0beg")
#define KWF0End        ((picoos_uchar *)"f0end")
#define KWXFadeBeg     ((picoos_uchar *)"xfadebeg")
#define KWXFadeEnd     ((picoos_uchar *)"xfadeend")
#define KWAlphabet     ((picoos_uchar *)"alphabet")
#define KWPH           ((picoos_uchar *)"ph")
#define KWOrthMode     ((picoos_uchar *)"orthmode")
#define KWIgnorePunct  ((picoos_uchar *)"ignorepunct")
#define KWInfo1        ((picoos_uchar *)"info1")
#define KWInfo2        ((picoos_uchar *)"info2")
#define KWDATA         ((picoos_uchar *)"data")
|
|
|
|
/* Limits and defaults for the "level" attribute of the prosody markups.
   *_MIN/*_MAX bound absolute values; *_FACTOR_MIN/*_FACTOR_MAX bound
   relative values as produced by tok_isRelative, where 1000 corresponds
   to 100%. */
#define PICO_SPEED_MIN           20
#define PICO_SPEED_MAX          500
#define PICO_SPEED_DEFAULT      100
#define PICO_SPEED_FACTOR_MIN   500
#define PICO_SPEED_FACTOR_MAX  2000

#define PICO_PITCH_MIN           50
#define PICO_PITCH_MAX          200
#define PICO_PITCH_DEFAULT      100
#define PICO_PITCH_FACTOR_MIN   500
#define PICO_PITCH_FACTOR_MAX  2000
/* negative constant parenthesized so it is always a single operand */
#define PICO_PITCH_ADD_MIN     (-100)
#define PICO_PITCH_ADD_MAX      100
#define PICO_PITCH_ADD_DEFAULT    0

#define PICO_VOLUME_MIN           0
#define PICO_VOLUME_MAX         500
#define PICO_VOLUME_DEFAULT     100
#define PICO_VOLUME_FACTOR_MIN  500
#define PICO_VOLUME_FACTOR_MAX 2000

#define PICO_SPEAKER_MIN         20
#define PICO_SPEAKER_MAX        180
#define PICO_SPEAKER_DEFAULT    100
#define PICO_SPEAKER_FACTOR_MIN 500
#define PICO_SPEAKER_FACTOR_MAX 2000

/* Context name emitted when a preproccontext markup is closed. */
#define PICO_CONTEXT_DEFAULT ((picoos_uchar*)"DEFAULT")

/* Silence value emitted at the end of a paragraph
   (presumably milliseconds, like the "break" time -- TODO confirm). */
#define PARAGRAPH_PAUSE_DUR 500
/* Values sent with the spell command for mode="pb" and mode="sb". */
#define SPELL_WITH_PHRASE_BREAK 1
#define SPELL_WITH_SENTENCE_BREAK 2
|
|
|
|
/* *****************************************************************************/
|
|
|
|
/* NUL character constant. NOTE(review): not used in the visible part of the
   file; presumably a pseudo punctuation character signalling a flush --
   confirm against its users. */
#define TOK_PUNC_FLUSH (picoos_char) '\0'
|
|
|
|
typedef picoos_uchar Word[MARKUP_STRING_BUF_SIZE];
|
|
|
|
|
|
struct MarkupParam {
|
|
Word paramId;
|
|
Word paramVal;
|
|
};
|
|
|
|
typedef struct MarkupParam MarkupParams[MAX_NR_MARKUP_PARAMS];
|
|
|
|
typedef picoos_uchar utf8char0c[5]; /* one more than needed so it is ended always with 0c*/
|
|
|
|
/** subobject : TokenizeUnit
|
|
* shortcut : tok
|
|
*/
|
|
typedef struct tok_subobj
|
|
{
|
|
picoos_int32 ignLevel;
|
|
|
|
utf8char0c utf;
|
|
picoos_int32 utfpos;
|
|
picoos_int32 utflen;
|
|
|
|
MarkupParams markupParams;
|
|
picoos_int32 nrMarkupParams;
|
|
MarkupState markupState;
|
|
picoos_uchar markupStr[MARKUP_STRING_BUF_SIZE];
|
|
picoos_int32 markupPos;
|
|
picoos_int32 markupLevel[MIDummyEnd+1];
|
|
picoos_uchar markupTagName[IN_BUF_SIZE];
|
|
MarkupTagType markupTagType;
|
|
MarkupParseError markupTagErr;
|
|
|
|
picoos_int32 strPos;
|
|
picoos_uchar strDelim;
|
|
picoos_bool isFileAttr;
|
|
|
|
pico_tokenType tokenType;
|
|
pico_tokenSubType tokenSubType;
|
|
|
|
picoos_int32 tokenPos;
|
|
picoos_uchar tokenStr[IN_BUF_SIZE];
|
|
|
|
picoos_int32 nrEOL;
|
|
|
|
picoos_bool markupHandlingMode; /* to be moved ??? */
|
|
picoos_bool aborted; /* to be moved ??? */
|
|
|
|
picoos_bool start;
|
|
|
|
picoos_uint8 outBuf[OUT_BUF_SIZE]; /* internal output buffer */
|
|
picoos_uint16 outReadPos; /* next pos to read from outBuf */
|
|
picoos_uint16 outWritePos; /* next pos to write to outBuf */
|
|
|
|
picoos_uchar saveFile[IN_BUF_SIZE];
|
|
Word phonemes;
|
|
|
|
picotrns_SimpleTransducer transducer;
|
|
|
|
/* kbs */
|
|
|
|
picoktab_Graphs graphTab;
|
|
picokfst_FST xsampa_parser;
|
|
picokfst_FST svoxpa_parser;
|
|
picokfst_FST xsampa2svoxpa_mapper;
|
|
|
|
|
|
|
|
} tok_subobj_t;
|
|
|
|
/* *****************************************************************************/
|
|
|
|
static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
|
|
static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling);
|
|
static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok);
|
|
static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]);
|
|
static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
|
|
static MarkupId tok_markupTagId (picoos_uchar tagId[]);
|
|
|
|
/* *****************************************************************************/
|
|
|
|
/* Return TRUE iff the two 0-terminated strings compare equal
   according to picoos_strcmp. */
static picoos_bool tok_strEqual(picoos_uchar * str1, picoos_uchar * str2)
{
    return (picoos_strcmp((picoos_char*)str1, (picoos_char*)str2) == 0);
}
|
|
|
|
/* Remove leading and trailing blanks of 'str' and reduce
   groups of blanks within string to exactly one blank.
   Works in place; only the space character ' ' counts as a blank. */
static void tok_reduceBlanks(picoos_uchar * str)
{
    int i = 0;   /* write position */
    int j = 0;   /* read position  */

    while (str[j] != 0) {
        if (str[j] == (picoos_uchar)' ') {
            /* note one blank except at the beginning of string */
            if (i > 0) {
                str[i] = (picoos_uchar)' ';
                i++;
            }
            j++;
            /* skip the rest of this run of blanks */
            while (str[j] == (picoos_uchar)' ') {
                j++;
            }
        } else {
            str[i] = str[j];
            j++;
            i++;
        }
    }

    /* remove blanks at end of string (at most one can be left here) */
    if ((i > 0) && (str[i - 1] == ' ')) {
        i--;
    }
    str[i] = 0;
}
|
|
|
|
|
|
/* Enter an ignore region: increases the ignore nesting level. */
static void tok_startIgnore (tok_subobj_t * tok)
{
    tok->ignLevel++;
}
|
|
|
|
|
|
/* Leave an ignore region: decreases the ignore nesting level,
   never going below zero (unbalanced end tags are tolerated). */
static void tok_endIgnore (tok_subobj_t * tok)
{
    if (tok->ignLevel > 0) {
        tok->ignLevel--;
    }
}
|
|
|
|
|
|
/* Look up attribute 'paramId' in 'params' and convert its value to an
   integer with picoos_atoi.
   paramVal   : receives the converted value, or -1 if not found
   paramFound : receives TRUE if an attribute with that name exists */
static void tok_getParamIntVal (MarkupParams params, picoos_uchar paramId[], picoos_int32 * paramVal, picoos_bool * paramFound)
{
    int i = 0;

    /* linear search for the first slot with a matching name */
    while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId, params[i].paramId)) {
        i++;
    }
    if (i < MAX_NR_MARKUP_PARAMS) {
        (*paramVal) = picoos_atoi((picoos_char*)params[i].paramVal);
        (*paramFound) = TRUE;
    } else {
        (*paramVal) = -1;
        (*paramFound) = FALSE;
    }
}
|
|
|
|
|
|
|
|
/* Look up attribute 'paramId' in 'params' and copy its value string.
   paramStrVal : receives the value, or the empty string if not found
   paramFound  : receives TRUE if an attribute with that name exists

   NOTE(review): the copy is unbounded. An attribute value can be as long as
   a Word (MARKUP_STRING_BUF_SIZE bytes), so 'paramStrVal' must be at least
   that large; the callers in this file pass much smaller local buffers.
   A size parameter is needed to fix this properly. */
static void tok_getParamStrVal (MarkupParams params, picoos_uchar paramId[], picoos_uchar paramStrVal[], picoos_bool * paramFound)
{
    int i = 0;

    /* linear search for the first slot with a matching name */
    while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId, params[i].paramId)) {
        i++;
    }
    if (i < MAX_NR_MARKUP_PARAMS) {
        picoos_strcpy((picoos_char*)paramStrVal, (picoos_char*)params[i].paramVal);
        (*paramFound) = TRUE;
    } else {
        paramStrVal[0] = 0;
        (*paramFound) = FALSE;
    }
}
|
|
|
|
|
|
/* Look up attribute 'paramId' in 'params' and return its value as a phone
   string.
   alphabet   : name of the phonetic alphabet of the value; only
                PICODATA_XSAMPA and the empty string are accepted
   phones     : receives the value (bounded copy of at most 'phoneslen'
                bytes via picoos_strlcpy), or the empty string if the
                attribute is missing or the alphabet is not accepted
   paramFound : receives TRUE if an attribute with that name exists,
                regardless of the alphabet */
static void tok_getParamPhonesStr (MarkupParams params, picoos_uchar paramId[], picoos_uchar alphabet[], picoos_uchar phones[], picoos_int32 phoneslen, picoos_bool * paramFound)
{
    int i = 0;

    /* linear search for the first slot with a matching name */
    while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId, params[i].paramId)) {
        i++;
    }
    (*paramFound) = (i < MAX_NR_MARKUP_PARAMS) ? TRUE : FALSE;

    if ((i < MAX_NR_MARKUP_PARAMS)
        && (tok_strEqual(alphabet, PICODATA_XSAMPA) || tok_strEqual(alphabet, (picoos_uchar*)""))) {
        picoos_strlcpy((picoos_char*)phones, (picoos_char*)params[i].paramVal, phoneslen);
    } else {
        phones[0] = 0;
    }
}
|
|
|
|
|
|
/* Mark every attribute slot as unused by emptying its name and value. */
static void tok_clearMarkupParams (MarkupParams params)
{
    int i;

    for (i = 0; i < MAX_NR_MARKUP_PARAMS; i++) {
        params[i].paramId[0] = 0;
        params[i].paramVal[0] = 0;
    }
}
|
|
|
|
|
|
/* Parse a duration such as "500ms", "2 s" or " 3s".
   durStr : 0-terminated duration string (not modified)
   dur    : receives the duration in milliseconds, or 0 on failure
   done   : receives TRUE if the string is <digits> followed by the unit
            "s" or "ms" (blanks around the parts are allowed)

   The digits are read from the blank-normalized local copy, not from
   'durStr': this keeps every write inside 'tmpWord' and makes leading
   blanks harmless. Values too large for 32 bits saturate instead of
   overflowing; callers clamp the result to their own limits. */
static void tok_getDur (picoos_uchar durStr[], picoos_uint32 * dur, picoos_bool * done)
{
    const picoos_uint32 maxVal = (picoos_uint32)0xFFFFFFFFu;
    picoos_uint32 num = 0;
    picoos_uint32 digit;
    int i = 0;
    picoos_uchar tmpWord[IN_BUF_SIZE];

    picoos_strlcpy((picoos_char*)tmpWord, (picoos_char*)durStr, sizeof(tmpWord));
    tok_reduceBlanks(tmpWord);

    /* consume the leading digits, blanking them out so only the unit remains;
       the loop stops at the terminating 0 of tmpWord at the latest */
    while ((tmpWord[i] >= '0') && (tmpWord[i] <= '9')) {
        digit = (picoos_uint32)(tmpWord[i] - '0');
        if (num > (maxVal - digit) / 10) {
            num = maxVal;   /* saturate */
        } else {
            num = 10 * num + digit;
        }
        tmpWord[i] = ' ';
        i++;
    }
    tok_reduceBlanks(tmpWord);

    if (tok_strEqual(tmpWord, (picoos_uchar*)"s")) {
        (*dur) = (num > maxVal / 1000) ? maxVal : (1000 * num);
        (*done) = TRUE;
    } else if (tok_strEqual(tmpWord, (picoos_uchar*)"ms")) {
        (*dur) = num;
        (*done) = TRUE;
    } else {
        (*dur) = 0;
        (*done) = FALSE;
    }
}
|
|
|
|
|
|
/* Append byte 'ch' to the UTF-8 character being assembled in tok->utf.
   Returns
     UTF_CHAR_COMPLETE   if the character is now complete (tok->utf is then
                         0-terminated when there is room below
                         PICOBASE_UTF8_MAXLEN),
     UTF_CHAR_INCOMPLETE if more bytes are expected,
     UTF_CHAR_MALFORMED  if the byte sequence is invalid or the buffer
                         position is already at PICOBASE_UTF8_MAXLEN.
   The caller is responsible for resetting tok->utfpos between characters. */
static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch)
{
    if (tok->utfpos < PICOBASE_UTF8_MAXLEN) {
        tok->utf[tok->utfpos] = ch;
        if (tok->utfpos == 0) {
            /* the first byte determines the expected length */
            tok->utflen = picobase_det_utf8_length(ch);
        } else if ((ch < (picoos_uchar)'\200') || (ch >= (picoos_uchar)'\300')) {
            /* not a continuation byte (0x80..0xBF): force the malformed result */
            tok->utflen = 0;
        }
        (tok->utfpos)++;
        if (tok->utfpos == tok->utflen) {
            if (tok->utfpos < PICOBASE_UTF8_MAXLEN) {
                tok->utf[tok->utfpos] = 0;
            }
            return UTF_CHAR_COMPLETE;
        } else if (tok->utfpos < tok->utflen) {
            return UTF_CHAR_INCOMPLETE;
        } else {
            return UTF_CHAR_MALFORMED;
        }
    } else {
        return UTF_CHAR_MALFORMED;
    }
}
|
|
|
|
|
|
/* Test whether 'strval' is a relative (percentage) value, i.e. ends in '%'.
   If so, the trailing '%' is removed from 'strval' in place, *val receives
   the factor in units where 1000 means 100%, and TRUE is returned:
     "150%" -> 1500   (plain percentage)
     "+20%" -> 1200   (signed: relative to 100%)
     "-20%" ->  800
   Otherwise FALSE is returned and neither 'strval' nor *val is changed.

   A result below zero (e.g. "-200%") is stored as 0 rather than being
   wrapped to a huge unsigned number, so that range checks in the callers
   clamp it to their minimum instead of their maximum. */
static picoos_bool tok_isRelative (picoos_uchar strval[], picoos_uint32 * val)
{
    /* percentages beyond this magnitude are far outside every supported
       range; limiting them keeps the arithmetic below free of overflow */
    const picoos_int32 percentLimit = 100000;
    picoos_int32 len;
    picoos_int32 percent;
    picoos_int32 factor;

    len = picoos_strlen((picoos_char*)strval);
    if ((len <= 0) || (strval[len - 1] != '%')) {
        return FALSE;
    }
    strval[len - 1] = 0;

    percent = picoos_atoi((picoos_char*)strval);
    if (percent > percentLimit) {
        percent = percentLimit;
    } else if (percent < -percentLimit) {
        percent = -percentLimit;
    }

    factor = percent * 10;
    if ((strval[0] == '+') || (strval[0] == '-')) {
        /* signed values are offsets from 100% */
        factor += 1000;
    }
    if (factor < 0) {
        factor = 0;
    }
    (*val) = (picoos_uint32)factor;
    return TRUE;
}
|
|
|
|
|
|
static void tok_putItem (picodata_ProcessingUnit this, tok_subobj_t * tok,
|
|
picoos_uint8 itemType, picoos_uint8 info1, picoos_uint8 info2,
|
|
picoos_uint16 val,
|
|
picoos_uchar str[])
|
|
{
|
|
picoos_int32 len, i;
|
|
|
|
if ((itemType == PICODATA_ITEM_CMD) && (info1 == PICODATA_ITEMINFO1_CMD_FLUSH)) {
|
|
tok->outBuf[tok->outWritePos++] = itemType;
|
|
tok->outBuf[tok->outWritePos++] = info1;
|
|
tok->outBuf[tok->outWritePos++] = info2;
|
|
tok->outBuf[tok->outWritePos++] = 0;
|
|
}
|
|
else if (tok->ignLevel <= 0) {
|
|
switch (itemType) {
|
|
case PICODATA_ITEM_CMD:
|
|
switch (info1) {
|
|
case PICODATA_ITEMINFO1_CMD_CONTEXT:
|
|
case PICODATA_ITEMINFO1_CMD_VOICE:
|
|
case PICODATA_ITEMINFO1_CMD_MARKER:
|
|
case PICODATA_ITEMINFO1_CMD_PLAY:
|
|
case PICODATA_ITEMINFO1_CMD_SAVE:
|
|
case PICODATA_ITEMINFO1_CMD_UNSAVE:
|
|
case PICODATA_ITEMINFO1_CMD_PROSDOMAIN:
|
|
case PICODATA_ITEMINFO1_CMD_PHONEME:
|
|
len = picoos_strlen((picoos_char*)str);
|
|
if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
|
|
tok->outBuf[tok->outWritePos++] = itemType;
|
|
tok->outBuf[tok->outWritePos++] = info1;
|
|
tok->outBuf[tok->outWritePos++] = info2;
|
|
tok->outBuf[tok->outWritePos++] = len;
|
|
for (i=0; i<len; i++) {
|
|
tok->outBuf[tok->outWritePos++] = str[i];
|
|
}
|
|
}
|
|
else {
|
|
PICODBG_WARN(("tok_putItem: output buffer too small"));
|
|
}
|
|
break;
|
|
case PICODATA_ITEMINFO1_CMD_IGNSIG:
|
|
case PICODATA_ITEMINFO1_CMD_IGNORE:
|
|
if (tok->outWritePos + 4 < OUT_BUF_SIZE) {
|
|
tok->outBuf[tok->outWritePos++] = itemType;
|
|
tok->outBuf[tok->outWritePos++] = info1;
|
|
tok->outBuf[tok->outWritePos++] = info2;
|
|
tok->outBuf[tok->outWritePos++] = 0;
|
|
}
|
|
else {
|
|
PICODBG_WARN(("tok_putItem: output buffer too small"));
|
|
}
|
|
break;
|
|
case PICODATA_ITEMINFO1_CMD_SPEED:
|
|
case PICODATA_ITEMINFO1_CMD_PITCH:
|
|
case PICODATA_ITEMINFO1_CMD_VOLUME:
|
|
case PICODATA_ITEMINFO1_CMD_SPELL:
|
|
case PICODATA_ITEMINFO1_CMD_SIL:
|
|
case PICODATA_ITEMINFO1_CMD_SPEAKER:
|
|
if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) {
|
|
tok->outBuf[tok->outWritePos++] = itemType;
|
|
tok->outBuf[tok->outWritePos++] = info1;
|
|
tok->outBuf[tok->outWritePos++] = info2;
|
|
tok->outBuf[tok->outWritePos++] = 2;
|
|
tok->outBuf[tok->outWritePos++] = val % 256;
|
|
tok->outBuf[tok->outWritePos++] = val / 256;
|
|
}
|
|
else {
|
|
PICODBG_WARN(("tok_putItem: output buffer too small"));
|
|
}
|
|
break;
|
|
default:
|
|
PICODBG_WARN(("tok_putItem: unknown command type"));
|
|
}
|
|
break;
|
|
case PICODATA_ITEM_TOKEN:
|
|
len = picoos_strlen((picoos_char*)str);
|
|
if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
|
|
tok->outBuf[tok->outWritePos++] = itemType;
|
|
tok->outBuf[tok->outWritePos++] = info1;
|
|
tok->outBuf[tok->outWritePos++] = info2;
|
|
tok->outBuf[tok->outWritePos++] = len;
|
|
for (i=0; i<len; i++) {
|
|
tok->outBuf[tok->outWritePos++] = str[i];
|
|
}
|
|
}
|
|
else {
|
|
PICODBG_WARN(("tok_putItem: output buffer too small"));
|
|
}
|
|
break;
|
|
default:
|
|
PICODBG_WARN(("tok_putItem: unknown item type"));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* Write a raw item (4-byte head plus 'len' data bytes) to the tokenizer's
   internal output buffer.
   this  : processing unit (not used here)
   type  : item type; the item is ignored unless is_valid_itemtype(type)
   info1, info2 : info bytes, stored as given
   len   : number of bytes in 'data'
   data  : item content

   Unlike tok_putItem, the ignore level is not consulted. An item that
   would not fit into the remaining buffer space is dropped with a debug
   warning instead of being written past the end of the buffer. */
static void tok_putItem2 (picodata_ProcessingUnit this, tok_subobj_t * tok,
        picoos_uint8 type,
        picoos_uint8 info1, picoos_uint8 info2,
        picoos_uint8 len,
        picoos_uint8 data[])
{
    picoos_int32 i;

    if (!is_valid_itemtype(type)) {
        return;
    }
    if ((picoos_int32)tok->outWritePos + 4 + (picoos_int32)len > (picoos_int32)(OUT_BUF_SIZE)) {
        PICODBG_WARN(("tok_putItem2: output buffer too small"));
        return;
    }
    tok->outBuf[tok->outWritePos++] = type;
    tok->outBuf[tok->outWritePos++] = info1;
    tok->outBuf[tok->outWritePos++] = info2;
    tok->outBuf[tok->outWritePos++] = len;
    for (i = 0; i < len; i++) {
        tok->outBuf[tok->outWritePos++] = data[i];
    }
}
|
|
|
|
|
|
/* Map a markup tag name to its MarkupId.
   An optional "svox:" prefix is skipped before the comparison. The match
   is exact (via tok_strEqual); "s" and "p" are accepted as short forms of
   "sentence" and "paragraph".
   Returns MIDummyEnd if the name is not a known tag. */
static MarkupId tok_markupTagId (picoos_uchar tagId[])
{
    static const struct {
        picoos_uchar * keyword;
        MarkupId id;
    } tagTable[] = {
        { TOK_MARKUP_KW_IGNORE,    MIIgnore },
        { TOK_MARKUP_KW_SPEED,     MISpeed },
        { TOK_MARKUP_KW_PITCH,     MIPitch },
        { TOK_MARKUP_KW_VOLUME,    MIVolume },
        { TOK_MARKUP_KW_SPEAKER,   MISpeaker },
        { TOK_MARKUP_KW_VOICE,     MIVoice },
        { TOK_MARKUP_KW_CONTEXT,   MIPreprocContext },
        { TOK_MARKUP_KW_MARK,      MIMarker },
        { TOK_MARKUP_KW_PLAY,      MIPlay },
        { TOK_MARKUP_KW_USESIG,    MIUseSig },
        { TOK_MARKUP_KW_GENFILE,   MIGenFile },
        { TOK_MARKUP_KW_SENTENCE,  MISentence },
        { TOK_MARKUP_KW_S,         MISentence },
        { TOK_MARKUP_KW_PARAGRAPH, MIParagraph },
        { TOK_MARKUP_KW_P,         MIParagraph },
        { TOK_MARKUP_KW_BREAK,     MIBreak },
        { TOK_MARKUP_KW_SPELL,     MISpell },
        { TOK_MARKUP_KW_PHONEME,   MIPhoneme },
        { TOK_MARKUP_KW_ITEM,      MIItem }
    };
    unsigned int i;

    /* the prefix matches only if its first occurrence is at the very start */
    if (picoos_strstr((picoos_char *)tagId, (picoos_char *)"svox:") == (picoos_char *)tagId) {
        tagId += 5;
    }
    for (i = 0; i < sizeof(tagTable) / sizeof(tagTable[0]); i++) {
        if (tok_strEqual(tagId, tagTable[i].keyword)) {
            return tagTable[i].id;
        }
    }
    return MIDummyEnd;
}
|
|
|
|
|
|
/* Clamp *value to the inclusive range [min, max].
   If the value was out of range, a warning naming 'valueType' and the
   offending value is raised on the processing unit's exception manager
   before the value is replaced by the nearest limit. */
static void tok_checkLimits (picodata_ProcessingUnit this, picoos_uint32 * value, picoos_uint32 min, picoos_uint32 max, picoos_uchar valueType[])
{
    if (((*value) < min) || ((*value) > max)) {
        picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %i for %s", *value, valueType);
        if ((*value) < min) {
            (*value) = min;
        } else {
            /* not below min and out of range: must be above max */
            (*value) = max;
        }
    }
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
static void tok_checkRealLimits (picodata_ProcessingUnit this, picoos_single * value, picoos_single min, picoos_single max, picoos_uchar valueType[])
|
|
{
|
|
if ((((*value) < min) || ((*value) > max))) {
|
|
picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %f for %s", *value, valueType);
|
|
if (((*value) < min)) {
|
|
(*value) = min;
|
|
} else if (((*value) > max)) {
|
|
(*value) = max;
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
/* Size in bytes of the local attribute value buffers
   (valStr, valStr2, valStr3) in tok_interpretMarkup. */
#define VAL_STR_LEN 21
|
|
|
|
static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId)
|
|
{
|
|
picoos_bool done;
|
|
picoos_int32 ival;
|
|
picoos_uint32 uval;
|
|
picoos_int32 ival2;
|
|
picoos_uchar valStr[VAL_STR_LEN];
|
|
picoos_uchar valStr2[VAL_STR_LEN];
|
|
picoos_uchar valStr3[VAL_STR_LEN];
|
|
picoos_int32 i2;
|
|
picoos_uint32 dur;
|
|
picoos_bool done1;
|
|
picoos_bool paramFound;
|
|
picoos_uint8 type, info1, info2;
|
|
picoos_uint8 data[256];
|
|
picoos_int32 pos, n, len;
|
|
picoos_uchar part[10];
|
|
|
|
done = FALSE;
|
|
switch (mId) {
|
|
case MIIgnore:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
|
|
tok_startIgnore(tok);
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_endIgnore(tok);
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MISpeed:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
|
|
if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
|
|
tok_checkLimits(this, & uval, PICO_SPEED_FACTOR_MIN, PICO_SPEED_FACTOR_MAX,(picoos_uchar*)"relative speed factor");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
|
|
} else {
|
|
uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
|
|
tok_checkLimits(this, & uval, PICO_SPEED_MIN, PICO_SPEED_MAX,(picoos_uchar*)"speed");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
|
|
}
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIPitch:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
|
|
if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
|
|
tok_checkLimits(this, & uval,PICO_PITCH_FACTOR_MIN,PICO_PITCH_FACTOR_MAX, (picoos_uchar*)"relative pitch factor");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
|
|
} else {
|
|
uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
|
|
tok_checkLimits(this, & uval,PICO_PITCH_MIN,PICO_PITCH_MAX, (picoos_uchar*)"pitch");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
|
|
}
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIVolume:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
|
|
if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
|
|
tok_checkLimits(this, & uval, PICO_VOLUME_FACTOR_MIN, PICO_VOLUME_FACTOR_MAX, (picoos_uchar*)"relative volume factor");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
|
|
} else {
|
|
uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
|
|
tok_checkLimits(this, & uval, PICO_VOLUME_MIN, PICO_VOLUME_MAX, (picoos_uchar*)"volume");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
|
|
}
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MISpeaker:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
|
|
if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
|
|
tok_checkLimits(this, & uval, PICO_SPEAKER_FACTOR_MIN, PICO_SPEAKER_FACTOR_MAX, (picoos_uchar*)"relative speaker factor");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
|
|
} else {
|
|
uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
|
|
tok_checkLimits(this, & uval, PICO_SPEAKER_MIN, PICO_SPEAKER_MAX, (picoos_uchar*)"volume");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
|
|
}
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
|
|
case MIVoice:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIPreprocContext:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT);
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIMarker:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MISentence:
|
|
if (isStartTag) {
|
|
tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr);
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIParagraph:
|
|
if (isStartTag) {
|
|
tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr);
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)"");
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIBreak:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) {
|
|
tok_getDur(tok->markupParams[0].paramVal, & dur, & done1);
|
|
tok_checkLimits (this, &dur, 0, 65535, (picoos_uchar*)"time");
|
|
if (done1) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MISpell:
|
|
if (isStartTag) {
|
|
if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) {
|
|
if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) {
|
|
uval = SPELL_WITH_PHRASE_BREAK;
|
|
} else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) {
|
|
uval = SPELL_WITH_SENTENCE_BREAK;
|
|
} else {
|
|
tok_getDur(tok->markupParams[0].paramVal, & uval, & done1);
|
|
tok_checkLimits (this, & uval, 0, 65535, (picoos_uchar*)"time");
|
|
if (done1) {
|
|
done = TRUE;
|
|
}
|
|
}
|
|
} else {
|
|
uval = SPELL_WITH_PHRASE_BREAK;
|
|
}
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_START, uval, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIGenFile:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
|
|
if (tok->saveFile[0] != 0) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
|
|
picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile);
|
|
tok->saveFile[0] = 0;
|
|
}
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE,
|
|
picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/FALSE), 0, tok->markupParams[0].paramVal);
|
|
picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal);
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
if (tok->saveFile[0] != 0) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
|
|
picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)"");
|
|
tok->saveFile[0] = 0;
|
|
}
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIPlay:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
|
|
if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
|
|
tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
|
|
tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
|
|
tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound);
|
|
tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
|
|
tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
|
|
picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
|
|
tok_startIgnore(tok);
|
|
} else {
|
|
if (tok->ignLevel > 0) {
|
|
tok_startIgnore(tok);
|
|
} else {
|
|
picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal);
|
|
}
|
|
}
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_endIgnore(tok);
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIUseSig:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
|
|
if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
|
|
tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
|
|
tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
|
|
tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound);
|
|
tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
|
|
tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
|
|
picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)"");
|
|
} else {
|
|
if (tok->ignLevel <= 0) {
|
|
picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal);
|
|
}
|
|
}
|
|
done = TRUE;
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIPhoneme:
|
|
i2 = 0;
|
|
if (isStartTag) {
|
|
if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) {
|
|
if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode)
|
|
&& tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) {
|
|
i2 = 1;
|
|
}
|
|
if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
|
|
PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
|
|
done = TRUE;
|
|
} else {
|
|
PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
|
|
picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal);
|
|
done = TRUE;
|
|
}
|
|
} else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) {
|
|
if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode)
|
|
&& tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) {
|
|
i2 = 1;
|
|
}
|
|
if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
|
|
PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
|
|
done = TRUE;
|
|
}
|
|
else {
|
|
PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
|
|
picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal);
|
|
done = TRUE;
|
|
}
|
|
}
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
|
|
tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
|
|
PICODATA_ITEMINFO2_CMD_END, i2, (picoos_uchar*)"");
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
case MIItem:
|
|
if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) &&
|
|
tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&&
|
|
tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&&
|
|
tok_strEqual(tok->markupParams[3].paramId, KWDATA)) {
|
|
picoos_int32 len2, n2;
|
|
type = picoos_atoi(tok->markupParams[0].paramVal);
|
|
info1 = picoos_atoi(tok->markupParams[1].paramVal);
|
|
info2 = picoos_atoi(tok->markupParams[2].paramVal);
|
|
n = 0; n2 = 0;
|
|
len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal);
|
|
while (n<len2) {
|
|
while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) {
|
|
n++;
|
|
}
|
|
tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n];
|
|
n++;
|
|
n2++;
|
|
}
|
|
if (is_valid_itemtype(type)) {
|
|
done = TRUE;
|
|
len = 0;
|
|
pos = 0;
|
|
picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
|
|
&pos, ',', part, 10, &done1);
|
|
while (done && done1) {
|
|
n = picoos_atoi(part);
|
|
if ((n>=0) && (n<256) && (len<256)) {
|
|
data[len++] = n;
|
|
}
|
|
else {
|
|
done = FALSE;
|
|
}
|
|
picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
|
|
&pos, ',', part, 10, &done1);
|
|
}
|
|
if (done) {
|
|
tok_putItem2(this, tok, type, info1, info2, len, data);
|
|
}
|
|
}
|
|
else {
|
|
done = FALSE;
|
|
}
|
|
} else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
|
|
done = TRUE;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if (!done) {
|
|
tok->markupTagErr = MEInterprete;
|
|
}
|
|
if (isStartTag) {
|
|
tok->markupLevel[mId]++;
|
|
} else if ((tok->markupLevel[mId] > 0)) {
|
|
tok->markupLevel[mId]--;
|
|
}
|
|
}
|
|
|
|
|
|
/**
 * Tells whether a character is allowed inside a markup attribute name.
 * @param ch    : character to classify
 * @param first : TRUE if ch would be the first character of the name
 * @return non-zero for 'A'..'Z' and 'a'..'z'; also non-zero for '0'..'9'
 *         unless 'first' is set; zero otherwise
 */
static picoos_bool tok_attrChar (picoos_uchar ch, picoos_bool first)
{
    const picoos_bool isUpper = (ch >= (picoos_uchar)'A') && (ch <= (picoos_uchar)'Z');
    const picoos_bool isLower = (ch >= (picoos_uchar)'a') && (ch <= (picoos_uchar)'z');
    const picoos_bool isDigit = (ch >= (picoos_uchar)'0') && (ch <= (picoos_uchar)'9');

    /* digits are only accepted after the leading character */
    return ((isUpper || isLower) || (!(first) && isDigit));
}
|
|
|
|
|
|
|
|
/**
 * Tells whether a character is allowed inside a markup tag identifier.
 * Accepts everything tok_attrChar accepts and, in addition, ':' at any
 * position other than the first.
 * @param ch    : character to classify
 * @param first : TRUE if ch would be the first character of the identifier
 * @return non-zero if ch is acceptable, zero otherwise
 */
static picoos_bool tok_idChar (picoos_uchar ch, picoos_bool first)
{
    const picoos_bool innerColon = !(first) && (ch == (picoos_uchar)':');

    return (tok_attrChar(ch, first) || innerColon);
}
|
|
|
|
|
|
/**
 * Records whether an attribute name equals the KWFile keyword.
 * @param name   : attribute name to compare (as accepted by tok_strEqual)
 * @param isFile : receives the result of tok_strEqual(name, KWFile)
 */
static void tok_setIsFileAttr (picoos_uchar name[], picoos_bool * isFile)
{
    const picoos_bool nameIsFile = tok_strEqual(name, KWFile);

    *isFile = nameIsFile;
}
|
|
|
|
/* *****************************************************************************/
|
|
|
|
/**
 * Appends the bytes of 'str' to the simple token being collected and sets
 * the token's type and subtype.
 * @param this    : processing unit (used to raise warnings and to flush)
 * @param tok     : tokenizer sub-object holding tokenStr / tokenPos
 * @param str     : 0-terminated bytes to append (nothing is appended if empty)
 * @param type    : token type stored in tok->tokenType
 * @param subtype : token subtype stored in tok->tokenSubType
 *
 * When the token buffer is full, a warning is raised and the token collected
 * so far is emitted through tok_treatSimpleToken before appending continues.
 */
static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype)
{
    int i, len;

    if (str[0] != 0) {
        len = picoos_strlen((picoos_char*)str);
        for (i = 0; i < len; i++) {
            /* Flush one byte early: tok_treatSimpleToken only writes the
               terminating 0 while tokenPos < IN_BUF_SIZE, so the last slot
               has to stay free or an unterminated tokenStr would be handed
               to tok_putItem. */
            if (tok->tokenPos >= (IN_BUF_SIZE - 1)) {
                picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT, (picoos_char*)"", (picoos_char*)"simple token too long; forced treatment");
                tok_treatSimpleToken(this, tok);
            }
            tok->tokenStr[tok->tokenPos] = str[i];
            tok->tokenPos++;
        }
    }
    /* set after appending: a forced flush above resets type and subtype */
    tok->tokenType = type;
    tok->tokenSubType = subtype;
}
|
|
|
|
|
|
/**
 * Feeds the bytes of 'str' to the markup tag parser (a state machine kept in
 * tok->markupState) and appends accepted bytes to tok->markupStr.
 * @param this : processing unit (used to raise warnings)
 * @param tok  : tokenizer sub-object holding the parser state
 * @param str  : 0-terminated bytes to parse
 *
 * On return tok->markupState is MSGotEnd when a tag was completed, or
 * MSError / MSErrorTooLong when parsing failed. A byte that causes a failure
 * is NOT appended to tok->markupStr, so the caller can replay the collected
 * string as plain text and then restart with the current character.
 *
 * NOTE(review): writes to paramVal are indexed by tok->strPos without a
 * capacity check; the capacity of paramVal is not visible from here -
 * confirm it can hold MARKUP_STRING_BUF_SIZE bytes or add a bound.
 */
static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[])
{
    picoos_int32 i, len;
    picoos_uint8 ok;
    picoos_bool tooLong;

    tok->markupTagErr = MENone;
    len = picoos_strlen((picoos_char*)str);
    for (i = 0; i < len; i++) {
        /* The remaining bytes of str plus the terminating 0 must fit below
           MARKUP_STRING_BUF_SIZE; checking the whole remainder keeps the
           bytes handed in by one call together. */
        tooLong = (tok->markupState == MSErrorTooLong)
                || ((tok->markupPos + (len - i)) > (MARKUP_STRING_BUF_SIZE - 1));

        if (tooLong) {
            /* warn only on the first byte that does not fit */
            if ((tok->markupState != MSErrorTooLong) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
                picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"markup tag too long");
            }
            tok->markupState = MSErrorTooLong;
        } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) {
            /* blank between tag elements: state is left unchanged */
        } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
            tok->markupState = MSGotEnd;
        } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
            if (tok->markupTagType == MTEnd) {
                /* "</name/" is not accepted */
                tok->markupTagErr = MEUnexpectedChar;
                tok->markupState = MSError;
            } else {
                tok->markupTagType = MTEmpty;
                tok->markupState = MSGotEndSlash;
            }
        } else {
            switch (tok->markupState) {
                case MSNotInMarkup:
                    if (str[i] == (picoos_uchar)'<') {
                        tok_clearMarkupParams(tok->markupParams);
                        tok->nrMarkupParams = 0;
                        tok->strPos = 0;
                        tok->markupTagType = MTStart;
                        tok->markupState = MSGotStart;
                    } else {
                        tok->markupTagErr = MEMissingStart;
                        tok->markupState = MSError;
                    }
                    break;

                case MSGotStart:
                    if (str[i] == (picoos_uchar)'/') {
                        tok->markupTagType = MTEnd;
                        tok->markupState = MSExpectingmarkupTagName;
                    } else if (str[i] == (picoos_uchar)' ') {
                        tok->markupState = MSExpectingmarkupTagName;
                    } else if (tok_idChar(str[i],TRUE)) {
                        tok->markupTagType = MTStart;
                        tok->markupTagName[tok->strPos] = str[i];
                        tok->strPos++;
                        tok->markupTagName[tok->strPos] = 0;
                        tok->markupState = MSInmarkupTagName;
                    } else {
                        tok->markupTagErr = MEUnexpectedChar;
                        tok->markupState = MSError;
                    }
                    break;

                case MSInmarkupTagName: case MSExpectingmarkupTagName:
                    if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) {
                        /* IN_BUF_SIZE is the capacity passed for
                           markupTagName to picobase_lowercase_utf8_str
                           below; keep one slot for the terminating 0 */
                        if (tok->strPos < (IN_BUF_SIZE - 1)) {
                            tok->markupTagName[tok->strPos] = str[i];
                            tok->strPos++;
                            tok->markupTagName[(tok->strPos)] = 0;
                            tok->markupState = MSInmarkupTagName;
                        } else {
                            tok->markupTagErr = MEIdent;
                            tok->markupState = MSError;
                        }
                    } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) {
                        tok->markupState = MSGotmarkupTagName;
                        picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok);
                        tok->strPos = 0;
                    } else {
                        tok->markupTagErr = MEIdent;
                        tok->markupState = MSError;
                    }
                    break;

                case MSGotmarkupTagName: case MSGotAttrValue:
                    if (tok_attrChar(str[i], TRUE)) {
                        if (tok->markupTagType == MTEnd) {
                            /* end tags must not carry attributes */
                            tok->markupTagErr = MEUnexpectedChar;
                            tok->markupState = MSError;
                        } else {
                            if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
                                tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
                                tok->strPos++;
                                tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
                            } else {
                                picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"too many attributes in markup; ignoring");
                            }
                            tok->markupState = MSInAttrName;
                        }
                    } else {
                        tok->markupTagErr = MEUnexpectedChar;
                        tok->markupState = MSError;
                    }
                    break;

                case MSInAttrName:
                    if (tok_attrChar(str[i], FALSE)) {
                        if (tok->nrMarkupParams >= MAX_NR_MARKUP_PARAMS) {
                            /* surplus attribute: its name is not stored */
                        } else if (tok->strPos < (IN_BUF_SIZE - 1)) {
                            /* IN_BUF_SIZE is the capacity passed for paramId
                               to picobase_lowercase_utf8_str below */
                            tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
                            tok->strPos++;
                            tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
                        } else {
                            tok->markupTagErr = MEIdent;
                            tok->markupState = MSError;
                        }
                    } else if ((str[i] == (picoos_uchar)' ') || (str[i] == (picoos_uchar)'=')) {
                        /* the attribute name is complete */
                        if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
                            picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
                            tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
                        } else {
                            /* surplus attribute: there is no slot at index
                               MAX_NR_MARKUP_PARAMS, so do not touch it */
                            tok->isFileAttr = FALSE;
                        }
                        if (str[i] == (picoos_uchar)' ') {
                            tok->markupState = MSGotAttrName;
                        } else {
                            tok->markupState = MSGotEqual;
                        }
                    } else {
                        tok->markupTagErr = MEMissingEqual;
                        tok->markupState = MSError;
                    }
                    break;

                case MSGotAttrName:
                    if (str[i] == (picoos_uchar)'=') {
                        tok->markupState = MSGotEqual;
                    } else {
                        tok->markupTagErr = MEMissingEqual;
                        tok->markupState = MSError;
                    }
                    break;

                case MSGotEqual:
                    if ((str[i] == (picoos_uchar)'"') || (str[i] == (picoos_uchar)'\'')) {
                        /* remember which quote has to close the value */
                        tok->strDelim = str[i];
                        tok->strPos = 0;
                        tok->markupState = MSInAttrValue;
                    } else {
                        tok->markupTagErr = MEMissingQuote;
                        tok->markupState = MSError;
                    }
                    break;

                case MSInAttrValue:
                    if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) {
                        /* backslash escapes the next byte, except in values
                           of the file attribute where it is kept literally */
                        tok->markupState = MSInAttrValueEscaped;
                    } else if (str[i] == tok->strDelim) {
                        if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
                            tok->nrMarkupParams++;
                        }
                        tok->strPos = 0;
                        tok->markupState = MSGotAttrValue;
                    } else {
                        if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
                            tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
                            tok->strPos++;
                            tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
                        }
                        tok->markupState = MSInAttrValue;
                    }
                    break;

                case MSInAttrValueEscaped:
                    /* escaped byte is taken over as is */
                    if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
                        tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
                        tok->strPos++;
                        tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
                    }
                    tok->markupState = MSInAttrValue;
                    break;

                case MSGotEndSlash:
                    if (str[i] == (picoos_uchar)'>') {
                        tok->markupState = MSGotEnd;
                    } else {
                        tok->markupTagErr = MEUnexpectedChar;
                        tok->markupState = MSError;
                    }
                    break;

                default:
                    tok->markupTagErr = MEUnexpectedChar;
                    tok->markupState = MSError;
                    break;
            }
        }

        if (!tooLong && (tok->markupTagErr == MENone)) {
            tok->markupStr[tok->markupPos] = str[i];
            tok->markupPos++;
        } /* else restart parsing at current char */
        tok->markupStr[tok->markupPos] = 0;
    }
    /*
    PICODBG_DEBUG(("putToMarkup %s", tok->markupStr));
    */
}
|
|
|
|
/* *****************************************************************************/
|
|
|
|
/**
 * Replays the collected markup string as ordinary text: the markup parser
 * and the UTF-8 collector are reset, then every byte of tok->markupStr is
 * passed to tok_treatChar with markup handling switched off.
 * @param this : processing unit
 * @param tok  : tokenizer sub-object
 */
static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    picoos_int32 idx = 0;

    tok->markupState = MSNotInMarkup;
    tok->utflen = 0;
    tok->utfpos = 0;

    while (idx < tok->markupPos) {
        tok_treatChar(this, tok, tok->markupStr[idx], FALSE);
        idx++;
    }

    /* the markup buffer is consumed */
    tok->strPos = 0;
    tok->markupPos = 0;
}
|
|
|
|
|
|
/**
 * Handles a completely parsed markup tag.
 * If tok_markupTagId does not know the tag name (returns MIDummyEnd), the
 * tag is replayed as plain text. Otherwise a pending non-space token is
 * emitted, a space token is started and the tag is interpreted: as start
 * tag for MTStart/MTEmpty, and as end tag (with cleared parameters) for
 * MTEnd/MTEmpty. If an error is flagged in tok->markupTagErr, a warning is
 * raised (unless aborted) and the tag is replayed as plain text.
 * In all cases the markup parser is reset afterwards.
 * @param this : processing unit
 * @param tok  : tokenizer sub-object
 */
static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    MarkupId tagId;

    if (tok_markupTagId(tok->markupTagName) == MIDummyEnd) {
        /* unknown tag name */
        tok_treatMarkupAsSimpleToken(this, tok);
    } else {
        if (tok->markupTagErr == MENone) {
            tok->markupState = MSNotInMarkup;
            if (!((tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED))) {
                tok_treatSimpleToken(this, tok);
            }
            tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1);

            tagId = tok_markupTagId(tok->markupTagName);
            if ((tok->markupTagType == MTStart) || (tok->markupTagType == MTEmpty)) {
                tok_interpretMarkup(this, tok, TRUE, tagId);
            }
            if ((tok->markupTagType == MTEnd) || (tok->markupTagType == MTEmpty)) {
                /* the end tag is interpreted without parameters */
                tok_clearMarkupParams(tok->markupParams);
                tok->nrMarkupParams = 0;
                tok_interpretMarkup(this, tok, FALSE, tagId);
            }
        }

        /* checked again: interpretation above may have flagged an error */
        if (tok->markupTagErr != MENone) {
            if (!tok->aborted) {
                picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr);
            }
            tok_treatMarkupAsSimpleToken(this, tok);
        }
    }

    tok->markupState = MSNotInMarkup;
    tok->markupPos = 0;
    tok->strPos = 0;
}
|
|
|
|
|
|
|
|
/**
 * Consumes one input byte.
 * A NULLC byte emits the pending simple token followed by a FLUSH command
 * item. Any other byte is added to the UTF-8 collector; once a character is
 * complete it is classified via the graph table and routed either to the
 * markup parser or to the simple token being collected.
 * @param this           : processing unit
 * @param tok            : tokenizer sub-object
 * @param ch             : input byte
 * @param markupHandling : FALSE forces the character to be treated as plain
 *                         text; TRUE allows markup parsing if
 *                         tok->markupHandlingMode is MARKUP_HANDLING_ENABLED
 */
static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling)
{
    picoos_int32 k, graphId;
    picoos_uint8 rawType;
    pico_tokenType type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
    pico_tokenSubType subtype = -1;
    utf8char0c savedUtf;
    picoos_int32 savedUtfPos;

    if (ch == NULLC) {
        tok_treatSimpleToken(this, tok);
        tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
        return;
    }

    switch (tok_putToUtf(tok, ch)) {
        case UTF_CHAR_MALFORMED:
            /* drop the bytes collected so far */
            tok->utfpos = 0;
            tok->utflen = 0;
            break;

        case UTF_CHAR_INCOMPLETE:
            /* wait for more bytes */
            break;

        case UTF_CHAR_COMPLETE:
            markupHandling = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED));

            /* classify the character */
            graphId = picoktab_graphOffset(tok->graphTab, tok->utf);
            if (graphId > 0) {
                if (picoktab_getIntPropTokenType(tok->graphTab, graphId, &rawType)) {
                    type = (pico_tokenType)rawType;
                    if (type == PICODATA_ITEMINFO1_TOKTYPE_LETTERV) {
                        type = PICODATA_ITEMINFO1_TOKTYPE_LETTER;
                    }
                }
                /* result ignored: subtype keeps its initial -1 if not set */
                (void) picoktab_getIntPropTokenSubType(tok->graphTab, graphId, &subtype);
            } else if (tok->utf[tok->utfpos-1] <= (picoos_uchar)' ') {
                type = PICODATA_ITEMINFO1_TOKTYPE_SPACE;
                subtype = -1;
            } else {
                type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
                subtype = -1;
            }

            /* count consecutive line ends; any char above ' ' resets */
            if ((tok->utf[tok->utfpos-1] > (picoos_uchar)' ')) {
                tok->nrEOL = 0;
            } else if ((tok->utf[tok->utfpos-1] == EOL)) {
                tok->nrEOL++;
            }

            if (markupHandling && (tok->markupState != MSNotInMarkup)) {
                /* inside a markup tag */
                tok_putToMarkup(this, tok, tok->utf);
                if (tok->markupState >= MSError) {
                    picoos_strlcpy(savedUtf, tok->utf, 5);
                    savedUtfPos = tok->utfpos;
                    /* treat string up to (but not including) current char as simple
                       token and restart markup tag parsing with current char */
                    tok_treatMarkupAsSimpleToken(this, tok);
                    for (k = 0; k < savedUtfPos; k++) {
                        tok_treatChar(this, tok, savedUtf[k], markupHandling);
                    }
                } else if (tok->markupState == MSGotEnd) {
                    tok_treatMarkup(this, tok);
                }
            } else if ((markupHandling && (tok->utf[tok->utfpos-1] == (picoos_uchar)'<'))) {
                /* possible start of a markup tag */
                tok_putToMarkup(this, tok, tok->utf);
            } else if (type != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED) {
                if ((type != tok->tokenType) || (type == PICODATA_ITEMINFO1_TOKTYPE_CHAR) || (subtype != tok->tokenSubType)) {
                    /* token class changes: emit what was collected */
                    tok_treatSimpleToken(this, tok);
                } else if ((tok->utf[tok->utfpos-1] == EOL) && (tok->nrEOL == 2)) {
                    /* second consecutive line end: insert a "." token */
                    tok_treatSimpleToken(this, tok);
                    tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1);
                    tok_treatSimpleToken(this, tok);
                }
                tok_putToSimpleToken(this, tok, tok->utf, type, subtype);
            } else {
                tok_treatSimpleToken(this, tok);
            }

            tok->utfpos = 0;
            tok->utflen = 0;
            break;
    }
}
|
|
|
|
|
|
/**
 * Emits the simple token collected so far and resets the token collector.
 * If a markup tag is still being parsed, it is replayed as plain text first
 * (with a warning for recognised, unfinished tags unless aborted) and the
 * resulting token is emitted by a recursive call. Otherwise a non-empty
 * token is written as TOKEN item unless ignoring is active; space tokens
 * are written even while ignoring.
 * @param this : processing unit
 * @param tok  : tokenizer sub-object
 */
static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    /* terminate the token string if there is room for the 0 byte */
    if (tok->tokenPos < IN_BUF_SIZE) {
        tok->tokenStr[tok->tokenPos] = 0;
    }

    if (tok->markupState == MSNotInMarkup) {
        if ((tok->tokenPos > 0) && ((tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE))) {
            tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr);
        }
    } else {
        if (!(tok->aborted) && (tok->markupState >= MSGotmarkupTagName) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
            picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr);
        }
        tok_treatMarkupAsSimpleToken(this, tok);
        tok_treatSimpleToken(this, tok);
    }

    tok->tokenPos = 0;
    tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
    tok->tokenSubType = -1;
}
|
|
|
|
/* *****************************************************************************/
|
|
|
|
/**
 * Puts the tokenizer sub-object into its initial state and fetches the
 * knowledge bases it works with from the voice.
 * @param this      : processing unit owning the tokenizer sub-object
 * @param resetMode : not evaluated by this function
 * @return PICO_OK on success, PICO_ERR_OTHER if 'this' or its sub-object
 *         is NULL
 */
static pico_status_t tokReset(register picodata_ProcessingUnit this, picoos_int32 resetMode)
{
    tok_subobj_t * tok;
    MarkupId mId;

    if (NULL == this || NULL == this->subObj) {
        return PICO_ERR_OTHER;
    }
    tok = (tok_subobj_t *) this->subObj;

    tok->ignLevel = 0;

    /* UTF-8 collector */
    tok->utfpos = 0;
    tok->utflen = 0;

    /* markup parser */
    tok_clearMarkupParams(tok->markupParams);
    tok->nrMarkupParams = 0;
    tok->markupState = MSNotInMarkup;
    tok->markupPos = 0;
    for (mId = MIDummyStart; mId <= MIDummyEnd; mId++) {
        tok->markupLevel[mId] = 0;
    }
    tok->markupTagName[0] = 0;
    tok->markupTagType = MTNone;
    tok->markupTagErr = MENone;

    tok->strPos = 0;
    tok->strDelim = 0;
    tok->isFileAttr = FALSE;

    /* simple token collector */
    tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
    tok->tokenSubType = -1;
    tok->tokenPos = 0;

    tok->nrEOL = 0;

    /* use the mode constant that tok_treatChar compares against,
       not the boolean TRUE */
    tok->markupHandlingMode = MARKUP_HANDLING_ENABLED;
    tok->aborted = FALSE;

    tok->start = TRUE;

    /* output buffer */
    tok->outReadPos = 0;
    tok->outWritePos = 0;

    tok->saveFile[0] = 0;

    /* knowledge bases */
    tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);

    tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]);
    PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser));

    tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]);
    PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser));

    tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]);
    PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper));

    return PICO_OK;
}
|
|
|
|
/**
 * Initializes the tokenizer; all work is delegated to tokReset.
 * @param this      : processing unit to initialize
 * @param resetMode : passed on to tokReset unchanged
 * @return the status returned by tokReset
 */
static pico_status_t tokInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode)
{
    const pico_status_t status = tokReset(this, resetMode);

    return status;
}
|
|
|
|
|
|
static pico_status_t tokTerminate(register picodata_ProcessingUnit this)
|
|
{
|
|
return PICO_OK;
|
|
}
|
|
|
|
static picodata_step_result_t tokStep(register picodata_ProcessingUnit this, picoos_int16 mode, picoos_uint16 * numBytesOutput);
|
|
|
|
static pico_status_t tokSubObjDeallocate(register picodata_ProcessingUnit this,
|
|
picoos_MemoryManager mm)
|
|
{
|
|
|
|
if (NULL != this) {
|
|
picoos_deallocate(this->common->mm, (void *) &this->subObj);
|
|
}
|
|
mm = mm; /* avoid warning "var not used in this function"*/
|
|
return PICO_OK;
|
|
}
|
|
|
|
picodata_ProcessingUnit picotok_newTokenizeUnit(picoos_MemoryManager mm, picoos_Common common,
|
|
picodata_CharBuffer cbIn, picodata_CharBuffer cbOut,
|
|
picorsrc_Voice voice)
|
|
{
|
|
tok_subobj_t * tok;
|
|
picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
|
|
if (this == NULL) {
|
|
return NULL;
|
|
}
|
|
this->initialize = tokInitialize;
|
|
PICODBG_DEBUG(("set this->step to tokStep"));
|
|
this->step = tokStep;
|
|
this->terminate = tokTerminate;
|
|
this->subDeallocate = tokSubObjDeallocate;
|
|
this->subObj = picoos_allocate(mm, sizeof(tok_subobj_t));
|
|
if (this->subObj == NULL) {
|
|
picoos_deallocate(mm, (void *)&this);
|
|
return NULL;
|
|
}
|
|
tok = (tok_subobj_t *) this->subObj;
|
|
tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2));
|
|
if (NULL == tok->transducer) {
|
|
tokSubObjDeallocate(this,mm);
|
|
picoos_deallocate(mm, (void *)&this);
|
|
return NULL;
|
|
}
|
|
tokInitialize(this, PICO_RESET_FULL);
|
|
return this;
|
|
}
|
|
|
|
/**
|
|
* fill up internal buffer, try to locate token, write token to output
|
|
*/
|
|
picodata_step_result_t tokStep(register picodata_ProcessingUnit this,
|
|
picoos_int16 mode, picoos_uint16 * numBytesOutput)
|
|
{
|
|
register tok_subobj_t * tok;
|
|
|
|
if (NULL == this || NULL == this->subObj) {
|
|
return PICODATA_PU_ERROR;
|
|
}
|
|
tok = (tok_subobj_t *) this->subObj;
|
|
|
|
mode = mode; /* avoid warning "var not used in this function"*/
|
|
|
|
*numBytesOutput = 0;
|
|
while (1) { /* exit via return */
|
|
picoos_int16 ch;
|
|
|
|
if ((tok->outWritePos - tok->outReadPos) > 0) {
|
|
if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) {
|
|
PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
|
|
(picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos);
|
|
tok->outReadPos += *numBytesOutput;
|
|
if (tok->outWritePos == tok->outReadPos) {
|
|
tok->outWritePos = 0;
|
|
tok->outReadPos = 0;
|
|
}
|
|
}
|
|
else {
|
|
return PICODATA_PU_OUT_FULL;
|
|
}
|
|
|
|
}
|
|
else if (PICO_EOF != (ch = picodata_cbGetCh(this->cbIn))) {
|
|
PICODBG_DEBUG(("read in %c", (picoos_char) ch));
|
|
tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE);
|
|
}
|
|
else {
|
|
return PICODATA_PU_IDLE;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
/* end */
|