direcs  2012-09-30
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
speak_lib.h
Go to the documentation of this file.
1 #ifndef SPEAK_LIB_H
2 #define SPEAK_LIB_H
3 /***************************************************************************
4  * Copyright (C) 2005 to 2007 by Jonathan Duddington *
5  * email: jonsd@users.sourceforge.net *
6  * *
7  * This program is free software; you can redistribute it and/or modify *
8  * it under the terms of the GNU General Public License as published by *
9  * the Free Software Foundation; either version 3 of the License, or *
10  * (at your option) any later version. *
11  * *
12  * This program is distributed in the hope that it will be useful, *
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15  * GNU General Public License for more details. *
16  * *
17  * You should have received a copy of the GNU General Public License *
18  * along with this program; if not, see: *
19  * <http://www.gnu.org/licenses/>. *
20  ***************************************************************************/
21 
22 
23 /*************************************************************/
24 /* This is the header file for the library version of espeak */
25 /* */
26 /*************************************************************/
27 
28 #include <stdio.h>
29 
30 #define ESPEAK_API_REVISION 3
31 /*
32 Revision 2
33  Added parameter "options" to espeak_Initialize()
34 
35 Revision 3
36  Added espeakWORDGAP to espeak_PARAMETER
37 
38 */
39  /********************/
40  /* Initialization */
41  /********************/
42 
43 
44 typedef enum {
45  espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
46  espeakEVENT_WORD = 1, // Start of word
47  espeakEVENT_SENTENCE, // Start of sentence
49  espeakEVENT_PLAY, // Audio element
50  espeakEVENT_END, // End of sentence
51  espeakEVENT_MSG_TERMINATED, // End of message
52  espeakEVENT_PHONEME // Phoneme, if enabled in espeak_Initialize()
54 
55 
56 
57 typedef struct {
59  unsigned int unique_identifier; // message identifier (or 0 for key or character)
60  int text_position; // the number of characters from the start of the text
61  int length; // word length, in characters (for espeakEVENT_WORD)
62  int audio_position; // the time in mS within the generated speech output data
63  int sample; // sample id (internal use)
64  void* user_data; // pointer supplied by the calling program
65  union {
66  int number; // used for WORD and SENTENCE events. For PHONEME events this is the phoneme mnemonic.
67  const char *name; // used for MARK and PLAY events. UTF8 string
68  } id;
69 } espeak_EVENT;
70 /*
71  When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns. When the message is actually processed, the callback function will be called repeatedly.
72 
73 
74  In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).
75 
76  In PLAYBACK mode, the callback function is called as soon as an event happens.
77 
78  For example suppose that the following message is supplied to espeak_Synth:
79  "hello, hello."
80 
81 
82  * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :
83 
84  ** Block 1:
85  <audio data> +
86  List of events: SENTENCE + WORD + LIST_TERMINATED
87 
88  ** Block 2:
89  <audio data> +
90  List of events: WORD + END + LIST_TERMINATED
91 
92  ** Block 3:
93  no audio data
94  List of events: MSG_TERMINATED + LIST_TERMINATED
95 
96 
97  * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:
98 
99  ** SENTENCE
100  ** WORD (called when the sounds are actually played)
101  ** WORD
102  ** END (called when the end of the sentence is actually played)
103  ** MSG_TERMINATED
104 
105 
106  The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
107  So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
108 
109  A MARK event indicates a <mark> element in the text.
110  A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
111 */
112 
113 
114 
115 typedef enum {
120 
121 
122 typedef enum {
123  /* PLAYBACK mode: plays the audio data, supplies events to the calling program*/
125 
126  /* RETRIEVAL mode: supplies audio data and events to the calling program */
128 
129  /* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
131 
132  /* Synchronous playback */
134 
136 
137 
138 typedef enum {
139  EE_OK=0,
143 } espeak_ERROR;
144 
145 
146 #ifdef __cplusplus
147 extern "C"
148 #endif
149 int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
150 /* Must be called before any synthesis functions are called.
151  output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
152 
153  buflength: The length in mS of sound buffers passed to the SynthCallback function.
154 
155  path: The directory which contains the espeak-data directory, or NULL for the default location.
156 
157  options: bit 0: 1=allow espeakEVENT_PHONEME events.
158 
159 
160  Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
161 */
162 
163 typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);
164 
165 #ifdef __cplusplus
166 extern "C"
167 #endif
168 void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);
169 /* Must be called before any synthesis functions are called.
170  This specifies a function in the calling program which is called when a buffer of
171  speech sound data has been produced.
172 
173 
174  The callback function is of the form:
175 
176 int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
177 
178  wav: is the speech sound data which has been produced.
179  NULL indicates that the synthesis has been completed.
180 
181  numsamples: is the number of entries in wav. This number may vary, may be less than
182  the value implied by the buflength parameter given in espeak_Initialize, and may
183  sometimes be zero (which does NOT indicate end of synthesis).
184 
185  events: an array of espeak_EVENT items which indicate word and sentence events, and
186  also the occurrence of <mark> and <audio> elements within the text. The list of
187  events is terminated by an event of type = 0.
188 
189 
190  Callback returns: 0=continue synthesis, 1=abort synthesis.
191 */
192 
193 #ifdef __cplusplus
194 extern "C"
195 #endif
196 void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
197 /* This function may be called before synthesis functions are used, in order to deal with
198  <audio> tags. It specifies a callback function which is called when an <audio> element is
199  encountered and allows the calling program to indicate whether the sound file which
200  is specified in the <audio> element is available and is to be played.
201 
202  The callback function is of the form:
203 
204 int UriCallback(int type, const char *uri, const char *base);
205 
206  type: type of callback event. Currently only 1= <audio> element
207 
208  uri: the "src" attribute from the <audio> element
209 
210  base: the "xml:base" attribute (if any) from the <speak> element
211 
212  Return: 1=don't play the sound, but speak the text alternative.
213  0=place a PLAY event in the event list at the point where the <audio> element
214  occurs. The calling program can then play the sound at that point.
215 */
216 
217 
218  /********************/
219  /* Synthesis */
220  /********************/
221 
222 
223 #define espeakCHARS_AUTO 0
224 #define espeakCHARS_UTF8 1
225 #define espeakCHARS_8BIT 2
226 #define espeakCHARS_WCHAR 3
227 
228 #define espeakSSML 0x10
229 #define espeakPHONEMES 0x100
230 #define espeakENDPAUSE 0x1000
231 #define espeakKEEP_NAMEDATA 0x2000
232 
233 #ifdef __cplusplus
234 extern "C"
235 #endif
236 espeak_ERROR espeak_Synth(const void *text,
237  size_t size,
238  unsigned int position,
239  espeak_POSITION_TYPE position_type,
240  unsigned int end_position,
241  unsigned int flags,
242  unsigned int* unique_identifier,
243  void* user_data);
244 /* Synthesize speech for the specified text. The speech sound data is passed to the calling
245  program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
246 
247  text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
248  wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags"
249  parameter.
250 
251  size: Equal to (or greater than) the size of the text data, in bytes. This is used in order
252  to allocate internal storage space for the text. This value is not used for
253  AUDIO_OUTPUT_SYNCHRONOUS mode.
254 
255  position: The position in the text where speaking starts. Zero indicates speak from the
256  start of the text.
257 
258  position_type: Determines whether "position" is a number of characters, words, or sentences.
259  Values:
260 
261  end_position: If set, this gives a character position at which speaking will stop. A value
262  of zero indicates no end position.
263 
264  flags: These may be OR'd together:
265  Type of character codes, one of:
266  espeakCHARS_UTF8 UTF8 encoding
267  espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language.
268  espeakCHARS_AUTO 8 bit or UTF8 (this is the default)
269  espeakCHARS_WCHAR Wide characters (wchar_t)
270 
271  espeakSSML Elements within < > are treated as SSML elements, or if not recognised are ignored.
272 
273  espeakPHONEMES Text within [[ ]] is treated as phoneme codes (in espeak's Kirshenbaum encoding).
274 
275  espeakENDPAUSE If set then a sentence pause is added at the end of the text. If not set then
276  this pause is suppressed.
277 
278  unique_identifier: message identifier; helpful for identifying later
279  data supplied to the callback.
280 
281  user_data: pointer which will be passed to the callback function.
282 
283  Return: EE_OK: operation achieved
284  EE_BUFFER_FULL: the command can not be buffered;
285  you may try after a while to call the function again.
286  EE_INTERNAL_ERROR.
287 */
288 
289 #ifdef __cplusplus
290 extern "C"
291 #endif
292 espeak_ERROR espeak_Synth_Mark(const void *text,
293  size_t size,
294  const char *index_mark,
295  unsigned int end_position,
296  unsigned int flags,
297  unsigned int* unique_identifier,
298  void* user_data);
299 /* Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is
300  specified by the name of a <mark> element in the text.
301 
302  index_mark: The "name" attribute of a <mark> element within the text which specifies the
303  point at which synthesis starts. UTF8 string.
304 
305  For the other parameters, see espeak_Synth()
306 
307  Return: EE_OK: operation achieved
308  EE_BUFFER_FULL: the command can not be buffered;
309  you may try after a while to call the function again.
310  EE_INTERNAL_ERROR.
311 */
312 
313 #ifdef __cplusplus
314 extern "C"
315 #endif
316 espeak_ERROR espeak_Key(const char *key_name);
317 /* Speak the name of a keyboard key.
318  Currently this just speaks the "key_name" as given
319 
320  Return: EE_OK: operation achieved
321  EE_BUFFER_FULL: the command can not be buffered;
322  you may try after a while to call the function again.
323  EE_INTERNAL_ERROR.
324 */
325 
326 #ifdef __cplusplus
327 extern "C"
328 #endif
329 espeak_ERROR espeak_Char(wchar_t character);
330 /* Speak the name of the given character
331 
332  Return: EE_OK: operation achieved
333  EE_BUFFER_FULL: the command can not be buffered;
334  you may try after a while to call the function again.
335  EE_INTERNAL_ERROR.
336 */
337 
338 
339 
340 
341  /***********************/
342  /* Speech Parameters */
343  /***********************/
344 
345 typedef enum {
346  espeakSILENCE=0, /* internal use */
354  espeakOPTIONS=8, // reserved for misc. options. not yet used
356 
359  espeakEMPHASIS, /* internal use */
360  espeakLINELENGTH, /* internal use */
361  espeakVOICETYPE, // internal, 1=mbrola
362  N_SPEECH_PARAM /* last enum */
364 
365 typedef enum {
370 
371 #ifdef __cplusplus
372 extern "C"
373 #endif
374 espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
375 /* Sets the value of the specified parameter.
376  relative=0 Sets the absolute value of the parameter.
377  relative=1 Sets a relative value of the parameter.
378 
379  parameter:
380  espeakRATE: speaking speed in words per minute.
381 
382  espeakVOLUME: volume in range 0-100 0=silence
383 
384  espeakPITCH: base pitch, range 0-100. 50=normal
385 
386  espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
387 
388  espeakPUNCTUATION: which punctuation characters to announce:
389  value in espeak_PUNCT_TYPE (none, all, some),
390  see espeak_SetPunctuationList() to specify which characters are announced.
391 
392  espeakCAPITALS: announce capital letters by:
393  0=none,
394  1=sound icon,
395  2=spelling,
396  3 or higher, by raising pitch. This value gives the amount in Hz by which the pitch
397  of a word is raised to indicate it has a capital letter.
398 
399  espeakWORDGAP: pause between words, units of 10mS (at the default speed)
400 
401  Return: EE_OK: operation achieved
402  EE_BUFFER_FULL: the command can not be buffered;
403  you may try after a while to call the function again.
404  EE_INTERNAL_ERROR.
405 */
406 
407 #ifdef __cplusplus
408 extern "C"
409 #endif
410 int espeak_GetParameter(espeak_PARAMETER parameter, int current);
411 /* current=0 Returns the default value of the specified parameter.
412  current=1 Returns the current value of the specified parameter, as set by SetParameter()
413 */
414 
415 #ifdef __cplusplus
416 extern "C"
417 #endif
418 espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
419 /* Specifies a list of punctuation characters whose names are to be spoken when the
420  value of the Punctuation parameter is set to "some".
421 
422  punctlist: A list of character codes, terminated by a zero character.
423 
424  Return: EE_OK: operation achieved
425  EE_BUFFER_FULL: the command can not be buffered;
426  you may try after a while to call the function again.
427  EE_INTERNAL_ERROR.
428 */
429 
430 #ifdef __cplusplus
431 extern "C"
432 #endif
433 void espeak_SetPhonemeTrace(int value, FILE *stream);
434 /* Controls the output of phoneme symbols for the text
435  value=0 No phoneme output (default)
436  value=1 Output the translated phoneme symbols for the text
437  value=2 as (1), but also output a trace of how the translation was done (matching rules and list entries)
438 
439  stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout.
440 */
441 
442 #ifdef __cplusplus
443 extern "C"
444 #endif
445 void espeak_CompileDictionary(const char *path, FILE *log);
446 /* Compile pronunciation dictionary for a language which corresponds to the currently
447  selected voice. The required voice should be selected before calling this function.
448 
449  path: The directory which contains the language's '_rules' and '_list' files.
450  'path' should end with a path separator character ('/').
451  log: Stream for error reports and statistics information. If log=NULL then stderr will be used.
452 */
453  /***********************/
454  /* Voice Selection */
455  /***********************/
456 
457 
458 // voice table
459 typedef struct {
460  const char *name; // a given name for this voice. UTF8 string.
461  const char *languages; // list of pairs of (byte) priority + (string) language (and dialect qualifier)
462  const char *identifier; // the filename for this voice within espeak-data/voices
463  unsigned char gender; // 0=none 1=male, 2=female,
464  unsigned char age; // 0=not specified, or age in years
465  unsigned char variant; // only used when passed as a parameter to espeak_SetVoiceByProperties
466  unsigned char xx1; // for internal use
467  int score; // for internal use
468  void *spare; // for internal use
469 } espeak_VOICE;
470 
471 /* Note: The espeak_VOICE structure is used for two purposes:
472  1. To return the details of the available voices.
473  2. As a parameter to espeak_SetVoiceByProperties() in order to specify selection criteria.
474 
475  In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
476  may be used, each language name in the list is terminated by a zero byte and is also preceded by
477  a single byte which gives a "priority" number. The list of languages is terminated by an
478  additional zero byte.
479 
480  A language name consists of a language code, optionally followed by one or more qualifier (dialect)
481  names separated by hyphens (eg. "en-uk"). A voice might, for example, have languages "en-uk" and
482  "en". Even without "en" listed, voice would still be selected for the "en" language (because
483  "en-uk" is related) but at a lower priority.
484 
485  The priority byte indicates how the voice is preferred for the language. A low number indicates a
486  more preferred voice, a higher number indicates a less preferred voice.
487 
488  In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
489  priority byte.
490 */
491 
492 #ifdef __cplusplus
493 extern "C"
494 #endif
495 const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
496 /* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
497  The list is terminated by a NULL pointer
498 
499  If voice_spec is NULL then all voices are listed.
500  If voice_spec is given, then only the voices which are compatible with the voice_spec
501  are listed, and they are listed in preference order.
502 */
503 
504 #ifdef __cplusplus
505 extern "C"
506 #endif
507 espeak_ERROR espeak_SetVoiceByName(const char *name);
508 /* Searches for a voice with a matching "name" field. Language is not considered.
509  "name" is a UTF8 string.
510 
511  Return: EE_OK: operation achieved
512  EE_BUFFER_FULL: the command can not be buffered;
513  you may try after a while to call the function again.
514  EE_INTERNAL_ERROR.
515 */
516 
517 #ifdef __cplusplus
518 extern "C"
519 #endif
521 /* An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following
522  fields may be set:
523 
524  name NULL, or a voice name
525 
526  languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
527 
528  gender 0=not specified, 1=male, 2=female
529 
530  age 0=not specified, or an age in years
531 
532  variant After a list of candidates is produced, scored and sorted, "variant" is used to index
533  that list and choose a voice.
534  variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
535 */
536 
537 #ifdef __cplusplus
538 extern "C"
539 #endif
541 /* Returns the espeak_VOICE data for the currently selected voice.
542  This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
543 */
544 
545 #ifdef __cplusplus
546 extern "C"
547 #endif
549 /* Immediately stop synthesis and audio output of the current text. When this
550  function returns, the audio output is fully stopped and the synthesizer is ready to
551  synthesize a new message.
552 
553  Return: EE_OK: operation achieved
554  EE_INTERNAL_ERROR.
555 */
556 
557 
558 #ifdef __cplusplus
559 extern "C"
560 #endif
561 int espeak_IsPlaying(void);
562 /* Returns 1 if audio is played, 0 otherwise.
563 */
564 
565 #ifdef __cplusplus
566 extern "C"
567 #endif
569 /* This function returns when all data have been spoken.
570  Return: EE_OK: operation achieved
571  EE_INTERNAL_ERROR.
572 */
573 
574 #ifdef __cplusplus
575 extern "C"
576 #endif
578 /* last function to be called.
579  Return: EE_OK: operation achieved
580  EE_INTERNAL_ERROR.
581 */
582 
583 
584 #ifdef __cplusplus
585 extern "C"
586 #endif
587 const char *espeak_Info(void* ptr);
588 /* Returns the version number string.
589  The parameter is for future use, and should be set to NULL
590 */
591 #endif