1 '''
2 Defines the base class for all audio output devices.
3
4 @author: Brett Clippingdale
5 @author: Peter Parente
6 @organization: IBM Corporation
7 @copyright: Copyright (c) 2005, 2007 IBM Corporation
8
9 @author: Frank Zenker
10 @organization: IT Science Center Ruegen gGmbH, Germany
11 @copyright: Copyright (c) 2007, 2008 ITSC Ruegen
12
13 @license: I{The BSD License}
14 All rights reserved. This program and the accompanying materials are made
15 available under the terms of the BSD license which accompanies
16 this distribution, and is available at
17 U{http://www.opensource.org/licenses/bsd-license.php}
18 '''
19 import time
20 import AccessEngine
21 import AccessEngine.AEDevice.AEOutput
22 from AccessEngine.AEDevice.AEOutput.Style import Style
23 from AccessEngine.AEDevice.AEOutput.Word import *
24 from AccessEngine import AEConstants
25 from Tools.i18n import _
26 from ThreadProxy import *
27 from AccessEngine.AEConstants import *
28
30 '''
31 Defines the basic style attributes for all L{AEOutput.Audio} devices. Most
32 attributes documented below do not exist in this base class. It is the job
33 for a specific output device definition to add new settings to its own style
34 class if it supports any of the missing concepts. Only those settings
35 related to text parsing are pre-defined here for use by the default
36 implementation of parseString in L{AEOutput.Audio}.
37
38 CapString (string): Set to the string that should be presented prior to the
39 spelling of capitalized letters and presented twice before spelling fully
40 capitalized words. Defaults to 'cap'.
41
42 Volume (range): Current volume
43
44 Rate (range): Current speech rate
45
46 Pitch (range): Current baseline pitch
47
48 Voice (enum): Current voice number
49
50 PosX (integer): Current spatial position x-coordinate, left negative
51
52 PosY (integer): Current spatial position y-coordinate, down negative
53
54 PosZ (integer): Current spatial position z-coordinate, behind negative
55
56 Language (choice): Current language in use by the speech synthesizer
57 formatted according to the syntax set forth in RFC 4646 and drawn from the
58 registry created by RFC 4645. See U{http://www.ietf.org/rfc/rfc4646.txt} and
59 U{http://www.iana.org/assignments/language-subtag-registry}. If an engine
60 implements some unknown derivation, use the 'x' private marker in the name
61 and continue with additional designations. See IBMSpeech for an example
62 implementation. The device is responsible for case-insensitive comparisons.
63
64 Instrument (enum): Current MIDI instrument number
65
66 Continuous (bool): Is output continuous (i.e. looping)?
67
68 Channel (integer): Channel number indicating concurrency of output. Styles
69 sharing a channel number are output sequentially. Styles having different
70 channel numbers are played concurrently if possible.
71
72 Stoppable (bool): Can output be stopped?
73
74 Speakable (bool): Can output be spoken?
75
76 Soundable (bool): Can output be played as a non-speech sound?
77
78 Gender (enum): Speech vocal tract gender
79
80 Aspiration (range): Speech breathiness
81
82 Frication (range): Emphasis of fricatives in speech
83
84 Intonation (range): Speech pitch variation
85
86 HeadSize (range): Speech speaker head size for reverberation
87
88 SpellCaps (bool): When set to True, fully capitalized words will be spelled
89 for presentation if the device supports spelling.
90
91 Blank (string): String to substitute for blank output. Defaults to 'blank'
92 localized.
93
94 CapExpand (bool): When set to True, a space will be inserted before
95 embedded capital letters in the presentation of a word. Defaults to True.
96
97 NumExpand (bool): When set to True, a space will be inserted before embedded
98 numbers in the presentation of a word. Defaults to False.
99
100 SpellFormat (enum): Set to FORMAT_TEXT to present words without additional
101 spelling. Set to FORMAT_PRONOUNCE to spell punctuation characters for
102 presentation. Set to FORMAT_SPELL to spell all characters for
103 presentation. Set to FORMAT_PHONETIC to spell all characters phonetically
104 for presentation. Defaults to FORMAT_TEXT. Any values other than
105 FORMAT_TEXT may be ignored by a device.
106 '''
108 '''
109 Called automatically by the L{Style} base class when this style is a
110 default. Initializes all default audio parsing options. If you want these
111 settings created, be sure to call this base class implementation if you
112 override in a subclass. Defaults to creating all
113 L{AccessEngine.AEDevice.AEOutput.Word.WordState} settings.
114 '''
115 self._initWordSettings()
116 self.newString('CapString', 'cap', _('Cap string'),
117 _('String to say once before spelling capitalized letters '
118 ' and twice before spelling fully capitalized words.'))
119 self.newString('Blank', _('blank'), _('Blank string'),
120 _('String to speak when a blank or ignored character is '
121 'output on its own.'))
122 self.newBool('CapExpand', True, _('Expand caps?'),
123 _('When set, insert a space before each capital letter in the '
124 'output.'))
125 self.newBool('NumExpand', False, _('Expand numbers?'),
126 _('When set, insert a space before each number in the '
127 'output.'))
128 self.newEnum('SpellFormat', FORMAT_TEXT, _('Pronunciation rule'),
129 {_('Pronounce words') : FORMAT_TEXT,
130 _('Pronounce punctuation') : FORMAT_PRONOUNCE,
131 _('Pronounce characters') : FORMAT_SPELL,
132 _('Pronounce phonetic') : FORMAT_PHONETIC},
133 _('Defines what and how characters are pronounced.'), False)
134
136 '''
137 Builds a group containing settings that should be available for all audio
138 devices given that the L{Word} parser and the
139 L{Audio.parseString} methods support them without any extra
140 work on the part of the device.
141
142 @param group: Parent group for the word settings group
143 @type group: L{AEState.Setting.Group}
144 @return: Group containing the word settings, can be extended as needed
145 @rtype: L{AEState.Setting.Group}
146 '''
147 w = group.newGroup(_('Words'))
148 w.extend(['WordDef',
149 'SpellFormat',
150 'MaxRepeat',
151 'Ignore',
152 'Blank',
153 'CapString',
154 'Caps',
155 'CapExpand',
156 'NumExpand'])
157 return w
158
159 -class Audio(AEOutput.AEOutput):
160 '''
161 Defines the base class for all audio output devices. Provides default
162 implementations of L{sendStringSync} and L{parseString} specific to audio
163 devices.
164
165 Some methods defined here raise NotImplementedError to ensure that derived
166 classes create appropriate implementations. Implementing L{sendIndex} is
167 optional. The L{AccessEngine.AEDeviceManager} catches the not implemented
168 exception in this case.
169
170 @ivar listeners: List of callables that should be notified when a marker
171 inserted with L{sendIndex} is encountered
172 @type listeners: list
173 '''
174 STYLE = AudioStyle
175
182
184 '''
185 @return: 'audio' as the only capability of this device.
186 @rtype: list of string
187 '''
188 return ['audio']
189
191 '''
192 Adds a listener that should be notified when speech has progressed to the
193 point where a marker was inserted with L{sendIndex}.
194
195 @param listener: The method that should be called when markers received.
196 @type listener: callable
197 '''
198 self.listeners.append(listener)
199
201 '''
202 Removes the specified listener.
203
204 @param listener: The method that should no longer be called when markers
205 received.
206 @type listener: callable
207 @raise ValueError: When the given listener is not already registered
208 '''
209 self.listeners.remove(listener)
210
212 '''
213 Looks at the L{USE_THREAD <AccessEngine.AEDevice.AEOutput.Base.AEOutput.USE_THREAD>}
214 flag to see if the device implementing this interface wants a thread proxy
215 or not.
216
217 @return: self or L{ThreadProxy.AudioThreadProxy}
218 @rtype: L{AEOutput}
219 '''
220 if self.USE_THREAD == True:
221 return AudioThreadProxy(self)
222 elif self.USE_THREAD == False:
223 return self
224 else:
225 raise NotImplementedError('USE_THREAD not specified')
226
def _parseMain(self, word, style, ch, part):
  '''
  Per-character hook used while the main part of a L{Word} is being parsed.
  Applies the SpellFormat, CapExpand and NumExpand settings of the given
  style to decide what should be presented for one character.

  When a spelling rule applies, a ', ' separator is appended to C{part}
  first (only if C{part} already holds something) and the spoken form of
  the character is returned. Otherwise the character is returned unchanged,
  optionally preceded by a single space appended to C{part}.

  @param word: Word currently parsing its source text
  @type word: L{Word}
  @param style: Style used to configure the parsing process
  @type style: L{AEOutput.Style.AudioStyle}
  @param ch: Character in the word
  @type ch: string
  @param part: List of characters already processed in this part of the
    word; may be extended in place with a separator
  @type part: list
  @return: Character(s) to be appended to the list
  @rtype: string
  '''
  fmt = style.SpellFormat

  def separate():
    # spelled items are comma separated, but never start with a comma
    if part:
      part.append(', ')

  # any truthy format names punctuation and symbols (presumably every format
  # other than FORMAT_TEXT -- confirm against AEConstants)
  if fmt and (word.isPunctuation(ch) or word.isSymbol(ch)):
    separate()
    # fall back on the raw character when no name is known for it
    return word.getCharName(ch) or ch

  if fmt == AEConstants.FORMAT_SPELL:
    separate()
    try:
      spoken = AEConstants.SPELLED_MAP[ch]
    except KeyError:
      # no spelling available, present the character value instead
      spoken = _('uni-code %s') % word.getCharValue(ch)
    return spoken

  if fmt == AEConstants.FORMAT_PHONETIC:
    separate()
    try:
      spoken = AEConstants.NATO_MAP[ch]
    except KeyError:
      # no phonetic form available, present the value and description
      spoken = _('uni-code %s %s') % (word.getCharValue(ch),
                                      word.getCharDescription(ch))
    return spoken

  # no spelling applied: optionally split the word before an embedded
  # capital letter or number, but only once the main part is non-empty
  wants_gap = style.CapExpand and word.isCap(ch) and word.getMainLength()
  if wants_gap or (style.NumExpand and word.isNumeric(ch) and
                   word.getMainLength()):
    part.append(' ')
  return ch
280
282 '''
283 Provides a default implementation of parsing that formats words for audio
284 devices supporting speech output. The base L{Word} class is used plus some
285 additional processing for:
286
287 - blank words
288 - expanded caps
289 - expanded numbers
290 - spelling format
291 - individual characters
292
293 @param text: Text to be parsed
294 @type text: string
295 @param style: Style object defining how the text should be parsed
296 @type style: L{AEOutput.Style}
297 @param por: Point of regard for the first character in the text, or None if
298 the text is not associated with a POR
299 @type por: L{AEPor}
300 @param sem: Semantic tag for the text to aid parsing
301 @type sem: integer
302 @return: Parsed words
303 @rtype: 3-tuple of lists of string, L{AEPor}, L{AEOutput.Style}
304 '''
305 rv = []
306
307 words = buildWordsFromString(text, por, style, self._parseMain)
308 for i, w in enumerate(words):
309 ns = None
310 if len(text) == 1 and style.SpellFormat == AEConstants.FORMAT_TEXT:
311
312
313 try:
314
315 ns = AEConstants.SPELLED_MAP[text]
316 except KeyError:
317 ns = text
318
319 if w.isAllBlank():
320 if i == 0 and len(words) > 1:
321
322
323 continue
324
325 if sem != AEConstants.SEM_CHAR:
326 ns = style.Blank
327
328 if ns is None:
329
330 ns = unicode(w)
331 if por is None:
332
333 por = None
334 else:
335 por = w.getPOR()
336
337 rv.append((ns, por, style))
338 return rv
339
340 - def send(self, name, value, style=None):
372
374 '''
375 Sends a string of one or more characters to the device. The style object
376 is used by the device in deciding how the given text should be presented.
377
378 @param text: Text to send to the device
379 @type text: string
380 @param style: Style with which this text should be output
381 @type style: L{AEOutput.Style}
382 @raise NotImplementedError: When not overridden in a subclass
383 '''
384 raise NotImplementedError
385
387 '''
388 Sends a string filename to the device, the contents of which should be
389 output. The style object is used by the device in deciding how the given
390 text should be presented.
391
392 Typically, this method should be implemented by an audio device that
393 supports playback of waveform or sequencer files. It might also be used
394 by devices as a way of synthesizing the entire contents of a file without
395 having to pass all of the contents through the rest of the system.
396
397 @param fn: Absolute filename
398 @type fn: string
399 @param style: Style with which this text should be output
400 @type style: L{AEOutput.Style}
401 @raise NotImplementedError: When not overridden in a subclass
402 '''
403 raise NotImplementedError
404
406 '''
407 Purges buffered text and styles, and interrupts on-going output.
408
409 @param style: Style indicating the channel on which the stop should be
410 performed; None indicates stop on all channels
411 @type style: L{AEOutput.Style}
412 @raise NotImplementedError: When not overridden in a subclass
413 '''
414 raise NotImplementedError
415
417 '''
418 Indicates all text buffered by L{sendString} should now be output.
419 For devices that do the buffering in the driver, this action may mean
420 simply sending the command. For devices that do not buffer, this action
421 means sending text and styles buffered in the SUE device definition.
422
423 @param style: Style indicating the channel on which the talk should be
424 performed; None indicates talk on all channels
425 @type style: L{AEOutput.Style}
426 @raise NotImplementedError: When not overridden in a subclass
427 '''
428 raise NotImplementedError
429
431 '''
432 Inserts a marker in the output stream. The device should notify all
433 listeners when the marker is reached. The marker is typically a
434 monotonically increasing integer, modulo the maximum integer value
435 supported by the device.
436
437 @param style: Style indicating the channel on which the marker should be
438 inserted
439 @type style: L{AEOutput.Style}
440 @return: Unique marker identifying the index inserted
441 @rtype: integer
442 @raise NotImplementedError: When not overridden in a subclass
443 '''
444 raise NotImplementedError
445
447 '''
448 Buffers a complete string to send to the device synchronously.
449
450 This should B{not} be used in place of L{sendString} since this will not
451 return until the string is finished being output. This is provided B{only}
452 for the convenience of utility writers. Uses L{sendStop}, L{sendString},
453 L{sendTalk}, and then sleeps until L{AEOutput.isActive
454 <AccessEngine.AEDevice.AEOutput.Base.AEOutput.isActive>} returns False.
455
456 This method sends the stop, string, and talk commands directly to the
457 device, bypassing any thread proxy. This could be problematic if the audio
458 library in question cannot be used in more than one thread.
459
460 @param text: String to send to the device
461 @type text: string
462 @param style: Style on which this string should be output; None implies
463 some reasonable default should be used
464 @type style: L{AEOutput.Style}
465 '''
466 if style is None:
467 style = self.default_style
468 self.send(AEConstants.CMD_STOP, None, None)
469 self.send(AEConstants.CMD_STRING, text, style)
470 self.send(AEConstants.CMD_TALK, None, None)
471
472
473 while self.isActive():
474 time.sleep(1)
475
477 '''
478 Maps a language tag to the closest one possible as supported by this
479 device. The result may be an exact match or only a partial match. If
480 absolutely nothing matches the requested tag, not even starting at the
481 major language, None should be returned.
482
483 @param tag: IANA language tag, lower case
484 @type tag: string
485 @return: IANA language tag or None
486 @rtype: string
487 '''
488 raise NotImplementedError
489