Package AccessEngine :: Package AEDevice :: Package AEOutput :: Module Word
[hide private]
[frames] | no frames]

Source Code for Module AccessEngine.AEDevice.AEOutput.Word

  1  ''' 
  2  Defines classes and functions for parsing bodies of text to find words and  
  3  prepare them for output to the user. 
  4   
  5  The top-level functions in this module are optimized to build L{Word}s from 
  6  bodies of text containing more than a single L{Word}. A chunking scheme based 
  7  on the average length of words in the English language reduces the number of 
  8  calls to L{Word.append} and generally outperforms single character at a time 
  9  processing (at least for English text). 
 10   
 11  @var VOWELS: Vowels in the current language, used to determine if a word can be spoken 
 12  @type VOWELS: string 
 13   
 14  @author: Peter Parente 
 15  @author: Larry Weiss 
 16  @organization: IBM Corporation 
 17  @copyright: Copyright (c) 2005, 2007 IBM Corporation 
 18  @license: The BSD License 
 19   
 20  All rights reserved. This program and the accompanying materials are made 
 21  available under the terms of the BSD license which accompanies 
 22  this distribution, and is available at 
 23  U{http://www.opensource.org/licenses/bsd-license.php} 
 24  ''' 
 25  import unicodedata as ud 
 26  from AccessEngine import AEState 
 27  from AccessEngine.AEPor import AEPor 
 28  from AccessEngine.AEConstants import WORD_NON_BLANK, WORD_ALPHABETIC, WORD_ALPHA_NUMERIC, \ 
 29       WORD_ALPHA_PUNCT, WORD_ALPHA_NUMERIC_PUNCT, WORD_LAST 
 30  from Tools.i18n import _ 
 31   
 32  # define vowels if there are any in a language 
 33  VOWELS = _('AEIOUYaeiouy') 
 34   
class WordState(AEState.AEState):
  '''
  Collection of settings respected by the L{Word} parser. Subclasses may
  override these settings.

  The names listed here are not true instance variables; they are proxied by
  L{AEState.Setting} objects created in L{_initWordSettings}.

  Caps (bool): When True, capitalization is preserved when presenting text.
  Defaults to True.

  MaxRepeat (integer): Repetition threshold for a single character; see
  L{Word.hasRepeat} for how it is applied. Defaults to 4.

  WordDef (enum): Chooses which characters make up the main part of a word.
  One of WORD_NON_BLANK, WORD_ALPHABETIC, WORD_ALPHA_NUMERIC,
  WORD_ALPHA_PUNCT or WORD_ALPHA_NUMERIC_PUNCT. Defaults to WORD_NON_BLANK.

  Ignore (string): Characters that should be treated as if they were blank.
  Defaults to the NBSP character.
  '''
  def __init__(self):
    '''
    Runs the base class initializer and then registers the word settings.
    '''
    super(WordState, self).__init__()
    self._initWordSettings()

  def _initWordSettings(self):
    '''
    Registers the Caps, MaxRepeat, WordDef and Ignore settings, in that
    order, with their default values and localized labels.
    '''
    # Caps: keep or drop capitalization
    caps_label = _('Preserve caps?')
    caps_help = _('When set, capitalization is preserved in the output. '
                  'Otherwise, all strings are lowercased.')
    self.newBool('Caps', True, caps_label, caps_help)

    # MaxRepeat: numeric setting with default 4
    repeat_label = _('Maximum repeat')
    repeat_help = _('Defines the maximum number of times a '
                    'character should be allowed to repeat without the word '
                    'being spelled.')
    self.newNumeric('MaxRepeat', 4, repeat_label, 1, 100, 0, repeat_help)

    # WordDef: localized choice label -> word definition constant
    def_label = _('Word definition')
    choices = {
      _('Non-blank') : WORD_NON_BLANK,
      _('Alphanumeric and punct.') : WORD_ALPHA_NUMERIC_PUNCT,
      _('Alphanumeric') : WORD_ALPHA_NUMERIC,
      _('Alpha and punct.') : WORD_ALPHA_PUNCT,
      _('Alphabetic') : WORD_ALPHABETIC,
    }
    def_help = _('Defines the characters that comprise a word.')
    self.newEnum('WordDef', WORD_NON_BLANK, def_label, choices, def_help)

    # Ignore: characters handled like blanks, NBSP by default
    ignore_label = _('Ignored characters')
    ignore_help = _('Defines the characters to be treated as blanks.')
    self.newString('Ignore', u'\u00a0', ignore_label, ignore_help)
87
class DefaultWordState(object):
  '''
  Lightweight stand-in for L{WordState}, used as the default state by
  L{getContextFromString} and L{buildWordsFromString} when the caller does
  not supply one. Exposes the same attribute names as L{WordState} with the
  same default values, as plain class attributes, so no setting objects need
  to be instantiated.
  '''
  # characters treated as blanks (NBSP)
  Ignore = u'\u00a0'
  # main part of a word is every non-blank character
  WordDef = WORD_NON_BLANK
  # repetition threshold consulted by Word.hasRepeat
  MaxRepeat = 4
  # preserve capitalization
  Caps = True
98
def getContextFromString(string, por, state=DefaultWordState):
  '''
  Finds the L{Word}s before, at, and after the character offset of the given
  L{AEPor}. A missing neighbor is reported as None. The string is treated as
  starting at offset zero of the Item the L{AEPor} refers to. The class
  L{DefaultWordState} supplies the settings when no state is given.

  If parsing yields no words at all, the given L{AEPor} itself is returned in
  the middle position.

  @param string: Text to parse for words
  @type string: string
  @param por: Point of regard indicating the source accessible and Item for
    the string; its char_offset selects the current word
  @type por: L{AEPor}
  @param state: Settings used to define a word
  @type state: L{WordState}
  @return: Previous, current, and next words surrounding the L{AEPor}
  @rtype: 3-tuple of L{Word}
  '''
  words = buildWordsFromString(string, por, state)
  target = por.char_offset
  total = len(words)
  # degenerate cases: nothing parsed, or a single word with no neighbors
  if total == 0:
    return None, por, None
  if total == 1:
    return None, words[0], None
  # the current word is the one just before the first word that starts
  # beyond the target offset
  for idx, word in enumerate(words):
    if word.getPOR().char_offset <= target:
      continue
    if idx == 1:
      # the target lies in the first word, so there is no previous word
      return None, words[0], words[1]
    return words[idx-2], words[idx-1], words[idx]
  # no word starts after the target, so it lies in the last word
  return words[-2], words[-1], None
133
def buildWordsFromString(string, por=None, state=DefaultWordState,
                         main_ob=None, trail_ob=None):
  '''
  Splits the given string into a list of L{Word}s according to the given
  state. Every L{Word} gets its own L{AEPor} that shares the accessible and
  item offset of the given L{AEPor} (or of a dummy one when none is given)
  and whose character offset is the number of source characters consumed by
  the preceding words. The character offset of the given L{AEPor} is not
  used: the string is always treated as starting at offset zero of the Item.
  The class L{DefaultWordState} supplies the settings when no state is given.

  The returned list always holds at least one L{Word}; for an empty string
  that word is empty.

  @param string: Text to parse for words
  @type string: string
  @param por: Point of regard indicating the source accessible and Item for
    the string
  @type por: L{AEPor}
  @param state: System settings used to define a word
  @type state: L{WordState}
  @param main_ob: Function to invoke for each character in the main part of a
    word
  @type main_ob: callable
  @param trail_ob: Function to invoke for each character in the trailing part
    of a word
  @type trail_ob: callable
  @return: L{Word}s parsed from the string
  @rtype: list of L{Word}
  '''
  # anchor POR at offset zero; a dummy one when the caller gave none
  if por is None:
    base = AEPor(None, None, 0)
  else:
    base = AEPor(por.accessible, por.item_offset, 0)
  current = Word(state, base, main_ob, trail_ob)
  finished = []
  # number of source characters consumed by completed words
  consumed = 0
  # feed the text in chunks of six characters, roughly the average length of
  # an English word, to cut down on calls to Word.append
  for start in xrange(0, len(string), 6):
    rest = string[start:start+6]
    while True:
      rest = current.append(rest)
      if rest is None:
        # chunk fully absorbed by the current word
        break
      # the current word is complete; the leftover starts a new one
      finished.append(current)
      consumed += current.getSourceLength()
      next_por = AEPor(base.accessible, base.item_offset, consumed)
      current = Word(state, next_por, main_ob, trail_ob)
  # the word still being built when the text ran out
  finished.append(current)
  return finished
191
class Word(object):
  '''
  Represents a word in a body of text. Each L{Word} has a main and a trailing
  part where the main part is processed according to other flags in the
  current L{WordState} to improve its presentation to the user via a speech or
  other output device while the trailing part remains mostly unprocessed. The
  value of WordDef determines what characters lie in the main and trailing
  parts of each word. The following constants are available in
  L{AEConstants}.

    - WORD_NON_BLANK: All non-blank characters are added to the main part
    - WORD_ALPHABETIC: All characters considered letters are added to the
      main part
    - WORD_ALPHA_NUMERIC: All characters considered letters and digits are
      added to the main part
    - WORD_ALPHA_PUNCT: All characters considered letters and punctuation are
      added to the main part
    - WORD_ALPHA_NUMERIC_PUNCT: All characters considered letters, digits, and
      punctuation are added to the main part

  Characters in the ignore list are considered blank. A L{AEPor} can be
  associated with a L{Word} to indicate its context in a larger body of text.

  Callables may be specified as observers for characters processed by the
  main and trail parts of each L{Word}. An observer must take four
  parameters: this L{Word} instance, the L{WordState} in use, the current
  character, and the list of all characters in the main or trail part of the
  word. The observer should return the character to be added. The list may be
  modified in place to affect the final contents of the word.

  @ivar state: Settings that determine the definition of a L{Word} and how
    it is prepared for output
  @type state: L{WordState}
  @ivar por: Point of regard indicating where this L{Word} originated
  @type por: L{AEPor}
  @ivar source_word: Characters of this L{Word} as recorded while parsing
  @type source_word: list
  @ivar has_main: Has at least one main character been parsed?
  @type has_main: boolean
  @ivar main_part: Part of this L{Word} that will receive extra preparation
    for output
  @type main_part: list
  @ivar trail_part: Part of the word that will receive little preparation for
    output
  @type trail_part: list
  @ivar main_done: Is the L{main_part} complete?
  @type main_done: boolean
  @ivar trail_done: Is the L{trail_part} complete?
  @type trail_done: boolean
  @ivar more: Are there likely more L{Word}s after this one in the text source
    where this L{Word} originated?
  @type more: boolean
  @ivar curr_repeat: Indicates a character should be considered a repeat iff
    this value > MaxRepeat. It is not the exact number of repetitions of a
    character as it is optimized for speed, not accuracy
  @type curr_repeat: integer
  @ivar last_char: Character currently being tracked for repetition
  @type last_char: string
  @ivar main_ob: Function to invoke for each character in the main part of a
    word
  @type main_ob: callable
  @ivar trail_ob: Function to invoke for each character in the trailing part
    of a word
  @type trail_ob: callable
  '''
  def __init__(self, state, por, main_ob=None, trail_ob=None):
    '''
    Stores the L{WordState} and initializes all instance variables.

    @param state: State that defines L{Word}s and how they are processed
    @type state: L{WordState}
    @param por: Point of regard indicating where this L{Word} originated
    @type por: L{AEPor}
    @param main_ob: Function to invoke for each character in the main part of
      a word
    @type main_ob: callable
    @param trail_ob: Function to invoke for each character in the trailing
      part of a word
    @type trail_ob: callable
    '''
    self.state = state
    self.por = por
    self.source_word = []
    self.main_part = []
    self.trail_part = []
    self.has_main = False
    self.main_done = False
    self.trail_done = False
    self.more = False
    self.curr_repeat = 0
    self.last_char = None
    self.main_ob = main_ob
    self.trail_ob = trail_ob

  def __eq__(self, other):
    '''
    Compares this L{Word} to the one provided based on their L{AEPor}s and
    content. If their L{source_word}s and L{AEPor}s are the same, they are
    considered equal. Objects lacking those attributes (e.g. None) are not
    comparable and yield NotImplemented instead of raising.

    @param other: Word to compare
    @type other: L{Word}
    @return: Are the two words equal?
    @rtype: boolean
    '''
    try:
      other_por = other.por
      other_source = other.source_word
    except AttributeError:
      return NotImplemented
    return (self.por == other_por) and (self.source_word == other_source)

  def __ne__(self, other):
    '''
    Inverse of L{__eq__}. Defined explicitly because Python 2 does not derive
    inequality from equality.

    @param other: Word to compare
    @type other: L{Word}
    @return: Are the two words different?
    @rtype: boolean
    '''
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __unicode__(self):
    '''
    Gets this L{Word} as a unicode string.

    @return: Main part of the string joined with the trail
    @rtype: string
    '''
    return u''.join(self.main_part+self.trail_part)

  def __str__(self):
    '''
    Gets this L{Word} as a non-unicode string.

    @return: Main part of the string joined with the trail
    @rtype: string
    '''
    return ''.join(self.main_part+self.trail_part)

  def _isMainChar(self, ch):
    '''
    Determines if the given character should be considered a part of the main
    part of this word or not based on the definition of the word given by
    L{WordState}. An unrecognized WordDef value makes every character a
    non-main character.

    @param ch: Character to test
    @type ch: string
    @return: Does the character belong in the main part?
    @rtype: boolean
    '''
    word_def = self.state.WordDef
    if word_def == WORD_NON_BLANK:
      return not self.isBlank(ch)
    elif word_def == WORD_ALPHABETIC:
      return self.isAlpha(ch)
    elif word_def == WORD_ALPHA_NUMERIC:
      return self.isAlpha(ch) or self.isNumeric(ch)
    elif word_def == WORD_ALPHA_PUNCT:
      return self.isAlpha(ch) or self.isPunctuation(ch)
    elif word_def == WORD_ALPHA_NUMERIC_PUNCT:
      return self.isAlpha(ch) or self.isNumeric(ch) or self.isPunctuation(ch)
    else:
      return False

  def replaceMain(self, text):
    '''
    Replaces the main part of the word with the given text. The text is
    stored as a list so that it can still be concatenated with the trail part
    when this L{Word} is converted to a string.

    @param text: Text to use as the main part of the word
    @type text: string
    '''
    self.main_part = list(text)

  def replaceTrail(self, text):
    '''
    Replaces the trail part of the word with the given text. The text is
    stored as a list so that it can still be concatenated with the main part
    when this L{Word} is converted to a string.

    @param text: Text to use as the trail part of the word
    @type text: string
    '''
    self.trail_part = list(text)

  def getPOR(self):
    '''
    Gets the L{AEPor} associated with the start of this L{Word}.

    @return: Point of regard pointing to the start of this word
    @rtype: L{AEPor}
    '''
    return self.por

  def isBlank(self, ch):
    '''
    Determines if the given character is blank or ignored.

    @param ch: Character to test
    @type ch: string
    @return: Is the character a blank?
    @rtype: boolean
    '''
    return ch.isspace() or ch in self.state.Ignore

  def isAlpha(self, ch):
    '''
    Determines if the given character is a letter.

    @param ch: Character to test
    @type ch: string
    @return: Is the character a letter?
    @rtype: boolean
    '''
    return ch.isalpha()

  def isNumeric(self, ch):
    '''
    Determines if the given character is a digit.

    @param ch: Character to test
    @type ch: string
    @return: Is the character a number?
    @rtype: boolean
    '''
    return ch.isdigit()

  def isPunctuation(self, ch):
    '''
    Determines if the given character is a punctuation mark. A character
    counts as punctuation when its Unicode category is Lm (modifier letter)
    or starts with M (mark), P (punctuation) or S (symbol).

    @param ch: Character to test
    @type ch: string
    @return: Is the character a punctuation mark?
    @rtype: boolean
    '''
    cat = ud.category(unicode(ch))
    return (cat == 'Lm' or cat[0] in ['M', 'P', 'S'])

  def isSymbol(self, ch):
    '''
    Determines if the given character falls in one of the Unicode "other"
    categories, i.e. those whose code starts with C. Characters in the S
    (symbol) categories are reported by L{isPunctuation} instead.

    @param ch: Character to test
    @type ch: string
    @return: Is the character in a Unicode C category?
    @rtype: boolean
    '''
    return ud.category(unicode(ch)).startswith('C')

  def isVowel(self, ch):
    '''
    Determines if the given character is a vowel. Relies on a translator to
    list all vowels in the current locale.

    @param ch: Character to test
    @type ch: string
    @return: Is the character listed in VOWELS?
    @rtype: boolean
    '''
    return ch in VOWELS

  def isCap(self, ch):
    '''
    Determines if the given character is an upper case letter.

    @param ch: Character to test
    @type ch: string
    @return: Is the character capitalized?
    @rtype: boolean
    '''
    return ch.isupper()

  def getCharValue(self, ch):
    '''
    Gets the unicode hex value for a character sans the 0x prefix.

    @param ch: Single character
    @type ch: string
    @return: Hex value of the character
    @rtype: string
    '''
    return hex(ord(ch))[2:]

  def getCharName(self, ch):
    '''
    Gets the lowercased unicode name of the character, one of the strings
    listed in the U{http://unicode.org/charts/charindex.html}. If the name
    could not be determined from the given string, returns an empty string.
    Note that these names are not localized.

    @param ch: Single character
    @type ch: string
    @return: Name of the character
    @rtype: string
    '''
    try:
      return ud.name(unicode(ch)).lower()
    except Exception:
      # best effort: unnamed characters and bad input yield no name
      return ''

  def getCharDescription(self, ch):
    '''
    Gets a localized description of the given character. The most detailed
    description for a character is returned so that, for instance, 'e' is
    described as a vowel and not just a letter. Ignored characters are
    reported as such before any other test is made.

    @param ch: Character to test
    @type ch: string
    @return: Localized description of the character according to the
      processing done by this L{Word} class and based on the current state,
      or None when the character matches no known class
    @rtype: string or None
    '''
    if ch in self.state.Ignore:
      return _('ignored')
    elif self.isBlank(ch):
      return _('blank')
    elif self.isAlpha(ch):
      if self.isCap(ch):
        return _('capital')
      elif self.isVowel(ch):
        return _('vowel')
      else:
        return _('letter')
    elif self.isNumeric(ch):
      return _('number')
    elif self.isPunctuation(ch):
      return _('punctuation')
    elif self.isSymbol(ch):
      return _('symbol')
    return None

  def getSource(self):
    '''
    Gets the source characters of this word as recorded while parsing.

    @return: The L{source_word} list itself, not a copy
    @rtype: list
    '''
    return self.source_word

  def getSourceLength(self):
    '''
    Gets the length of the unprocessed source text of this L{Word}.

    @return: Length of the L{source_word}
    @rtype: integer
    '''
    return len(self.source_word)

  def getMainLength(self):
    '''
    Gets the length of the processed main part of this L{Word}.

    @return: Length of the L{main_part}
    @rtype: integer
    '''
    return len(self.main_part)

  def moreAvailable(self):
    '''
    Makes a guess as to whether or not there are more L{Word}s in the body of
    text from which this word originated. This guess is based on whether or
    not the last chunk passed to L{append} was processed in full.

    @return: Are there likely more L{Word}s in the original body of text
    @rtype: boolean
    '''
    return self.more

  def hasRepeat(self):
    '''
    Gets if this L{Word} has a character repeated more than the maximum
    number of repetitions allowed or not. Words made up entirely of digits
    are never reported as having a repeat.

    @return: Does this L{Word} contain a repeated character?
    @rtype: boolean
    '''
    return (self.curr_repeat > self.state.MaxRepeat and
            not self.isAllNumeric())

  def hasCap(self):
    '''
    Gets if this L{Word} contains an uppercase letter or not.

    @return: Does this L{Word} contain a capital letter?
    @rtype: boolean
    '''
    return any(self.isCap(ch) for ch in self.source_word)

  def hasVowel(self):
    '''
    Gets if this L{Word} contains a vowel or not.

    @return: Does this L{Word} contain a vowel?
    @rtype: boolean
    '''
    return any(self.isVowel(ch) for ch in self.source_word)

  def isAllCaps(self):
    '''
    Gets if this L{Word} is all capitals or not. The source characters are
    joined into one string before testing, so the word qualifies when it has
    at least one cased character and every cased character is upper case;
    blanks, digits and punctuation do not disqualify it.

    @return: Is this L{Word} all capital letters?
    @rtype: boolean
    '''
    # source_word is a list of characters; isCap expects a string
    return self.isCap(''.join(self.source_word))

  def isAllNumeric(self):
    '''
    Gets if this L{Word} is all numbers or not. Blank and ignored characters,
    such as trailing whitespace, are skipped; a word without any non-blank
    character is not numeric.

    @return: Is this L{Word} all numbers?
    @rtype: boolean
    '''
    # source_word is a list of characters, so test them one at a time
    solid = [ch for ch in self.source_word if not self.isBlank(ch)]
    return bool(solid) and all(self.isNumeric(ch) for ch in solid)

  def isAllBlank(self):
    '''
    Gets if this L{Word} is all blanks or not. An empty word counts as blank.

    @return: Is this L{Word} all blanks?
    @rtype: boolean
    '''
    return all(self.isBlank(ch) for ch in self.source_word)

  def append(self, chunk):
    '''
    Parses the given chunk of text for characters that should be added to the
    L{main_part} or L{trail_part} of this L{Word}. If this word has neither
    L{main_done} or L{trail_done} set, then all main characters determined by
    L{_isMainChar} up to the first non-main character are added to the main
    part of this word. When the first non-main character is encountered,
    L{main_done} is set. If this word has L{main_done} set and L{trail_done}
    unset, all non-main characters are added to the trail part of this word.
    When another main character is encountered after L{main_done} is set,
    L{trail_done} is set and the remainder of the given chunk is returned
    unprocessed to be added to another L{Word}. Once L{trail_done} is set, no
    further text can be appended to this L{Word}.

    @param chunk: Chunk of text to parse for words
    @type chunk: string
    @return: Unprocessed portion of the chunk or None if fully processed
    @rtype: string or None
    @see: L{_processMain}
    @see: L{_processTrail}
    '''
    if self.trail_done:
      # don't add anything new after the trail is complete
      return chunk
    for i, ch in enumerate(chunk):
      if self._isMainChar(ch):
        if self.main_done:
          # not accepting more main characters; hand back the remainder
          self.trail_done = True
          self.more = True
          return chunk[i:]
        self.has_main = True
        # process a new main character
        ch = self._processMain(ch)
      else:
        # process a new trail character
        ch = self._processTrail(ch)
        self.main_done = True
      # detect character repetitions for everything except the ellipsis
      # this does not keep accurate track of the number of repetitions, only
      # that some character was repeated more than MaxRepeat times in this
      # word; tracking stops switching characters once the count reaches
      # MaxRepeat
      if ch == self.last_char and ch != '.':
        self.curr_repeat += 1
      elif self.curr_repeat < self.state.MaxRepeat:
        self.last_char = ch
        self.curr_repeat = 1
    return None

  def _processMain(self, ch):
    '''
    Adds the given character to the L{source_word}. If the character is in
    the ignore list it is replaced by a space. If Caps is unset, makes the
    character lowercase. The result is passed through the main observer, if
    one was given, and finally appended to L{main_part}.

    @param ch: Character to process
    @type ch: string
    @return: Character inserted in L{main_part}
    @rtype: string
    '''
    self.source_word.append(ch)
    if ch in self.state.Ignore:
      ch = u' '
    if not self.state.Caps:
      ch = ch.lower()
    if self.main_ob:
      ch = self.main_ob(self, self.state, ch, self.main_part)
    self.main_part.append(ch)
    return ch

  def _processTrail(self, ch):
    '''
    Replaces the given character by a space if it is in the ignore list and
    then adds it to the L{source_word}. The character is passed through the
    trail observer, if one was given, and finally appended to L{trail_part}.

    @param ch: Character to process
    @type ch: string
    @return: Character inserted in L{trail_part}
    @rtype: string
    '''
    if ch in self.state.Ignore:
      ch = u' '
    self.source_word.append(ch)
    if self.trail_ob:
      ch = self.trail_ob(self, self.state, ch, self.trail_part)
    self.trail_part.append(ch)
    return ch
728 729 #if __name__ == '__main__': 730 #import sys 731 #def printWords(words): 732 #for w in words: 733 #sys.stdout.write('"%s"' % str(w)) 734 #for w in words: 735 #print w.getPOR() 736 737 #class TestState(object): 738 #def __init__(self, **kwargs): 739 #self.__dict__.update(kwargs) 740 741 #test = "JAWS is a screen reader. I'd like to say, 'I'd like $10.00 for free.' Hey--look at that!" 742 #state = TestState(WordDef=WORD_NON_BLANK, Caps=True, CapExpand=True, 743 #NumExpand=True, Ignore='', MaxRepeat=3) 744 #printWords(buildWordsFromString(test, None, state)) 745 #for i in range(len(test)): 746 #p, c, n = getContextFromString(test, state, AEPor(None, None, i)) 747 #try: print p.getPOR(), p, 748 #except: print 'no prev', 749 #try: print c.getPOR(), c, 750 #except: print 'no current', 751 #try: print n.getPOR(), n, 752 #except: print 'no next', 753 #print 754