# coding: windows-1252

# This program transforms the FEWL dictionary into an SRS4g dictionary.
# It is a simple example of a FEWL conversion program.  Because SRS4g
# is a simple spelling system, there is no need for rules involving 
# stress or prefixes/suffixes.  More complex systems will doubtless 
# require much more complex programming.

# More complex transformation programs may well be organized into three
# or more phases, an initial phase of transformations of common sound
# sequences or affixes (e.g., special spellings for the -ness and -ous 
# ending), followed by a main letter-by-letter transformation phase,
# followed by a final cleanup phase for processing of common letter
# sequences (e.g., abbreviating common sequences, or removing unneeded
# diacritics).  Because SRS4g is relatively simple, the first phase is
# very short, and the third phase is not required.

import string
import re

# Open the FEWL source dictionary for reading and the SRS4g dictionary
# for writing.  Both handles are closed at the very end of the script.
inp = open('FEWL.txt', 'r')
out = open('SRS4g.dict', 'w')

# Recognize the various forms of schwa-r: any one of a, e, i, o, u or 3
# followed by r.
Rpat = re.compile('[aeiou3]r')

# Identity translation table.  It is passed to translate() together
# with a set of characters to delete, so that translate() only deletes.
nulltab = string.maketrans('', '')

# Define a table for straightforward replacement of FEWL vowels with
# SRS4g vowels
# NOTE(review): the two arguments differ in length (5 vs 2), which
# string.maketrans rejects.  The file is declared as windows-1252, so
# the target string has presumably lost its non-ASCII vowels in this
# copy -- restore it from the original file.
voweltab = string.maketrans('EIYOU', 'ou')

# Define a table for upper-casing the 26 ASCII lower-case letters
uptab = string.maketrans('abcdefghijklmnopqrstuvwxyz', \
                         'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Define a table for translating RP-related letters to American
# equivalents (currently just @ to V)
rptab = string.maketrans('@' ,'V')

# Report whether let is one of the symbols this program treats as a
# vowel.  This is a substring test, so the empty string also counts.
def isvowel ( let ):
    vowels = "aeiou3AEIOUQRVWY&"
    return let in vowels

# Return the index of the next letter at or after p, skipping over at
# most one ? and then at most one hyphenation mark (- or =).
# The result may equal len(s) when only markers remain (or when p was
# already at the end), so callers must check it before indexing s.
def nextafter( s, p ):
    end = len(s)
    # Guard each lookup: a ? in the last position used to push p past
    # the end and make the following s[p] raise IndexError.
    if p < end and s[p] == '?': p += 1
    if p < end and s[p] in "-=": p += 1
    return p

# Determine if the next letter at or after position p is a vowel, once
# nextafter() has skipped any ? and hyphenation mark.  The answer is 0
# when p, or the position nextafter() lands on, is past the end of s.
def vowelnext( s, p ):
    end = len(s)
    if p < end:
        pos = nextafter(s, p)
        if pos < end:
            return isvowel(s[pos])
    return 0

# Process each input line in turn
for line in inp:
    
# Parse the input line, which has the form
# '<FEWL spelling>  - <traditional spelling>'.  The traditional spelling
# goes into eng, and the FEWL representation into flew.
# NOTE(review): line[:-1] assumes the line ends with a newline, and
# parts[1] assumes the '  - ' separator is present -- confirm that
# every line of FEWL.txt satisfies both.
    parts = line[:-1].split('  - ')
    flew = parts[0]
    eng = parts[1]
    ilet = ''

# If the word is something like T-shirt, the initial letter is stored
# as :T:.  Store this letter in ilet and drop the three-character
# prefix from flew.
    if flew[0] == ':':
        ilet = flew[1]
        flew = flew[3:]

# Replace schwa-r with the symbol R.  SRS4g handles them all the same.
    flew = Rpat.sub('R', flew)

# Change the symbols representing RP sounds to the corresponding
# American symbols.  SRS4g is only for American English anyway.
    flew = flew.translate(rptab)

# Replace *'d letters.  *l [mjk(*l] becomes simply l.  *y [gIn*yus] and
# *w [*ws3l] were each meant to become a single placeholder letter (the
# one for *w is turned into wh later; a single letter makes it easier
# to process).  *X [atn*Xon] becomes X.
# NOTE(review): the placeholder letters for *y and *w are missing from
# this copy of the source (presumably non-ASCII characters lost from the
# windows-1252 original), so as written both sequences are deleted.
    flew = flew.replace('*l', 'l')
    flew = flew.replace('*y', '')
    flew = flew.replace('*w', '')
    flew = flew.replace('*X', 'X')

# Remove stress and affix markers (the characters ' } / \ { and ( ).
    flew = flew.translate(nulltab, "'}/\\{(")

# Change r (representing an RP only pronunciation) to &.  This is
# easier to do if we do it after the stress markers are removed.
# (Since SRS4g spells & as ur, this step is technically unnecessary.
# But it's better to put it in for didactic purposes.)
# NOTE(review): as written this replaces every remaining r, so none of
# the later tests against 'r' can match.  Presumably the pattern lost a
# non-ASCII character in this copy -- confirm against the original.
    flew = flew.replace('r', '&')

# and? [rk-and?-rOl] becomes an.
    flew = flew.replace('and?', 'an')

# -the- within a word (follow-the-leader) ends up spelled as -the-. 
# We lie about the vowel to force this outcome.
# NOTE(review): the replacement here may also have lost a character.
    if eng.find('-the-') >= 0:
        flew = flew.replace('De', 'D')

# Start of the mainline processing.  srs accumulates the SRS4g spelling
# and fi is the current index into flew.
    srs = ''
    fi = 0

# Handle certain sight words specially, keyed on the traditional
# spelling.
# NOTE(review): the spellings for 'to' and 'into' look truncated;
# presumably a non-ASCII vowel was lost -- confirm.
    if eng == 'a': srs = 'a'
    elif eng == 'the (before c)': srs = 'the'
    elif eng == 'to': srs = 't'
    elif eng == 'into': srs = 'int'
    elif eng == 'I': srs = 'I'

    else:
        
# Walk through the FEWL spelling a letter at a time, building up the
# SRS4g spelling in the variable srs.  Each pass handles flew[fi] in
# exactly one branch of the chain below, then advances fi by one.
        while fi < len(flew):
            
# Drop any questionable y [mgnity?Ud].  The extra increment here, plus
# the one at the bottom of the loop, skips both the y and the ?.
# NOTE(review): because of the "< len(flew)-2" bound, a y? occupying the
# last two positions is not caught here -- confirm whether intended.
            if fi < len(flew)-2 and flew[fi:fi+2] == 'y?':
                fi += 1
                pass

# Leave in any other ?, to be handled manually.
            elif flew[fi] == '?': 
                srs += '?'

# Treat either a definite hyphen [mdel-kls] or dubious hyphen [f=XUt]
# as a hyphen for now.  It may or may not persist to the end.
            elif flew[fi] == '-' or flew[fi] == '=':
                srs += '-'

# The quote indicates a traditional contraction.  Put an apostrophe 
# there.
            elif flew[fi] == '"':
                srs += "'"

# Just copy letters which SRS4g never changes.  SRS4g does not have
# rules for the nasal vowels - just copying them is the default
# action.
# NOTE(review): the nasal-vowel symbols originally named in this comment
# are missing from this copy, and may be missing from the list below.
            elif "bdfghjlmnprstvwyz".find(flew[fi]) >= 0:
                srs += flew[fi]

# C, X and J turn into ch, sh, and zh.
            elif flew[fi] == 'C': srs += 'ch'
            elif flew[fi] == 'X': srs += 'sh'
            elif flew[fi] == 'J': srs += 'zh'

# Both voiced and voiceless th are th in SRS4g.  The placeholder for *w
# becomes wh.
# NOTE(review): the placeholder letter is missing from this copy; a
# single character never equals '', so the wh branch cannot run as
# written.
            elif flew[fi] == 'D' or flew[fi] == 'T': srs += 'th'
            elif flew[fi] == '': srs += 'wh'

# The SRS rules do not cover the nasal u of uh-huh, or the ch of loch.
# Translate them to nh and kh respectively.
            elif flew[fi] == 'H': srs += 'nh'
            elif flew[fi] == 'K': srs += 'kh'

# SRS4g spells both /@r/ and /3r/ as ur.
            elif flew[fi] == 'R' or flew[fi] == '&': srs += 'ur'

# The schwa in SRS4g is spelled with a u, its inventor belonging to the
# school which holds /V/ and /@/ to be the same sound.  uh is used at
# the end of a word, or just before a hyphenation mark.
            elif "aeiou3".find(flew[fi]) >= 0: 
                if fi == len(flew)-1 or "-=".find(flew[fi+1]) >= 0: srs += 'uh'
                else: srs += 'u'

# All the different ways of marking short i or indistinct i in FEWL
# are translated to i, except that when it is spelled e in traditional
# spelling [gVdns], it is spelled e.
# NOTE(review): the FEWL symbols tested here are missing from this copy
# (empty literals), so neither branch can match a character as written.
            elif "".find(flew[fi]) >= 0: srs += 'i'
            elif flew[fi] == '' or flew[fi] == '': srs += 'e'

# Handle the other untroublesome SRS4g vowels by table lookup.
            elif "EIYOU".find(flew[fi]) >= 0: 
                srs += flew[fi].translate(voweltab)

# Translate the short oo sound (V) to its SRS4g letter.  Except that the
# inventor of SRS4g holds that the use of /Ur/ in dictionaries is a
# systematic error for /u:r/, so we change its spelling accordingly.
# NOTE(review): both output letters are missing from this copy, so this
# branch currently appends nothing in either case.
            elif flew[fi] == 'V':
                if fi == len(flew)-1 or flew[fi+1] != 'r': srs += ''
                else: srs += ''

# Use the spelling aw for one FEWL vowel, but o when it is followed by
# r (giving or).
# NOTE(review): the FEWL symbol tested here is missing from this copy.
            elif flew[fi] == '': 
                if fi == len(flew)-1 or flew[fi+1] != 'r': srs += 'aw'
                else: srs += 'o'

# The broad A sound is spelled o, except before r (car) or at the end
# of a word (spah).  It is also spelled ah when the next letter is h.
            elif flew[fi] == 'A': 
                if fi == len(flew)-1 or flew[fi+1] == 'h': srs += 'ah'
                elif fi == len(flew)-1 or flew[fi+1] != 'r': srs += 'o'
                else: srs += 'a'

# Short a is spelled a.  But before r it is spelled e (as if it were
# the e-r sequence -- indeed, the way it is pronounced by many).
# NOTE(review): the FEWL symbol for short a is missing from this copy.
            elif flew[fi] == '': 
                if fi == len(flew)-1 or flew[fi+1] != 'r': srs += 'a'
                else: srs += 'e'

# The ou and oy sounds are spelled with w/y at word end, or before a
# vowel.  Otherwise, they are spelled with u/i.
            elif flew[fi] == 'W':
                if fi == len(flew) - 1 or vowelnext(flew,fi+1): srs += 'ow'
                else: srs += 'ou'
            elif flew[fi] == 'Q':
                if fi == len(flew) - 1 or vowelnext(flew,fi+1): srs += 'oy'
                else: srs += 'oi'

# The ng nasal is spelled ng at word end, including before the -s/d
# inflection.  The ngk sound is spelled with an unmarked n at word end,
# or before the -s/d/ing inflection.  In all other cases/combinations,
# the ng nasal is spelled with a special letter.
# NOTE(review): that special letter is missing from this copy, so the
# two else branches append nothing.  Also, flew[fi+1:] == 'k' is true
# only when k is the last letter, and the inner '$-=' test looks at
# flew[fi+1] (the k itself) -- confirm whether flew[fi+1] == 'k' and
# flew[fi+2] were intended.
            elif flew[fi] == 'G':
                if fi == len(flew)-1 or '$-='.find(flew[fi+1]) >= 0: srs += 'ng'
                elif flew[fi+1:] == 'k':
                    if fi+1 == len(flew)-1 or '$-='.find(flew[fi+1]) >= 0 or \
                       flew[fi+2:].startswith('G'): srs += 'n'
                    else: srs += ''
                else: srs += ''
            elif flew[fi] == 'k':
                
# The k sound is spelled with a c before a, o or u, and k otherwise.
# We must remember that one vowel-plus-r sequence will end up spelled
# with an e, so k is kept before it.
# NOTE(review): the startswith('r') test has presumably lost the vowel
# symbol that preceded the r.
                if fi == len(flew)-1 or "-=$".find(flew[fi+1]) >= 0:
                    srs += 'k'
                else:
                    fn = fi+1
                    if flew[fn:].startswith('r'):
                        srs += 'k'
                    elif "aeiou3AEOVUWQ@R&".find(flew[fn]) >= 0:
                        srs += 'c'
                    else: srs += 'k'

# The plural ending is spelled phonetically, with s or z, depending on
# the preceding letter.
# NOTE(review): when fi is 0, flew[fi-1] reads the last letter of flew.
            elif flew[fi] == '$':
                if "fkKpt".find(flew[fi-1]) >= 0: srs += 's'
                else: srs += 'z'

# The past tense inflection is spelled phonetically, with d or t.
# NOTE(review): the FEWL symbol for the inflection is missing from this
# copy, so this branch cannot run as written.
            elif flew[fi] == '':
                if "CfkKpsX".find(flew[fi-1]) >= 0: srs += 't'
                else: srs += 'd'

# Has anything been left out?  Report the whole FEWL form and carry on;
# nothing is added to srs for this letter.
            else:
                print "I wasn't expecting this: %s." % flew
            fi += 1

# Remove hyphens from the output, unless the input had hyphens.
# In a few cases, the output may have more hyphens than the input.
    if srs.find('-') >= 0 and eng.find('-') < 0:
        srs = srs.translate(nulltab, '-')

# Put the output line back together and write it out.  A stored initial
# letter is put back in front with a hyphen; otherwise the result is
# capitalized when the traditional spelling starts with a capital.
# NOTE(review): eng[0] and srs[0] assume both strings are non-empty.
    if ilet != '':
        srs = ilet + '-' + srs
    elif eng[0].isupper():
        srs = srs[0].translate(uptab) + srs[1:]
    out.write(srs + '  - ' + eng + '\n')

# All input lines have been processed; release both files.
out.close()
inp.close()
