# coding: windows-1252

import string
import sys
import types

# Open the three files the script works on: the SRS4g dictionary (from the
# current directory), the input text <argv[1]>.txt and the output file
# <argv[1]>.srs4g.txt.  Nothing here guards against a missing command-line
# argument or a missing file.
# NOTE: the name `dict` shadows the Python builtin for the rest of the module.
dict = open('SRS4g.dict', 'r')
inp = open(sys.argv[1] + '.txt', 'r')
out = open(sys.argv[1] + '.srs4g.txt', 'w')

# dtab will end up holding the SRS4g dictionary.  It is filled by the
# dictionary processing loop further down: the key is the second field of
# a dictionary line, and the value is either a single SRS4g spelling (a
# string) or a list of alternative spellings.
dtab = { }

# thlist contains a list of vowels which indicate that a following
# th is voiced (e.g., bathe, breathe, writhe, loathe, soothe).
# NOTE(review): all five entries are empty strings as the file stands.
# plural() and past() test a single character against this tuple, so no
# character can ever match and every final th is treated as unvoiced.
# Presumably the original non-ASCII vowel literals (the file declares
# windows-1252) were lost in a conversion -- TODO confirm and restore.
thlist = ('', '', '', '', '')

# Conditionally capitalise a transliteration.
# c is the first character of the input word and t is the SRS4g
# transliteration.  t is returned unchanged when c is a lower-case
# character; in every other case the first character of t is upper-cased.
def condup(c, t):
    if c.islower():
        return t
    head, rest = t[0], t[1:]
    code = ord(head)
    if code >= 0xe0:
        # Characters from 0xe0 upwards are upper-cased by hand: the
        # upper-case form is taken to sit 32 code points lower.
        head = chr(code - 32)
    else:
        head = head.upper()
    return head + rest
    
# Append the suffix s to p.
#   p - a single string, or an iterable of strings
#   s - the string to append
# Returns p + s when p is a string; otherwise a new list holding every
# element of p with s appended.
def lappend(p, s):
    # isinstance() rather than a comparison against types.StringType:
    # that attribute no longer exists in Python 3.
    if isinstance(p, str):
        return p + s
    return [w + s for w in p]

# Report whether l is a "true" consonant, i.e. neither a vowel
# (a, e, i, o, u) nor a semivowel (w, y).
def istcons(l):
    return l not in 'aeiouwy'

# Report whether l is a doublable consonant, i.e. not a vowel, not a
# semivowel (w, y) and not h.
def isdcons(l):
    return l not in 'aeiouwyh'

# Look up the root word of an English possessive (<whatever>'s).
# The last two characters of w are dropped and the remainder is looked up
# in dtab.  Returns the dictionary entry, or '' when there is none.
def findposs(w):
    root = w[:-2]
    return dtab.get(root, '')

# Look up the root word of an English plural, if it can be identified.
# w is a word ending in s.  Returns the dictionary entry of the root,
# a two-element list [root, fallback] when both an -es and an -e(s)
# reading were found, or '' when nothing matched.
def findplu(w):
    long_enough = len(w) > 3
    es_like = long_enough and w[-2] == 'e'
    fallback = ''
    # First try simply dropping the final s
    root = dtab.get(w[:-1], '')
    # attaches could be attach(es) or attache(s): keep the -e reading
    # aside and go on looking for the -es one
    if root != '' and es_like:
        fallback, root = root, ''
    # -ies is looked up as -y
    if root == '' and long_enough and w[-3:-1] == 'ie':
        root = dtab.get(w[:-3] + 'y', '')
    # Drop a whole -es, but only after a letter where that makes sense
    if root == '' and es_like:
        stem = w[:-2]
        if w[-3] in 'ioujsxz' or stem.endswith('ch') or stem.endswith('sh'):
            root = dtab.get(stem, '')
    # -<xx>es is looked up as -<x> (<x> being s or z only)
    if root == '' and len(w) > 4 and w[-2] == 'e' and w[-3] == w[-4] \
       and w[-3] in 'sz':
        root = dtab.get(w[:-3], '')
    # Only the -e reading (or nothing at all) was found
    if root == '':
        return fallback
    # Both readings were found: hand back the pair
    if fallback != '':
        return [root, fallback]
    return root

# Look up the root word of an English past tense, if it can be identified.
# w is a word ending in ed.  Returns the dictionary entry of the root,
# a two-element list [root, fallback] when both an -ed and an -e(d)
# reading were found, or '' when nothing matched.
def findpast(w):
    n = len(w)
    fallback = ''
    # First try simply dropping the final d
    hit = dtab.get(w[:-1], '')
    # <xx>ed after a true consonant could be <xx>(ed) or <xx>e(d): keep
    # the -e reading aside and go on looking for the -ed one
    if hit != '' and n > 3 and w[-2] == 'e' and istcons(w[-3]):
        fallback, hit = hit, ''
    if hit == '':
        # Candidate roots, in the order they are to be tried
        candidates = [w[:-2]]                  # drop -ed
        if n > 3 and w[-3] == 'i':
            candidates.append(w[:-3] + 'y')    # -ied -> -y
        if n > 4 and w[-2] == 'e' and w[-3] == w[-4] and isdcons(w[-3]):
            candidates.append(w[:-3])          # -<xx>ed -> -<x>
        if n > 4 and w[-4:-1] == 'cke':
            candidates.append(w[:-3])          # -cked treated like -cced
        for key in candidates:
            hit = dtab.get(key, '')
            if hit != '':
                break
    # Only the -e reading (or nothing at all) was found
    if hit == '':
        return fallback
    # Both readings were found: hand back the pair
    if fallback != '':
        return [hit, fallback]
    return hit

# Look up the root word of an English present participle, if it can be
# identified.
#   w - a word ending in ing
# Returns the dictionary entry of the root (a string or a list of
# strings), or '' when no root was found.
def findpres(w):
    # The rules below inspect the letter in front of -ing.  With nothing
    # in front of the suffix (w is just "ing") there is no root to look
    # up, and indexing w[-4] would raise IndexError.
    if len(w) < 4:
        return ''
    stem = w[:-3]
    cons_before = istcons(w[-4])
    ng_root = len(w) > 6 and w[-5:-3] == 'ng'
    t = ''
    # Try the root form -<x>e for -<x>ing, unless this root ends in ng
    # (avoids finding singe for singing)
    if cons_before and not ng_root:
        t = dtab.get(stem + 'e', '')
    # Try it without the ending -ing
    if t == '':
        t = dtab.get(stem, '')
    # Now try -<nge> for -<nging>
    if t == '' and cons_before and ng_root:
        t = dtab.get(stem + 'e', '')
    # If it ends with -<xx>ing, look it up with -<x> (<x> any doublable
    # consonant)
    if t == '' and len(w) > 5 and w[-4] == w[-5] and isdcons(w[-4]):
        t = dtab.get(w[:-4], '')
    # Treat cking as a variant of ccing
    if t == '' and len(w) > 5 and w[-5:-3] == 'ck':
        t = dtab.get(w[:-4], '')
    return t

# Look up the root word of a comparative adjective, if it can be
# identified.  w is a word ending in er.  Returns the dictionary entry of
# the first root that is found, or '' when none is.
def findmore(w):
    n = len(w)
    # Without the final -r, then without the whole -er
    for key in (w[:-1], w[:-2]):
        hit = dtab.get(key, '')
        if hit != '':
            return hit
    # -ier is looked up as -y
    if n > 3 and w[-3] == 'i':
        hit = dtab.get(w[:-3] + 'y', '')
        if hit != '':
            return hit
    # -<xx>er is looked up as -<x> (<x> any doublable consonant)
    if n > 4 and w[-3] == w[-4] and isdcons(w[-3]):
        return dtab.get(w[:-3], '')
    return ''

# Look up the root word of a superlative adjective, if it can be
# identified.  w is a word ending in est.  Returns the dictionary entry
# of the first root that is found, or '' when none is.
def findmost(w):
    n = len(w)
    # Without the final -st, then without the whole -est
    for key in (w[:-2], w[:-3]):
        hit = dtab.get(key, '')
        if hit != '':
            return hit
    # -iest is looked up as -y
    if n > 4 and w[-4] == 'i':
        hit = dtab.get(w[:-4] + 'y', '')
        if hit != '':
            return hit
    # -<xx>est is looked up as -<x> (<x> any doublable consonant)
    if n > 5 and w[-4] == w[-5] and isdcons(w[-4]):
        return dtab.get(w[:-4], '')
    return ''

# Return the possessive of an SRS4g word, or list of words.
#   w - '' (no root found), one SRS4g spelling, or a (possibly nested)
#       list of spellings
# Returns '' for '', a string for a string, and a flat list for a list.
def possess(w):
    if w == '':
        return ''
    # isinstance() rather than a comparison against types.ListType: that
    # attribute no longer exists in Python 3.
    if isinstance(w, list):
        r = []
        # Combine all the possibilities into a single flat list
        for ww in w:
            ww = possess(ww)
            if isinstance(ww, list):
                r.extend(ww)
            else:
                r.append(ww)
        return r
    # 's after c, f, k, p, t and after a final h that is not part of zh;
    # 'z after everything else
    if w[-1] in 'cfkpt' or (w.endswith('h') and not w.endswith('zh')):
        return w + "'s"
    return w + "'z"

# Return the regular plural of an SRS4g word, or list of words.
#   w - '' (no root found), one SRS4g spelling, or a (possibly nested)
#       list of spellings
# Returns '' for '', a string for a string, and a flat list for a list.
def plural(w):
    if w == '':
        return ''
    # isinstance() rather than a comparison against types.ListType: that
    # attribute no longer exists in Python 3.
    if isinstance(w, list):
        r = []
        # Combine all the possibilities into a single flat list
        for ww in w:
            ww = plural(ww)
            if isinstance(ww, list):
                r.extend(ww)
            else:
                r.append(ww)
        return r
    # -ez after j, s, z and after ch, zh, sh.  endswith() is used so that
    # a one-letter word never indexes w[-2].
    if w[-1] in 'jsz' or w.endswith(('ch', 'zh', 'sh')):
        return w + 'ez'
    # -s after the remaining listed consonants (ch and sh were already
    # handled above)
    if w[-1] in 'cfkpt':
        return w + 's'
    # We guess whether an ending th is voiced or not based on the
    # preceding vowel; a th taken to be unvoiced gets -s
    if w.endswith('th') and (len(w) < 3 or w[-3] not in thlist):
        return w + 's'
    return w + 'z'

# Return the regular past form of an SRS4g word, or list of words.
#   w - '' (no root found), one SRS4g spelling, or a (possibly nested)
#       list of spellings
# Returns '' for '', a string for a string, and a flat list for a list.
def past(w):
    if w == '':
        return ''
    # isinstance() rather than a comparison against types.ListType: that
    # attribute no longer exists in Python 3.
    if isinstance(w, list):
        r = []
        # Combine all the possibilities into a single flat list
        for ww in w:
            ww = past(ww)
            if isinstance(ww, list):
                r.extend(ww)
            else:
                r.append(ww)
        return r
    # -ed after d and t
    if w[-1] in 'dt':
        return w + 'ed'
    # -t after c, f, k, p, s and after ch, sh.  endswith() is used so
    # that a one-letter word never indexes w[-2].
    if w[-1] in 'cfkps' or w.endswith(('ch', 'sh')):
        return w + 't'
    # We guess whether an ending th is voiced or not based on the
    # preceding vowel; a th taken to be unvoiced gets -t
    if w.endswith('th') and (len(w) < 3 or w[-3] not in thlist):
        return w + 't'
    return w + 'd'

# Return the regular present participle of an SRS4g word, or list of
# words.
#   w - '' (no root found), one SRS4g spelling, or a (possibly nested)
#       list of spellings
# Returns '' for '', a string for a string, and a flat list for a list.
def pres(w):
    if w == '':
        return ''
    # isinstance() rather than a comparison against types.ListType: that
    # attribute no longer exists in Python 3.
    if isinstance(w, list):
        r = []
        # Combine all the possibilities into a single flat list
        for ww in w:
            ww = pres(ww)
            if isinstance(ww, list):
                r.extend(ww)
            else:
                r.append(ww)
        return r
    # Deal with the mutation of ng from ending to medial position.
    # NOTE(review): as written this replaces the final ng by ing, so
    # 'sing' becomes 'siing'.  Possibly a non-ASCII character was lost
    # from the literal -- confirm against the original source.
    if w.endswith('ng'):
        return w[:-2] + 'ing'
    return w + 'ing'

# Return the comparative form of an SRS4g word, or list of words.
#   w - '' (no root found), one SRS4g spelling, or a (possibly nested)
#       list of spellings
# Returns '' for '', a string for a string, and a flat list for a list.
def more(w):
    if w == '':
        return ''
    # isinstance() rather than a comparison against types.ListType: that
    # attribute no longer exists in Python 3.
    if isinstance(w, list):
        r = []
        # Combine all the possibilities into a single flat list
        for ww in w:
            ww = more(ww)
            if isinstance(ww, list):
                r.extend(ww)
            else:
                r.append(ww)
        return r
    # Words like "long" and "strong" pick up an extra g sound in their
    # comparative.
    # NOTE(review): as written the final ng is replaced by gur, so 'long'
    # becomes 'logur'.  Possibly a non-ASCII character was lost from the
    # literal -- confirm against the original source.
    if w.endswith('ng'):
        return w[:-2] + 'gur'
    return w + 'ur'

# Return the superlative form of an SRS4g word, or list of words.
#   w - '' (no root found), one SRS4g spelling, or a (possibly nested)
#       list of spellings
# Returns '' for '', a string for a string, and a flat list for a list.
def most(w):
    if w == '':
        return ''
    # isinstance() rather than a comparison against types.ListType: that
    # attribute no longer exists in Python 3.
    if isinstance(w, list):
        r = []
        # Combine all the possibilities into a single flat list
        for ww in w:
            ww = most(ww)
            if isinstance(ww, list):
                r.extend(ww)
            else:
                r.append(ww)
        return r
    # Words like "long" and "strong" pick up an extra g sound in their
    # superlative.
    # NOTE(review): as written the final ng is replaced by gest, so
    # 'long' becomes 'logest'.  Possibly a non-ASCII character was lost
    # from the literal -- confirm against the original source.
    if w.endswith('ng'):
        return w[:-2] + 'gest'
    return w + 'est'

# Determine if w is an inflection.  If it is, produce the SRS4g
# inflection of the root form; otherwise return ''.
# The suffixes are tested in the order listed and only the first one that
# matches is tried, so 's is seen before a plain s.
def inflect(w):
    rules = (
        ("'s", findposs, possess),
        ('s', findplu, plural),
        ('ed', findpast, past),
        ('ing', findpres, pres),
        ('er', findmore, more),
        ('est', findmost, most),
    )
    for suffix, find_root, build_form in rules:
        if w.endswith(suffix):
            return build_form(find_root(w))
    return ''

# Process the hyphenated word w as a compound: every hyphen-separated
# portion is lower-cased and transliterated on its own, and the results
# are joined with hyphens again.
def compound(w):
    pieces = [trans(portion.lower()) for portion in w.split('-')]
    return '-'.join(pieces)

# The word w begins or ends with an apostrophe.  This may be part of the
# word, or may be a quotation mark.  The apostrophes at the ends are
# treated as quotation marks here: they are peeled off, what is left is
# transliterated, and the apostrophes are put back.
def quoted(w):
    # A lone apostrophe stays as it is
    if w == "'":
        return "'"
    # No closing apostrophe: only the first character is peeled off
    if w[-1] != "'":
        return "'" + trans(w[1:])
    # Apostrophes at both ends
    if w[0] == "'":
        return "'" + trans(w[1:-1]) + "'"
    # Closing apostrophe only
    return trans(w[:-1]) + "'"

# Transliterate the argument into SRS4g.
#   w - one word as gathered by the input loop: letters, hyphens and
#       apostrophes
# Returns a string:
#   - the transliteration, capitalised by condup() according to w[0];
#   - [a/b/...] when there is more than one possible transliteration
#     (due to words like "use");
#   - <w> when the word can't be found in the dictionary, or surmised;
#   - '' for an empty w.
def trans(w):
    if w == '':
        return w
    lw = w.lower()
    # A word with capital letters not following a hyphen or initial
    # apostrophe is something strange.  Don't try to process it.
    for i in range(1, len(w)):
        if w[i].isupper() and w[i-1] != '-' and (i != 1 or w[0] != "'"):
            return '<' + w + '>'
    # Try to ignore bare hyphens, or one at the end of a word.  This may
    # cause the results to be incorrect for a word split between two lines.
    if w == '-':
        return '-'
    if w[0] == '-':
        return '-' + trans(w[1:])
    if w[-1] == '-':
        return trans(w[:-1]) + '-'
    # Look the word up in the dictionary
    t = dtab.get(lw, '')
    if t == '':
        # If not there, is it an inflection?
        t = inflect(lw)
        # Deal with single quotes.  The previous steps allow 'tis to
        # be handled successfully
        if t == '' and (lw[0] == "'" or lw[-1] == "'"):
            t = quoted(w)
        # If the word is a hyphenated compound, try breaking it apart
        if t == '' and lw.find('-') >= 1:
            t = compound(w)
        # Eventually, give up
        if t == '':
            return '<' + w + '>'
    # If only one transliteration was found, return it.  isinstance() is
    # used rather than a comparison against types.StringType: that
    # attribute no longer exists in Python 3.
    if isinstance(t, str):
        return condup(w[0], t)
    # There are two forms of "the" in the dictionary, but we always want
    # the first in SRS4g
    if lw == 'the':
        return condup(w[0], t[0])
    # If there are multiple transliterations, combine them in [brackets]
    return '[' + '/'.join([condup(w[0], tt) for tt in t]) + ']'
        
# This is the main dictionary processing loop.  Every line of the
# dictionary file has the form "<SRS4g spelling>  - <TS spelling>", the TS
# spelling optionally followed by parenthesized information.  Reading
# stops at the end of the file or at the first empty line.
try:
    for line in dict:
        # Remove the line terminator only.  Slicing off the last character
        # unconditionally would eat a letter from a final line that has no
        # newline, and would leave a stray '\r' on CRLF files.
        line = line.rstrip('\r\n')
        if not line:
            break
        # Only store lower-case in the dictionary table
        if not line[0].islower():
            line = line.lower()
        # Split the entry into the SRS4g spelling and the TS spelling
        ent = line.split('  - ')
        # Throw away any parenthesized info in the TS, together with the
        # white space in front of it
        paren = ent[1].find('(')
        if paren >= 0:
            ent[1] = ent[1][:paren].rstrip()
        # If this is a new word, just add it.  If it's an old word, add
        # the new SRS4g spelling to the list of spellings for the word
        if ent[1] in dtab:
            known = dtab[ent[1]]
            if isinstance(known, list):
                if ent[0] not in known:
                    known.append(ent[0])
            elif ent[0] != known:
                dtab[ent[1]] = [known, ent[0]]
        else:
            dtab[ent[1]] = ent[0]
finally:
    # Release the dictionary file even if an entry could not be parsed
    dict.close()

# This is the input processing loop.  The input is copied to the output
# line by line, with every word replaced by its transliteration.
try:
    for line in inp:
        w = ''
        for c in line:
            # A word is a continuous sequence of letters, hyphens and
            # apostrophes.  Uses of the hyphen and apostrophe as
            # punctuation are dealt with in lower level routines
            if c == '-' or c == "'" or c.isalpha():
                w += c
                continue
            # Once we've gathered a word together, transliterate it, and
            # write the result out
            if w:
                out.write(trans(w))
                w = ''
            # And then write out the non-word character (including the
            # line-end character).
            out.write(c)
        # A word that runs right up to the end of the line has not been
        # written yet.  This happens when the last line of the file has no
        # line-end character; without this the final word would be lost.
        if w:
            out.write(trans(w))
finally:
    out.close()
    inp.close()
