...: # the full lexicon

    ...:

    ...: lexicon = (wish | cat | dog | lexicalExceptions).optimize()

    ...:

    ...: lexiconPlural = lexicon @ plural @ phonology

    ...:

    ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

    ...:


In [24]: makePl("gus")

Out[24]: 'gis'


In [25]: makePl("nus")

Out[25]: 'nusɪz'


In [26]: makePl("guz")

Out[26]: 'guz'


In [27]: %load EngPlurals.py


In [28]: # %load EngPlurals.py

    ...:

    ...: # Jeff Heinz

    ...: # October 2018

    ...: # English Plural with Pynini

    ...:

    ...: import pynini

    ...: import functools

    ...:

    ...: A = functools.partial(pynini.acceptor, token_type="utf8")

    ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")

    ...:

    ...:

    ...: # some natural classes

    ...:

    ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()

    ...:

    ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()

    ...:

    ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()

    ...:

    ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()

    ...:

    ...: # all segments

    ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()

    ...:

    ...:

    ...: # phonological changes

    ...: # epenthesis: insert "ɪ" between two stridents (left and right contexts are both `strident`)

    ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)

    ...:

    ...: # devoicing: each voiced consonant in devoicePairs becomes its voiceless partner right after a voiceless segment

    ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()

    ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # empty right context: no restriction on what follows

    ...:

    ...: # order matters: epenthesis runs first, so an inserted ɪ shields the suffix z from devoicing after a voiceless strident

    ...: phonology = (epenthesis @ devoicing).optimize()

    ...:

    ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

    ...:

    ...: wish = A("wɪʃ")

    ...: cat = A("kæt")

    ...: cable = A("keɪbl")

    ...: dog = A("dɔg")

    ...:

    ...: wishesT = ((wish @ regPlural) @ phonology).optimize()

    ...: wishes = wishesT.stringify(token_type="utf8")

    ...:

    ...:

    ...: # THIS WORKS! :-) ☺

    ...:

    ...: def makeRegPl(stem):

    ...: x = (((A(stem)) @ regPlural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # a lexicon of memorized exceptions

    ...:

    ...: pluralExceptions = ( T("fʊt","fit") # foot/feet

    ...: | T("gus","gis") # goose/geese

    ...: | T("fɪʃ","fɪʃ") # fish/fish

    ...: | T("gutz","gutz")).optimize() # NOTE(review): looks like a scratch test entry; makePl("gutz") gives 'guts', so phonology still applies to exception outputs

    ...:

    ...: # input side of the exception pairs, i.e. the exceptional singular stems

    ...: lexicalExceptions = pynini.project(pluralExceptions)

    ...:

    ...: # every segment string that is not one of the exceptional stems

    ...: otherWords = ((sigmaStar - lexicalExceptions)).optimize()

    ...:

    ...: # exceptional stems map to their listed plural; every other string gets "z" suffixed

    ...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize()

    ...:

    ...: def makePl(stem):

    ...: x = (((A(stem)) @ plural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # the full lexicon

    ...:

    ...: lexicon = (wish | cat | dog | lexicalExceptions).optimize()

    ...:

    ...: lexiconPlural = lexicon @ plural @ phonology

    ...:

    ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

    ...:


In [29]: makePl("gutz")

Out[29]: 'guts'


In [30]: vowel

Out[30]:




In [31]: vowel.plus

Out[31]:




In [32]: vowel

Out[32]:




In [33]: vowel.closure(2,2)

Out[33]:




In [34]: vowel

Out[34]:




In [35]: %load EngPlurals.py


In [36]: # %load EngPlurals.py

    ...:

    ...: # Jeff Heinz

    ...: # October 2018

    ...: # English Plural with Pynini

    ...:

    ...: import pynini

    ...: import functools

    ...:

    ...: A = functools.partial(pynini.acceptor, token_type="utf8")

    ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")

    ...:

    ...:

    ...: # some natural classes

    ...:

    ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()

    ...:

    ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()

    ...:

    ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()

    ...:

    ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()

    ...:

    ...: # all segments

    ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()

    ...:

    ...:

    ...: # phonological changes

    ...: # epenthesis: insert "ɪ" between two stridents (left and right contexts are both `strident`)

    ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)

    ...:

    ...: # devoicing: each voiced consonant in devoicePairs becomes its voiceless partner right after a voiceless segment

    ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()

    ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # empty right context: no restriction on what follows

    ...:

    ...: # order matters: epenthesis runs first, so an inserted ɪ shields the suffix z from devoicing after a voiceless strident

    ...: phonology = (epenthesis @ devoicing).optimize()

    ...:

    ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

    ...:

    ...: wish = A("wɪʃ")

    ...: cat = A("kæt")

    ...: cable = A("keɪbl")

    ...: dog = A("dɔg")

    ...:

    ...: wishesT = ((wish @ regPlural) @ phonology).optimize()

    ...: wishes = wishesT.stringify(token_type="utf8")

    ...:

    ...:

    ...: # THIS WORKS! :-) ☺

    ...:

    ...: def makeRegPl(stem):

    ...: x = (((A(stem)) @ regPlural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # a lexicon of memorized exceptions

    ...:

    ...: pluralExceptions = ( T("fʊt","fit") # foot/feet

    ...: | T("gus","gis") # goose/geese

    ...: | T("fɪʃ","fɪʃ") # fish/fish

    ...: | T("gutz","gutz")).optimize() # NOTE(review): looks like a scratch test entry; makePl("gutz") gives 'guts', so phonology still applies to exception outputs

    ...:

    ...: # input side of the exception pairs, i.e. the exceptional singular stems

    ...: lexicalExceptions = pynini.project(pluralExceptions)

    ...:

    ...: # every segment string that is not one of the exceptional stems

    ...: otherWords = ((sigmaStar - lexicalExceptions)).optimize()

    ...:

    ...: # exceptional stems map to their listed plural; every other string gets "z" suffixed

    ...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize()

    ...:

    ...: def makePl(stem):

    ...: x = (((A(stem)) @ plural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # the full lexicon

    ...:

    ...: lexicon = (wish | cat | dog | lexicalExceptions).optimize()

    ...:

    ...: lexiconPlural = lexicon @ plural @ phonology

    ...:

    ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

    ...:


In [37]: vowel

Out[37]:




In [38]: (vowel.closure(2,2)).optimize()

Out[38]:




In [39]: vowel

Out[39]:




In [40]: %load EngPlurals.py


In [41]: # %load EngPlurals.py

    ...:

    ...: # Jeff Heinz

    ...: # October 2018

    ...: # English Plural with Pynini

    ...:

    ...: import pynini

    ...: import functools

    ...:

    ...: A = functools.partial(pynini.acceptor, token_type="utf8")

    ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")

    ...:

    ...:

    ...: # some natural classes

    ...:

    ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()

    ...:

    ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()

    ...:

    ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()

    ...:

    ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()

    ...:

    ...: # all segments

    ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()

    ...:

    ...:

    ...: # phonological changes

    ...: # epenthesis: insert "ɪ" between two stridents (left and right contexts are both `strident`)

    ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)

    ...:

    ...: # devoicing: each voiced consonant in devoicePairs becomes its voiceless partner right after a voiceless segment

    ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()

    ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # empty right context: no restriction on what follows

    ...:

    ...: # order matters: epenthesis runs first, so an inserted ɪ shields the suffix z from devoicing after a voiceless strident

    ...: phonology = (epenthesis @ devoicing).optimize()

    ...:

    ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

    ...:

    ...: wish = A("wɪʃ")

    ...: cat = A("kæt")

    ...: cable = A("keɪbl")

    ...: dog = A("dɔg")

    ...:

    ...: wishesT = ((wish @ regPlural) @ phonology).optimize()

    ...: wishes = wishesT.stringify(token_type="utf8")

    ...:

    ...:

    ...: # THIS WORKS! :-) ☺

    ...:

    ...: def makeRegPl(stem):

    ...: x = (((A(stem)) @ regPlural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # a lexicon of memorized exceptions

    ...:

    ...: pluralExceptions = ( T("fʊt","fit") # foot/feet

    ...: | T("gus","gis") # goose/geese

    ...: | T("fɪʃ","fɪʃ") # fish/fish

    ...: | T("gutz","gutz")).optimize() # NOTE(review): looks like a scratch test entry; makePl("gutz") gives 'guts', so phonology still applies to exception outputs

    ...:

    ...: # input side of the exception pairs, i.e. the exceptional singular stems

    ...: lexicalExceptions = pynini.project(pluralExceptions)

    ...:

    ...: # every segment string that is not one of the exceptional stems

    ...: otherWords = ((sigmaStar - lexicalExceptions)).optimize()

    ...:

    ...: # exceptional stems map to their listed plural; every other string gets "z" suffixed

    ...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize()

    ...:

    ...: def makePl(stem):

    ...: x = (((A(stem)) @ plural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # the full lexicon

    ...:

    ...: lexicon = (wish | cat | dog | lexicalExceptions).optimize()

    ...:

    ...: lexiconPlural = lexicon @ plural @ phonology

    ...:

    ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

    ...:


In [42]: vowel

Out[42]:




In [43]: vowel.closure(2,2)

Out[43]:




In [44]: ((A("x")).closure(3,3)).optimize()

Out[44]:




In [45]: ((A("x")).closure(3,4)).optimize()

Out[45]:




In [46]: ((A("x")).closure(3,2)).optimize()

Out[46]:




In [47]: ((A("x")).closure(0,0)).optimize()

Out[47]:




In [48]: ((A("x")).closure(1,0)).optimize()

Out[48]:




In [49]: ((A("x")).closure(1,2)).optimize()

Out[49]:




In [50]: ((A("x")).closure(2,2)).optimize()

Out[50]:




In [51]: x + x

---------------------------------------------------------------------------

NameError Traceback (most recent call last)

<ipython-input-51-e2d9e13b7cb9> in <module>()

----> 1 x + x


NameError: name 'x' is not defined


In [52]: %load EngPlurals.py


In [53]: # %load EngPlurals.py

    ...:

    ...: # Jeff Heinz

    ...: # October 2018

    ...: # English Plural with Pynini

    ...:

    ...: import pynini

    ...: import functools

    ...:

    ...: A = functools.partial(pynini.acceptor, token_type="utf8")

    ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")

    ...:

    ...:

    ...: # some natural classes

    ...:

    ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()

    ...:

    ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()

    ...:

    ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()

    ...:

    ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()

    ...:

    ...: # all segments

    ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()

    ...:

    ...:

    ...: # phonological changes

    ...: # epenthesis: insert "ɪ" between two stridents (left and right contexts are both `strident`)

    ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)

    ...:

    ...: # devoicing: each voiced consonant in devoicePairs becomes its voiceless partner right after a voiceless segment

    ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()

    ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # empty right context: no restriction on what follows

    ...:

    ...: # order matters: epenthesis runs first, so an inserted ɪ shields the suffix z from devoicing after a voiceless strident

    ...: phonology = (epenthesis @ devoicing).optimize()

    ...:

    ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

    ...:

    ...: wish = A("wɪʃ")

    ...: cat = A("kæt")

    ...: cable = A("keɪbl")

    ...: dog = A("dɔg")

    ...:

    ...: wishesT = ((wish @ regPlural) @ phonology).optimize()

    ...: wishes = wishesT.stringify(token_type="utf8")

    ...:

    ...:

    ...: # THIS WORKS! :-) ☺

    ...:

    ...: def makeRegPl(stem):

    ...: x = (((A(stem)) @ regPlural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # a lexicon of memorized exceptions

    ...:

    ...: pluralExceptions = ( T("fʊt","fit") # foot/feet

    ...: | T("gus","gis") # goose/geese

    ...: | T("fɪʃ","fɪʃ") # fish/fish

    ...: | T("gutz","gutz")).optimize() # NOTE(review): looks like a scratch test entry; makePl("gutz") gives 'guts', so phonology still applies to exception outputs

    ...:

    ...: # input side of the exception pairs, i.e. the exceptional singular stems

    ...: lexicalExceptions = pynini.project(pluralExceptions)

    ...:

    ...: # every segment string that is not one of the exceptional stems

    ...: otherWords = ((sigmaStar - lexicalExceptions)).optimize()

    ...:

    ...: # exceptional stems map to their listed plural; every other string gets "z" suffixed

    ...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize()

    ...:

    ...: def makePl(stem):

    ...: x = (((A(stem)) @ plural) @ phonology).optimize()

    ...: y = pynini.project(x,True)

    ...: return y.stringify(token_type="utf8")

    ...:

    ...:

    ...: # the full lexicon

    ...:

    ...: lexicon = (wish | cat | dog | lexicalExceptions).optimize()

    ...:

    ...: lexiconPlural = lexicon @ plural @ phonology

    ...:

    ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

    ...:


In [53]:


In [54]: lexicon

Out[54]:




In [55]: lexicon.text()

Out[55]: '0\t1\td\td\n0\t2\tf\tf\n0\t3\tg\tg\n0\t4\tk\tk\n0\t5\tw\tw\n1\t6\tɔ\tɔ\n2\t7\tɪ\tɪ\n2\t8\tʊ\tʊ\n3\t9\tu\tu\n4\t8\t<0xe6>\t<0xe6>\n5\t7\tɪ\tɪ\n6\t10\tg\tg\n7\t10\tʃ\tʃ\n8\t10\tt\tt\n9\t10\ts\ts\n9\t11\tt\tt\n10\n11\t10\tz\tz\n'


In [56]: print (lexicon.text())

0 1 d d

0 2 f f

0 3 g g

0 4 k k

0 5 w w

1 6 ɔ ɔ

2 7 ɪ ɪ

2 8 ʊ ʊ

3 9 u u

4 8 <0xe6> <0xe6>

5 7 ɪ ɪ

6 10 g g

7 10 ʃ ʃ

8 10 t t

9 10 s s

9 11 t t

10

11 10 z z



In [57]: