...: # the full lexicon
...:
...: lexicon = (wish | cat | dog | lexicalExceptions).optimize() # union of the listed stems with lexicalExceptions
...:
...: lexiconPlural = lexicon @ plural @ phonology # compose: lexicon, then plural morphology, then phonology
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
In [24]: makePl("gus")
Out[24]: 'gis'
In [25]: makePl("nus")
Out[25]: 'nusɪz'
In [26]: makePl("guz")
Out[26]: 'guz'
In [27]: %load EngPlurals.py
In [28]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...:
...: A = functools.partial(pynini.acceptor, token_type="utf8") # A(s): acceptor for string s, tokenized as UTF-8
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8") # T(x, y): transducer from string x to string y, both tokenized as UTF-8
...:
...:
...: # some natural classes: each is a union (|) of single-segment acceptors
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...:
...: # all segments: sigmaStar accepts any sequence (including the empty one) of vowels and consonants
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes, written as context-dependent rewrite rules over sigmaStar
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar) # insert ɪ between two adjacent stridents
...:
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize() # voiced obstruent -> voiceless counterpart
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # apply devoicePairs right after a voiceless segment; right context is unrestricted
...:
...: phonology = (epenthesis @ devoicing).optimize() # composition: epenthesis applies first, then devoicing
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z' to any segment string
...:
...: wish = A("wɪʃ") # sample stems
...: cat = A("kæt")
...: cable = A("keɪbl") # NOTE(review): defined but not used by anything else visible in this listing
...: dog = A("dɔg")
...:
...: wishesT = ((wish @ regPlural) @ phonology).optimize() # wish -> wish+z -> phonology, kept as a transducer
...: wishes = wishesT.stringify(token_type="utf8") # NOTE(review): stringify is called on the transducer itself, whereas makeRegPl projects onto the output side first; confirm both read the same tape
...:
...:
...: # THIS WORKS! :-) ☺
...:
...: def makeRegPl(stem):
...: x = (((A(stem)) @ regPlural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # a lexicon of memorized exceptions
...:
...: pluralExceptions = ( T("fʊt","fit") # foot/feet
...: | T("gus","gis") # goose/geese
...: | T("fɪʃ","fɪʃ") # fish/fish
...: | T("gutz","gutz")).optimize() # NOTE(review): not a real stem; looks like a scratch test entry, confirm before keeping
...:
...: lexicalExceptions = pynini.project(pluralExceptions) # input side of the pairs: the stems with memorized plurals
...:
...: otherWords = ((sigmaStar - lexicalExceptions)).optimize() # every segment string that is not one of those stems
...:
...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize() # exceptions map to their listed plural; all other stems get 'z' appended
...:
...: def makePl(stem):
...: x = (((A(stem)) @ plural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # the full lexicon
...:
...: lexicon = (wish | cat | dog | lexicalExceptions).optimize() # known stems: three sample nouns plus the stems of the exception pairs
...:
...: lexiconPlural = lexicon @ plural @ phonology # maps each stem in the lexicon through plural morphology, then phonology
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
In [29]: makePl("gutz")
Out[29]: 'guts'
In [30]: vowel
Out[30]:
In [31]: vowel.plus
Out[31]:
In [32]: vowel
Out[32]:
In [33]: vowel.closure(2,2)
Out[33]:
In [34]: vowel
Out[34]:
In [35]: %load EngPlurals.py
In [36]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...:
...: A = functools.partial(pynini.acceptor, token_type="utf8") # A(s): acceptor for string s, tokenized as UTF-8
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8") # T(x, y): transducer from string x to string y, both tokenized as UTF-8
...:
...:
...: # some natural classes: each is a union (|) of single-segment acceptors
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...:
...: # all segments: sigmaStar accepts any sequence (including the empty one) of vowels and consonants
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes, written as context-dependent rewrite rules over sigmaStar
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar) # insert ɪ between two adjacent stridents
...:
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize() # voiced obstruent -> voiceless counterpart
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # apply devoicePairs right after a voiceless segment; right context is unrestricted
...:
...: phonology = (epenthesis @ devoicing).optimize() # composition: epenthesis applies first, then devoicing
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z' to any segment string
...:
...: wish = A("wɪʃ") # sample stems
...: cat = A("kæt")
...: cable = A("keɪbl") # NOTE(review): defined but not used by anything else visible in this listing
...: dog = A("dɔg")
...:
...: wishesT = ((wish @ regPlural) @ phonology).optimize() # wish -> wish+z -> phonology, kept as a transducer
...: wishes = wishesT.stringify(token_type="utf8") # NOTE(review): stringify is called on the transducer itself, whereas makeRegPl projects onto the output side first; confirm both read the same tape
...:
...:
...: # THIS WORKS! :-) ☺
...:
...: def makeRegPl(stem):
...: x = (((A(stem)) @ regPlural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # a lexicon of memorized exceptions
...:
...: pluralExceptions = ( T("fʊt","fit") # foot/feet
...: | T("gus","gis") # goose/geese
...: | T("fɪʃ","fɪʃ") # fish/fish
...: | T("gutz","gutz")).optimize() # NOTE(review): not a real stem; looks like a scratch test entry, confirm before keeping
...:
...: lexicalExceptions = pynini.project(pluralExceptions) # input side of the pairs: the stems with memorized plurals
...:
...: otherWords = ((sigmaStar - lexicalExceptions)).optimize() # every segment string that is not one of those stems
...:
...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize() # exceptions map to their listed plural; all other stems get 'z' appended
...:
...: def makePl(stem):
...: x = (((A(stem)) @ plural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # the full lexicon
...:
...: lexicon = (wish | cat | dog | lexicalExceptions).optimize() # known stems: three sample nouns plus the stems of the exception pairs
...:
...: lexiconPlural = lexicon @ plural @ phonology # maps each stem in the lexicon through plural morphology, then phonology
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
In [37]: vowel
Out[37]:
In [38]: (vowel.closure(2,2)).optimize()
Out[38]:
In [39]: vowel
Out[39]:
In [40]: %load EngPlurals.py
In [41]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...:
...: A = functools.partial(pynini.acceptor, token_type="utf8") # A(s): acceptor for string s, tokenized as UTF-8
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8") # T(x, y): transducer from string x to string y, both tokenized as UTF-8
...:
...:
...: # some natural classes: each is a union (|) of single-segment acceptors
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...:
...: # all segments: sigmaStar accepts any sequence (including the empty one) of vowels and consonants
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes, written as context-dependent rewrite rules over sigmaStar
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar) # insert ɪ between two adjacent stridents
...:
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize() # voiced obstruent -> voiceless counterpart
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # apply devoicePairs right after a voiceless segment; right context is unrestricted
...:
...: phonology = (epenthesis @ devoicing).optimize() # composition: epenthesis applies first, then devoicing
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z' to any segment string
...:
...: wish = A("wɪʃ") # sample stems
...: cat = A("kæt")
...: cable = A("keɪbl") # NOTE(review): defined but not used by anything else visible in this listing
...: dog = A("dɔg")
...:
...: wishesT = ((wish @ regPlural) @ phonology).optimize() # wish -> wish+z -> phonology, kept as a transducer
...: wishes = wishesT.stringify(token_type="utf8") # NOTE(review): stringify is called on the transducer itself, whereas makeRegPl projects onto the output side first; confirm both read the same tape
...:
...:
...: # THIS WORKS! :-) ☺
...:
...: def makeRegPl(stem):
...: x = (((A(stem)) @ regPlural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # a lexicon of memorized exceptions
...:
...: pluralExceptions = ( T("fʊt","fit") # foot/feet
...: | T("gus","gis") # goose/geese
...: | T("fɪʃ","fɪʃ") # fish/fish
...: | T("gutz","gutz")).optimize() # NOTE(review): not a real stem; looks like a scratch test entry, confirm before keeping
...:
...: lexicalExceptions = pynini.project(pluralExceptions) # input side of the pairs: the stems with memorized plurals
...:
...: otherWords = ((sigmaStar - lexicalExceptions)).optimize() # every segment string that is not one of those stems
...:
...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize() # exceptions map to their listed plural; all other stems get 'z' appended
...:
...: def makePl(stem):
...: x = (((A(stem)) @ plural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # the full lexicon
...:
...: lexicon = (wish | cat | dog | lexicalExceptions).optimize() # known stems: three sample nouns plus the stems of the exception pairs
...:
...: lexiconPlural = lexicon @ plural @ phonology # maps each stem in the lexicon through plural morphology, then phonology
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
In [42]: vowel
Out[42]:
In [43]: vowel.closure(2,2)
Out[43]:
In [44]: ((A("x")).closure(3,3)).optimize()
Out[44]:
In [45]: ((A("x")).closure(3,4)).optimize()
Out[45]:
In [46]: ((A("x")).closure(3,2)).optimize()
Out[46]:
In [47]: ((A("x")).closure(0,0)).optimize()
Out[47]:
In [48]: ((A("x")).closure(1,0)).optimize()
Out[48]:
In [49]: ((A("x")).closure(1,2)).optimize()
Out[49]:
In [50]: ((A("x")).closure(2,2)).optimize()
Out[50]:
In [51]: x + x
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-51-e2d9e13b7cb9> in <module>()
----> 1 x + x
NameError: name 'x' is not defined
In [52]: %load EngPlurals.py
In [53]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...:
...: A = functools.partial(pynini.acceptor, token_type="utf8") # A(s): acceptor for string s, tokenized as UTF-8
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8") # T(x, y): transducer from string x to string y, both tokenized as UTF-8
...:
...:
...: # some natural classes: each is a union (|) of single-segment acceptors
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...:
...: # all segments: sigmaStar accepts any sequence (including the empty one) of vowels and consonants
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes, written as context-dependent rewrite rules over sigmaStar
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar) # insert ɪ between two adjacent stridents
...:
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize() # voiced obstruent -> voiceless counterpart
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar) # apply devoicePairs right after a voiceless segment; right context is unrestricted
...:
...: phonology = (epenthesis @ devoicing).optimize() # composition: epenthesis applies first, then devoicing
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z' to any segment string
...:
...: wish = A("wɪʃ") # sample stems
...: cat = A("kæt")
...: cable = A("keɪbl") # NOTE(review): defined but not used by anything else visible in this listing
...: dog = A("dɔg")
...:
...: wishesT = ((wish @ regPlural) @ phonology).optimize() # wish -> wish+z -> phonology, kept as a transducer
...: wishes = wishesT.stringify(token_type="utf8") # NOTE(review): stringify is called on the transducer itself, whereas makeRegPl projects onto the output side first; confirm both read the same tape
...:
...:
...: # THIS WORKS! :-) ☺
...:
...: def makeRegPl(stem):
...: x = (((A(stem)) @ regPlural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # a lexicon of memorized exceptions
...:
...: pluralExceptions = ( T("fʊt","fit") # foot/feet
...: | T("gus","gis") # goose/geese
...: | T("fɪʃ","fɪʃ") # fish/fish
...: | T("gutz","gutz")).optimize() # NOTE(review): not a real stem; looks like a scratch test entry, confirm before keeping
...:
...: lexicalExceptions = pynini.project(pluralExceptions) # input side of the pairs: the stems with memorized plurals
...:
...: otherWords = ((sigmaStar - lexicalExceptions)).optimize() # every segment string that is not one of those stems
...:
...: plural = (pluralExceptions | (otherWords + T("", "z"))).optimize() # exceptions map to their listed plural; all other stems get 'z' appended
...:
...: def makePl(stem):
...: x = (((A(stem)) @ plural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # the full lexicon
...:
...: lexicon = (wish | cat | dog | lexicalExceptions).optimize() # known stems: three sample nouns plus the stems of the exception pairs
...:
...: lexiconPlural = lexicon @ plural @ phonology # maps each stem in the lexicon through plural morphology, then phonology
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
In [53]:
In [54]: lexicon
Out[54]:
In [55]: lexicon.text()
Out[55]: '0\t1\td\td\n0\t2\tf\tf\n0\t3\tg\tg\n0\t4\tk\tk\n0\t5\tw\tw\n1\t6\tɔ\tɔ\n2\t7\tɪ\tɪ\n2\t8\tʊ\tʊ\n3\t9\tu\tu\n4\t8\t<0xe6>\t<0xe6>\n5\t7\tɪ\tɪ\n6\t10\tg\tg\n7\t10\tʃ\tʃ\n8\t10\tt\tt\n9\t10\ts\ts\n9\t11\tt\tt\n10\n11\t10\tz\tz\n'
In [56]: print (lexicon.text())
0 1 d d
0 2 f f
0 3 g g
0 4 k k
0 5 w w
1 6 ɔ ɔ
2 7 ɪ ɪ
2 8 ʊ ʊ
3 9 u u
4 8 <0xe6> <0xe6>
5 7 ɪ ɪ
6 10 g g
7 10 ʃ ʃ
8 10 t t
9 10 s s
9 11 t t
10
11 10 z z
In [57]: