Type "copyright", "credits" or "license" for more information.
IPython 5.1.0 -- An enhanced Interactive Python.
? -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help -> Python's own help system.
object? -> Details about 'object', use 'object??' for extra details.
In [1]: cd /media/sf_pynini/
/media/sf_pynini
In [2]: ls
2.dot* irregular-plurals.tsv* p2.eps*
english_phonemes.txt* irregular-plurals.tsv~* __pycache__/
english_phonemes.txt~* irrLexicon.dot* regular-plurals.tsv*
EngPlurals.py* irrLexicon.jpg* regular-plurals.tsv~*
EngPlurals.py~* irrs.dot* upper.dot*
example.py* irrs.jpg* upper.jpg*
exceptions.dot* lower.dot* wish2.dot*
exceptions.eps* lower.jpg* wish.dot*
exceptions.jpg* p1.dot* wish.jpg*
ipa.txt* p1.eps*
ipa.txt~* p1.pdf*
In [3]: %load EngPlurals.py
In [4]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...:
...: A = functools.partial(pynini.acceptor, token_type="utf8")
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")
...:
...:
...: # some natural classes
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...:
...: # all segments
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)
...:
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)
...:
...: phonology = (epenthesis @ devoicing).optimize()
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'
...:
...: wish = A("wɪʃ")
...: cat = A("kæt")
...: cable = A("keɪbl")
...: dog = A("dɔg")
...:
...: wishesT = ((wish @ regPlural) @ phonology).optimize()
...: wishes = wishesT.stringify(token_type="utf8")
...:
...:
...: # THIS DOES NOT WORK
...: # def makeRegPl(stem):
...: # x = ((A(stem) @ regPlural) @ phonology).optimize()
...: # return (x.stringify(token_type="utf8"))
...:
...:
...: # a lexicon of memorized exceptions
...:
...: pluralExceptions = ( T("fʊt","fit") # foot/feet
...: | T("gus","gis") # goose/geese
...: | T("fɪʃ","fɪʃ") ).optimize() # fish/fish
...:
...: lexicalExceptions = pynini.project(irrPlural)
...:
...: wrongPlural = (lexicalExceptions @ regPlural).optimize()
...:
...: regPluralLessExceptions = (regPlural - wrongPlural).optimize()
...: # NOTE(review): the next statement reads pluralLessExceptions, but the name assigned above is regPluralLessExceptions.
...: plural = (pluralLessExceptions | pluralExceptions).optimize()
...:
...: # a lexicon of nouns for which plural morphology applies productively and normally.
...: # NOTE(review): foot, goose and fish are never assigned in this cell (only wish, cat, cable and dog are).
...: lexicon = (wish | cat | dog | cable | foot | goose | fish).optimize()
...:
...: dog_LEX = T("🐕","dɔg") # just playing around with unicode
...:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-4-6008b9e7296c> in <module>()
57 | T("fɪʃ","fɪʃ") ).optimize() # fish/fish
58
---> 59 lexicalExceptions = pynini.project(irrPlural)
60
61 wrongPlural = (lexicalExceptions @ regPlural).optimize()
NameError: name 'irrPlural' is not defined
In [5]: %load EngPlurals.py
In [6]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...:
...: A = functools.partial(pynini.acceptor, token_type="utf8")
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")
...:
...:
...: # some natural classes
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...:
...: # all segments
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)
...:
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)
...:
...: phonology = (epenthesis @ devoicing).optimize()
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'
...:
...: wish = A("wɪʃ")
...: cat = A("kæt")
...: cable = A("keɪbl")
...: dog = A("dɔg")
...:
...: wishesT = ((wish @ regPlural) @ phonology).optimize()
...: wishes = wishesT.stringify(token_type="utf8")
...:
...:
...: # THIS DOES NOT WORK
...: def makeRegPl(stem):
...: x = (((A(stem)) @ regPlural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # a lexicon of memorized exceptions
...:
...: pluralExceptions = ( T("fʊt","fit") # foot/feet
...: | T("gus","gis") # goose/geese
...: | T("fɪʃ","fɪʃ") ).optimize() # fish/fish
...:
...: lexicalExceptions = pynini.project(pluralExceptions)
...:
...: wrongPlural = (lexicalExceptions @ regPlural).optimize()
...: # NOTE(review): regPluralz / wrongPluralz below are weight-stripped copies, but the subtraction further down still uses regPlural - wrongPlural.
...: # NOTE(review): wrongPlural is composed with regPlural, which contains T("", "z"), so it is presumably a transducer rather than an acceptor -- confirm.
...: regPluralz = pynini.arcmap(regPlural, map_type="rmweight")
...: wrongPluralz = pynini.arcmap(wrongPlural, map_type="rmweight")
...:
...: regPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)
...: wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)
...:
...: regPluralLessExceptions = (regPlural - wrongPlural).optimize()
...:
...: # plural = (pluralLessExceptions | pluralExceptions).optimize()
...:
...: # the full lexicon
...:
...: lexicon = (wish | cat | dog | lexicalExceptions).optimize()
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
---------------------------------------------------------------------------
FstOpError Traceback (most recent call last)
<ipython-input-6-22e8d47313f7> in <module>()
69 wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)
70
---> 71 regPluralLessExceptions = (regPlural - wrongPlural).optimize()
72
73 # plural = (pluralLessExceptions | pluralExceptions).optimize()
pynini.pyx in pynini.Fst.__sub__()
pynini.pyx in pynini._difference_patch.patch()
FstOpError: 2nd argument must be an unweighted acceptor
In [7]: %load EngPlurals.py
In [8]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...:
...: A = functools.partial(pynini.acceptor, token_type="utf8")
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")
...:
...:
...: # some natural classes
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...:
...: # all segments
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)
...:
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)
...:
...: phonology = (epenthesis @ devoicing).optimize()
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'
...:
...: wish = A("wɪʃ")
...: cat = A("kæt")
...: cable = A("keɪbl")
...: dog = A("dɔg")
...:
...: wishesT = ((wish @ regPlural) @ phonology).optimize()
...: wishes = wishesT.stringify(token_type="utf8")
...:
...:
...: # THIS WORKS! :-) ☺
...:
...: def makeRegPl(stem):
...: x = (((A(stem)) @ regPlural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # a lexicon of memorized exceptions
...:
...: # pluralExceptions = ( T("fʊt","fit") # foot/feet
...: | T("gus","gis") # goose/geese
...: | T("fɪʃ","fɪʃ") ).optimize() # fish/fish
...:
...: # lexicalExceptions = pynini.project(pluralExceptions)
...:
...: # wrongPlural = (lexicalExceptions @ regPlural).optimize()
...:
...: # regPluralz = pynini.arcmap(regPlural, map_type="rmweight")
...: # wrongPluralz = pynini.arcmap(wrongPlural, map_type="rmweight")
...:
...: # regPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)
...: # wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)
...:
...: # regPluralLessExceptions = (regPlural - wrongPlural).optimize()
...:
...: # plural = (pluralLessExceptions | pluralExceptions).optimize()
...:
...: # the full lexicon
...:
...: # lexicon = (wish | cat | dog | lexicalExceptions).optimize()
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
File "<ipython-input-8-8d532c30ea93>", line 58
| T("gus","gis") # goose/geese
^
IndentationError: unexpected indent
In [9]: %load EngPlurals.py
In [10]: # %load EngPlurals.py
...:
...: # Jeff Heinz
...: # October 2018
...: # English Plural with Pynini
...:
...: import pynini
...: import functools
...: # Shorthands: A is pynini.acceptor with token_type="utf8"; T is pynini.transducer with utf8 input and output token types.
...: A = functools.partial(pynini.acceptor, token_type="utf8")
...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")
...:
...:
...: # some natural classes
...:
...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()
...:
...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()
...:
...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()
...:
...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()
...: # all segments: the closure of (vowel | consonant), i.e. strings built only from the symbols listed above.
...: # Symbols that are not in those lists (for example "a", "r" or "S") are not part of this alphabet.
...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()
...:
...:
...: # phonological changes
...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)
...: # ^ epenthesis: insert ɪ between two stridents (left context strident, right context strident).
...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()
...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)
...: # ^ devoicing: apply devoicePairs right after a voiceless segment (empty right context); phonology composes epenthesis first, then devoicing.
...: phonology = (epenthesis @ devoicing).optimize()
...:
...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'
...:
...: wish = A("wɪʃ")
...: cat = A("kæt")
...: cable = A("keɪbl")
...: dog = A("dɔg")
...: # worked example: suffix the plural z to wish, run the phonology, and read the result back as a utf8 string
...: wishesT = ((wish @ regPlural) @ phonology).optimize()
...: wishes = wishesT.stringify(token_type="utf8")
...:
...:
...: # THIS WORKS! :-) ☺  -- but only for stems spelled entirely with symbols from the vowel/consonant lists above.
...: # Otherwise stringify raises FstArgError (FST is not a string); presumably the composition with regPlural is empty -- confirm.
...: def makeRegPl(stem):
...: x = (((A(stem)) @ regPlural) @ phonology).optimize()
...: y = pynini.project(x,True)
...: return y.stringify(token_type="utf8")
...:
...:
...: # a lexicon of memorized exceptions
...:
...: # pluralExceptions = ( T("fʊt","fit") # foot/feet
...: # | T("gus","gis") # goose/geese
...: # | T("fɪʃ","fɪʃ") ).optimize() # fish/fish
...:
...: # lexicalExceptions = pynini.project(pluralExceptions)
...:
...: # wrongPlural = (lexicalExceptions @ regPlural).optimize()
...:
...: # regPluralz = pynini.arcmap(regPlural, map_type="rmweight")
...: # wrongPluralz = pynini.arcmap(wrongPlural, map_type="rmweight")
...:
...: # regPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)
...: # wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)
...:
...: # regPluralLessExceptions = (regPlural - wrongPlural).optimize()
...:
...: # plural = (pluralLessExceptions | pluralExceptions).optimize()
...:
...: # the full lexicon
...:
...: # lexicon = (wish | cat | dog | lexicalExceptions).optimize()
...:
...: dogLEX = T("🐕","dɔg") # just playing around with unicode
...:
In [11]: wish
Out[11]:
In [12]: wishesT
Out[12]:
In [13]: wishes
Out[13]: 'wɪʃɪz'
In [14]: wishesT.project(wishes,TRUE)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-14-5b14c795bb05> in <module>()
----> 1 wishesT.project(wishes,TRUE)
NameError: name 'TRUE' is not defined
In [15]: wishesT.project(wishes,True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-15-5cc8dfafc061> in <module>()
----> 1 wishesT.project(wishes,True)
pywrapfst.pyx in pywrapfst._MutableFst.project()
TypeError: project() takes at most 1 positional argument (2 given)
In [16]: wishesT.project(True)
Out[16]:
In [17]: wishesT
Out[17]:
In [18]: wishez = wishesT
In [19]: wishez.stringify(token_type="utf8")
Out[19]: 'wɪʃɪz'
In [20]: makeRegPl("bug")
Out[20]: 'bugz'
In [21]: makeRegPl("buk")
Out[21]: 'buks'
In [22]: makeRegPl("bus")
Out[22]: 'busɪz'
In [23]: makeRegPl("buS")
---------------------------------------------------------------------------
FstArgError Traceback (most recent call last)
<ipython-input-23-fe2793cccfb6> in <module>()
----> 1 makeRegPl("buS")
<ipython-input-10-e043cdfff2c4> in makeRegPl(stem)
50 x = (((A(stem)) @ regPlural) @ phonology).optimize()
51 y = pynini.project(x,True)
---> 52 return y.stringify(token_type="utf8")
53
54
pynini.pyx in pynini.Fst.stringify()
pynini.pyx in pynini.Fst.stringify()
FstArgError: FST is not a string
In [24]: makeRegPl("kat")
---------------------------------------------------------------------------
FstArgError Traceback (most recent call last)
<ipython-input-24-5d8e900c41c9> in <module>()
----> 1 makeRegPl("kat")
<ipython-input-10-e043cdfff2c4> in makeRegPl(stem)
50 x = (((A(stem)) @ regPlural) @ phonology).optimize()
51 y = pynini.project(x,True)
---> 52 return y.stringify(token_type="utf8")
53
54
pynini.pyx in pynini.Fst.stringify()
pynini.pyx in pynini.Fst.stringify()
FstArgError: FST is not a string
In [25]: makeRegPl("kit")
Out[25]: 'kits'
In [26]: makeRegPl("rug")
---------------------------------------------------------------------------
FstArgError Traceback (most recent call last)
<ipython-input-26-cc4618bab748> in <module>()
----> 1 makeRegPl("rug")
<ipython-input-10-e043cdfff2c4> in makeRegPl(stem)
50 x = (((A(stem)) @ regPlural) @ phonology).optimize()
51 y = pynini.project(x,True)
---> 52 return y.stringify(token_type="utf8")
53
54
pynini.pyx in pynini.Fst.stringify()
pynini.pyx in pynini.Fst.stringify()
FstArgError: FST is not a string
In [27]: makeRegPl("bug")
Out[27]: 'bugz'
In [28]: exit