Type "copyright", "credits" or "license" for more information.


IPython 5.1.0 -- An enhanced Interactive Python.

? -> Introduction and overview of IPython's features.

%quickref -> Quick reference.

help -> Python's own help system.

object? -> Details about 'object', use 'object??' for extra details.


In [1]: cd /media/sf_pynini/

/media/sf_pynini


In [2]: ls

2.dot* irregular-plurals.tsv* p2.eps*

english_phonemes.txt* irregular-plurals.tsv~* __pycache__/

english_phonemes.txt~* irrLexicon.dot* regular-plurals.tsv*

EngPlurals.py* irrLexicon.jpg* regular-plurals.tsv~*

EngPlurals.py~* irrs.dot* upper.dot*

example.py* irrs.jpg* upper.jpg*

exceptions.dot* lower.dot* wish2.dot*

exceptions.eps* lower.jpg* wish.dot*

exceptions.jpg* p1.dot* wish.jpg*

ipa.txt* p1.eps*

ipa.txt~* p1.pdf*


In [3]: %load EngPlurals.py


In [4]: # %load EngPlurals.py

   ...:

   ...: # Jeff Heinz

   ...: # October 2018

   ...: # English Plural with Pynini

   ...:

   ...: import pynini

   ...: import functools

   ...:

   ...: A = functools.partial(pynini.acceptor, token_type="utf8")

   ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")

   ...:

   ...:

   ...: # some natural classes

   ...:

   ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()

   ...:

   ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()

   ...:

   ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()

   ...:

   ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()

   ...:

   ...: # all segments

   ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()

   ...:

   ...:

   ...: # phonological changes

   ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)

   ...:

   ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()

   ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)

   ...:

   ...: phonology = (epenthesis @ devoicing).optimize()

   ...:

   ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

   ...:

   ...: wish = A("wɪʃ")

   ...: cat = A("kæt")

   ...: cable = A("keɪbl")

   ...: dog = A("dɔg")

   ...:

   ...: wishesT = ((wish @ regPlural) @ phonology).optimize()

   ...: wishes = wishesT.stringify(token_type="utf8")

   ...:

   ...:

   ...: # THIS DOES NOT WORK

   ...: # def makeRegPl(stem):

   ...: # x = ((A(stem) @ regPlural) @ phonology).optimize()

   ...: # return (x.stringify(token_type="utf8"))

   ...:

   ...:

   ...: # a lexicon of memorized exceptions

   ...:

   ...: pluralExceptions = ( T("fʊt","fit") # foot/feet

   ...: | T("gus","gis") # goose/geese

   ...: | T("fɪʃ","fɪʃ") ).optimize() # fish/fish

   ...:

   ...: lexicalExceptions = pynini.project(irrPlural)  # NameError in this run: irrPlural is never assigned in this cell; the exceptions transducer built just above is named pluralExceptions

   ...:

   ...: wrongPlural = (lexicalExceptions @ regPlural).optimize()  # not reached in this run

   ...:

   ...: regPluralLessExceptions = (regPlural - wrongPlural).optimize()  # not reached in this run

   ...:

   ...: plural = (pluralLessExceptions | pluralExceptions).optimize()  # not reached; pluralLessExceptions is not assigned anywhere in this cell (the previous statement assigns regPluralLessExceptions)

   ...:

   ...: # a lexicon of nouns for which plural morphology applies productively and normally.

   ...:

   ...: lexicon = (wish | cat | dog | cable | foot | goose | fish).optimize()  # not reached; foot, goose and fish are not assigned anywhere in this cell (only wish, cat, cable and dog are)

   ...:

   ...: dog_LEX = T("🐕","dɔg") # just playing around with unicode

   ...:

---------------------------------------------------------------------------

NameError Traceback (most recent call last)

<ipython-input-4-6008b9e7296c> in <module>()

57 | T("fɪʃ","fɪʃ") ).optimize() # fish/fish

58

---> 59 lexicalExceptions = pynini.project(irrPlural)

60

61 wrongPlural = (lexicalExceptions @ regPlural).optimize()


NameError: name 'irrPlural' is not defined


In [5]: %load EngPlurals.py


In [6]: # %load EngPlurals.py

   ...:

   ...: # Jeff Heinz

   ...: # October 2018

   ...: # English Plural with Pynini

   ...:

   ...: import pynini

   ...: import functools

   ...:

   ...: A = functools.partial(pynini.acceptor, token_type="utf8")

   ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")

   ...:

   ...:

   ...: # some natural classes

   ...:

   ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()

   ...:

   ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()

   ...:

   ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()

   ...:

   ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()

   ...:

   ...: # all segments

   ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()

   ...:

   ...:

   ...: # phonological changes

   ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)

   ...:

   ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()

   ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)

   ...:

   ...: phonology = (epenthesis @ devoicing).optimize()

   ...:

   ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

   ...:

   ...: wish = A("wɪʃ")

   ...: cat = A("kæt")

   ...: cable = A("keɪbl")

   ...: dog = A("dɔg")

   ...:

   ...: wishesT = ((wish @ regPlural) @ phonology).optimize()

   ...: wishes = wishesT.stringify(token_type="utf8")

   ...:

   ...:

   ...: # THIS DOES NOT WORK

   ...: def makeRegPl(stem):  # Build the regular plural of the string `stem` and return it as a utf8 string.

   ...: x = (((A(stem)) @ regPlural) @ phonology).optimize()  # accept stem, append "z" via regPlural, then apply epenthesis and devoicing

   ...: y = pynini.project(x,True)  # NOTE(review): the True argument presumably selects the output side -- confirm against the pynini.project docs

   ...: return y.stringify(token_type="utf8")  # defined but never called in this run: the cell aborts later with FstOpError

   ...:

   ...:

   ...: # a lexicon of memorized exceptions

   ...:

   ...: pluralExceptions = ( T("fʊt","fit") # foot/feet

   ...: | T("gus","gis") # goose/geese

   ...: | T("fɪʃ","fɪʃ") ).optimize() # fish/fish

   ...:

   ...: lexicalExceptions = pynini.project(pluralExceptions)

   ...:

   ...: wrongPlural = (lexicalExceptions @ regPlural).optimize()

   ...:

   ...:

   ...: regPluralz = pynini.arcmap(regPlural, map_type="rmweight")  # result bound to a new name; regPlural itself is what the subtraction below uses

   ...: wrongPluralz = pynini.arcmap(wrongPlural, map_type="rmweight")  # likewise bound to a new name that is never read again in this cell

   ...:

   ...: regPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)  # NOTE(review): sets the property bit by hand; presumably does not change the arcs themselves -- confirm

   ...: wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)

   ...:

   ...: regPluralLessExceptions = (regPlural - wrongPlural).optimize()  # FstOpError in this run. Operands are regPlural/wrongPlural, not the ...z variables built above. NOTE(review): wrongPlural is a composition with regPlural (which maps "" to "z"), so it is presumably a transducer, not an acceptor, and stripping weights would not help; subtracting on the input side, e.g. ((sigmaStar - lexicalExceptions) + T("", "z")), may be what is intended -- verify

   ...:

   ...: # plural = (pluralLessExceptions | pluralExceptions).optimize()

   ...:

   ...: # the full lexicon

   ...:

   ...: lexicon = (wish | cat | dog | lexicalExceptions).optimize()

   ...:

   ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

   ...:

---------------------------------------------------------------------------

FstOpError Traceback (most recent call last)

<ipython-input-6-22e8d47313f7> in <module>()

69 wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)

70

---> 71 regPluralLessExceptions = (regPlural - wrongPlural).optimize()

72

73 # plural = (pluralLessExceptions | pluralExceptions).optimize()


pynini.pyx in pynini.Fst.__sub__()


pynini.pyx in pynini._difference_patch.patch()


FstOpError: 2nd argument must be an unweighted acceptor


In [7]: %load EngPlurals.py


In [8]: # %load EngPlurals.py

   ...:

   ...: # Jeff Heinz

   ...: # October 2018

   ...: # English Plural with Pynini

   ...:

   ...: import pynini

   ...: import functools

   ...:

   ...: A = functools.partial(pynini.acceptor, token_type="utf8")

   ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")

   ...:

   ...:

   ...: # some natural classes

   ...:

   ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()

   ...:

   ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()

   ...:

   ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()

   ...:

   ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()

   ...:

   ...: # all segments

   ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()

   ...:

   ...:

   ...: # phonological changes

   ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)

   ...:

   ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()

   ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)

   ...:

   ...: phonology = (epenthesis @ devoicing).optimize()

   ...:

   ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

   ...:

   ...: wish = A("wɪʃ")

   ...: cat = A("kæt")

   ...: cable = A("keɪbl")

   ...: dog = A("dɔg")

   ...:

   ...: wishesT = ((wish @ regPlural) @ phonology).optimize()

   ...: wishes = wishesT.stringify(token_type="utf8")

   ...:

   ...:

   ...: # THIS WORKS! :-) ☺

   ...:

   ...: def makeRegPl(stem):  # Build the regular plural of the string `stem` and return it as a utf8 string.

   ...: x = (((A(stem)) @ regPlural) @ phonology).optimize()  # accept stem, append "z" via regPlural, then apply epenthesis and devoicing

   ...: y = pynini.project(x,True)  # NOTE(review): the True argument presumably selects the output side -- confirm against the pynini.project docs

   ...: return y.stringify(token_type="utf8")  # never executed in this run: the cell is rejected with an IndentationError

   ...:

   ...:

   ...: # a lexicon of memorized exceptions

   ...:

   ...: # pluralExceptions = ( T("fʊt","fit") # foot/feet

   ...: | T("gus","gis") # goose/geese -- still live code: only the first line of this three-line expression was commented out, and the IndentationError below points at this line

   ...: | T("fɪʃ","fɪʃ") ).optimize() # fish/fish -- also still live code, for the same reason

   ...:

   ...: # lexicalExceptions = pynini.project(pluralExceptions)

   ...:

   ...: # wrongPlural = (lexicalExceptions @ regPlural).optimize()

   ...:

   ...: # regPluralz = pynini.arcmap(regPlural, map_type="rmweight")

   ...: # wrongPluralz = pynini.arcmap(wrongPlural, map_type="rmweight")

   ...:

   ...: # regPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)

   ...: # wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)

   ...:

   ...: # regPluralLessExceptions = (regPlural - wrongPlural).optimize()

   ...:

   ...: # plural = (pluralLessExceptions | pluralExceptions).optimize()

   ...:

   ...: # the full lexicon

   ...:

   ...: # lexicon = (wish | cat | dog | lexicalExceptions).optimize()

   ...:

   ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

   ...:

File "<ipython-input-8-8d532c30ea93>", line 58

| T("gus","gis") # goose/geese

^

IndentationError: unexpected indent



In [9]: %load EngPlurals.py


In [10]: # %load EngPlurals.py

   ...:

   ...: # Jeff Heinz

   ...: # October 2018

   ...: # English Plural with Pynini

   ...:

   ...: import pynini

   ...: import functools

   ...:

   ...: A = functools.partial(pynini.acceptor, token_type="utf8")  # shorthand: A(s) calls pynini.acceptor(s, token_type="utf8")

   ...: T = functools.partial(pynini.transducer, input_token_type="utf8", output_token_type="utf8")  # shorthand: T(i, o) calls pynini.transducer(i, o) with both token types set to "utf8"

   ...:

   ...:

   ...: # some natural classes

   ...:

   ...: vowel = (A("i") | A("ɪ") | A("e") | A("ɛ") | A("æ") | A("u") | A("ʊ") | A("o") | A("ɔ") | A("ɑ") | A("ə") | A("ʌ")).optimize()  # union of 12 single-symbol acceptors; plain "a" is not among them

   ...:

   ...: consonant = (A("p") | A("b") | A("t") | A("d") | A("k") | A("g") | A("m") | A("n") | A("ŋ") | A("f") | A("v") | A("θ") | A("ð") | A("s") | A("z") | A("ʃ") | A("ʒ") | A("h") | A("ʤ") | A("ʧ") | A("j") | A("w") | A("ɹ") | A("l")).optimize()  # union of 24 single-symbol acceptors; the rhotic is "ɹ", plain "r" and "S" are not among them

   ...:

   ...: strident = (A("s") | A("z") | A("ʃ") | A("ʒ") | A("ʤ") | A("ʧ")).optimize()  # context class used on both sides of the epenthesis rule

   ...:

   ...: voiceless = (A("p") | A("t") | A("k") | A("f") | A("θ") | A("s") | A("ʃ") | A("ʧ")).optimize()  # left-context class of the devoicing rule. NOTE(review): "h" is in consonant but not listed here -- intentional?

   ...:

   ...: # all segments

   ...: sigmaStar = (pynini.closure(vowel | consonant)).optimize()  # closure over the union of the two classes above; a string containing any other symbol is outside it

   ...:

   ...:

   ...: # phonological changes

   ...: epenthesis = pynini.cdrewrite(T("","ɪ"), strident, strident, sigmaStar)  # rewrite "" as "ɪ" with a strident as left context and a strident as right context

   ...:

   ...: devoicePairs = (T("b","p") | T("d","t") | T("g","k") | T("v","f") | T("ð","θ") | T("z","s") | T("ʒ","ʃ") | T("ʤ","ʧ")).optimize()  # eight voiced -> voiceless single-symbol mappings

   ...: devoicing = pynini.cdrewrite(devoicePairs, voiceless, "", sigmaStar)  # apply devoicePairs with a voiceless segment as left context; the right context is the empty string

   ...:

   ...: phonology = (epenthesis @ devoicing).optimize()  # composition: epenthesis first, then devoicing (this session yields 'busɪz' with z kept, and 'buks' with z devoiced)

   ...:

   ...: regPlural = (sigmaStar + T("", "z")).optimize() # plural morphology suffixes a 'z'

   ...:

   ...: wish = A("wɪʃ")  # sample stems, each a single-string acceptor built with A

   ...: cat = A("kæt")

   ...: cable = A("keɪbl")

   ...: dog = A("dɔg")

   ...:

   ...: wishesT = ((wish @ regPlural) @ phonology).optimize()  # same pipeline makeRegPl uses, applied to the wish acceptor; kept as an FST

   ...: wishes = wishesT.stringify(token_type="utf8")  # evaluates to 'wɪʃɪz' in this session (see Out[13])

   ...:

   ...:

   ...: # THIS WORKS! :-) ☺

   ...:

   ...: def makeRegPl(stem):  # Build the regular plural of the string `stem` and return it as a utf8 string, e.g. "buk" -> 'buks', "bus" -> 'busɪz' in this session.

   ...: x = (((A(stem)) @ regPlural) @ phonology).optimize()  # accept stem, append "z" via regPlural, then apply epenthesis and devoicing

   ...: y = pynini.project(x,True)  # NOTE(review): the True argument presumably selects the output side -- confirm against the pynini.project docs

   ...: return y.stringify(token_type="utf8")  # raised FstArgError "FST is not a string" for "buS", "kat" and "rug" later in this session. NOTE(review): "S", "a" and "r" are not among the symbols in vowel/consonant, so the composition with regPlural is presumably empty for those stems -- confirm, and consider checking x.start() == pynini.NO_STATE_ID to report an out-of-alphabet stem explicitly

   ...:

   ...:

   ...: # a lexicon of memorized exceptions

   ...:

   ...: # pluralExceptions = ( T("fʊt","fit") # foot/feet

   ...: # | T("gus","gis") # goose/geese

   ...: # | T("fɪʃ","fɪʃ") ).optimize() # fish/fish

   ...:

   ...: # lexicalExceptions = pynini.project(pluralExceptions)

   ...:

   ...: # wrongPlural = (lexicalExceptions @ regPlural).optimize()

   ...:

   ...: # regPluralz = pynini.arcmap(regPlural, map_type="rmweight")

   ...: # wrongPluralz = pynini.arcmap(wrongPlural, map_type="rmweight")

   ...:

   ...: # regPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)

   ...: # wrongPluralz.set_properties(pynini.UNWEIGHTED, pynini.UNWEIGHTED)

   ...:

   ...: # regPluralLessExceptions = (regPlural - wrongPlural).optimize()

   ...:

   ...: # plural = (pluralLessExceptions | pluralExceptions).optimize()

   ...:

   ...: # the full lexicon

   ...:

   ...: # lexicon = (wish | cat | dog | lexicalExceptions).optimize()

   ...:

   ...: dogLEX = T("🐕","dɔg") # just playing around with unicode

   ...:


In [11]: wish

Out[11]:




In [12]: wishesT

Out[12]:




In [13]: wishes

Out[13]: 'wɪʃɪz'


In [14]: wishesT.project(wishes,TRUE)

---------------------------------------------------------------------------

NameError Traceback (most recent call last)

<ipython-input-14-5b14c795bb05> in <module>()

----> 1 wishesT.project(wishes,TRUE)


NameError: name 'TRUE' is not defined


In [15]: wishesT.project(wishes,True)

---------------------------------------------------------------------------

TypeError Traceback (most recent call last)

<ipython-input-15-5cc8dfafc061> in <module>()

----> 1 wishesT.project(wishes,True)


pywrapfst.pyx in pywrapfst._MutableFst.project()


TypeError: project() takes at most 1 positional argument (2 given)


In [16]: wishesT.project(True)  # NOTE(review): the traceback above resolves this method to pywrapfst._MutableFst.project, so it presumably modifies wishesT in place rather than returning a new FST -- confirm

Out[16]:




In [17]: wishesT

Out[17]:




In [18]: wishez = wishesT  # plain assignment: wishez is a second name for the same object as wishesT, not a copy


In [19]: wishez.stringify(token_type="utf8")

Out[19]: 'wɪʃɪz'


In [20]: makeRegPl("bug")

Out[20]: 'bugz'


In [21]: makeRegPl("buk")

Out[21]: 'buks'


In [22]: makeRegPl("bus")

Out[22]: 'busɪz'


In [23]: makeRegPl("buS")

---------------------------------------------------------------------------

FstArgError Traceback (most recent call last)

<ipython-input-23-fe2793cccfb6> in <module>()

----> 1 makeRegPl("buS")


<ipython-input-10-e043cdfff2c4> in makeRegPl(stem)

50 x = (((A(stem)) @ regPlural) @ phonology).optimize()

51 y = pynini.project(x,True)

---> 52 return y.stringify(token_type="utf8")

53

54


pynini.pyx in pynini.Fst.stringify()


pynini.pyx in pynini.Fst.stringify()


FstArgError: FST is not a string


In [24]: makeRegPl("kat")

---------------------------------------------------------------------------

FstArgError Traceback (most recent call last)

<ipython-input-24-5d8e900c41c9> in <module>()

----> 1 makeRegPl("kat")


<ipython-input-10-e043cdfff2c4> in makeRegPl(stem)

50 x = (((A(stem)) @ regPlural) @ phonology).optimize()

51 y = pynini.project(x,True)

---> 52 return y.stringify(token_type="utf8")

53

54


pynini.pyx in pynini.Fst.stringify()


pynini.pyx in pynini.Fst.stringify()


FstArgError: FST is not a string


In [25]: makeRegPl("kit")

Out[25]: 'kits'


In [26]: makeRegPl("rug")

---------------------------------------------------------------------------

FstArgError Traceback (most recent call last)

<ipython-input-26-cc4618bab748> in <module>()

----> 1 makeRegPl("rug")


<ipython-input-10-e043cdfff2c4> in makeRegPl(stem)

50 x = (((A(stem)) @ regPlural) @ phonology).optimize()

51 y = pynini.project(x,True)

---> 52 return y.stringify(token_type="utf8")

53

54


pynini.pyx in pynini.Fst.stringify()


pynini.pyx in pynini.Fst.stringify()


FstArgError: FST is not a string


In [27]: makeRegPl("bug")

Out[27]: 'bugz'


In [28]: exit