#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Unit tests for Wiktionary.py"""
__version__ = '$Id: test_wiktionary.py 6794 2009-05-02 16:57:24Z cosoleto $'
import unittest
import test_utils
import wiktionary
class KnownValues(unittest.TestCase):
    """Check wiktionary.Header against a table of known header lines."""

    # One row per sample header line:
    #   (wiki markup, expected .contents, expected .level, expected .type)
    # Rows for template-only headers such as '{{-en-}}' expect a level of None.
    knownValues = (
        ('==English==', 'en', 2, 'lang'),
        ('=={{en}}==', 'en', 2, 'lang'),
        ('{{-en-}}', 'en', None, 'lang'),
        ('===Noun===', 'noun', 3, 'pos'),
        ('==={{noun}}===', 'noun', 3, 'pos'),
        ('{{-noun-}}', 'noun', None, 'pos'),
        ('===Verb===', 'verb', 3, 'pos'),
        ('==={{verb}}===', 'verb', 3, 'pos'),
        ('{{-verb-}}', 'verb', None, 'pos'),
        ('====Translations====', 'trans', 4, 'other'),
        ('===={{trans}}====', 'trans', 4, 'other'),
        ('{{-trans-}}', 'trans', None, 'other'),
    )

    def testHeaderInitKnownValuesContents(self):
        """Header parsing comparing known result with known input for contents"""
        for row in self.knownValues:
            wikiline, expected_contents = row[0], row[1]
            header = wiktionary.Header(wikiline)
            self.assertEqual(expected_contents, header.contents)

    def testHeaderInitKnownValuesLevel(self):
        """Header parsing comparing known result with known input for level"""
        for row in self.knownValues:
            wikiline, expected_level = row[0], row[2]
            header = wiktionary.Header(wikiline)
            self.assertEqual(expected_level, header.level)

    def testHeaderInitKnownValuesType(self):
        """Header parsing comparing known result with known input for type"""
        for row in self.knownValues:
            wikiline, expected_type = row[0], row[3]
            header = wiktionary.Header(wikiline)
            self.assertEqual(expected_type, header.type)
class SortEntriesCheckSortOrder(unittest.TestCase):
    """Verify the order produced by WiktionaryPage.sortEntries().

    Entries should be sorted as follows on a page: Translingual first, the
    Wiktionary's own language next, then the others alphabetically on the
    language name in the Wiktionary's language.
    """

    # NOTE(review): the method name below looks copied from the Header tests;
    # it is left unchanged so existing test ids keep working.
    def testHeaderInitKnownValuesType(self):
        """Sorting order of Entries on a page"""
        # Every case feeds the same languages, in the same insertion order.
        unsorted_langs = ('eo', 'en', 'de', 'nl', 'es', 'translingual', 'fr')

        # Each case: ((wiktionary language, page term), languages to add,
        #             expected value of page.sortedentries after sorting)
        cases = (
            (('en', 'C'), unsorted_langs,
             ['translingual', 'en', 'nl', 'eo', 'fr', 'de', 'es']),
            (('nl', 'C'), unsorted_langs,
             ['translingual', 'nl', 'de', 'en', 'eo', 'fr', 'es']),
            (('fr', 'C'), unsorted_langs,
             ['translingual', 'fr', 'de', 'en', 'es', 'eo', 'nl']),
            (('de', 'C'), unsorted_langs,
             ['translingual', 'de', 'en', 'eo', 'fr', 'nl', 'es']),
        )

        for (wikilang, term), langs, expected_order in cases:
            page = wiktionary.WiktionaryPage(wikilang, term)
            for lang in langs:
                page.addEntry(wiktionary.Entry(lang))
            page.sortEntries()
            self.assertEqual(page.sortedentries, expected_order)
class TestKnownValuesInParser(unittest.TestCase):
    """Check that wiki text is parsed into the expected object model.

    Each record in ``knownvalues`` pairs raw wiki markup ('wikiformat') with
    the representation the parser is expected to produce ('internalrep').
    """

    # Record layout:
    #   'wikilang'    -- language code passed to WiktionaryPage
    #   'term'        -- page title passed to WiktionaryPage
    #   'wikiformat'  -- raw page text handed to parseWikiPage()
    #   'internalrep' -- (categories, interwiki links, entries) where entries
    #                    maps a language code to
    #                    [term, gender, plural, [meaning dicts]]
    # The wiki text must stay flush-left: it is the content of a string.
    knownvalues = (
        {
            'wikilang': 'en',
            'term': 'nut',
            'wikiformat': u"""==English==
===Etymology===
From Middle English [[nute]], from Old English [[hnutu]]. <!-- Is Latin [[nux]], nuc- a cognate? -->
===Pronunciation===
*[[w:AHD|AHD]]: nt
*[[w:IPA|IPA]]: /nt/
*[[w:SAMPA|SAMPA]]: /nVt/
===Noun===
'''nut''' (''plural'' '''[[nuts]]''')
#A hard-shelled seed.
#A piece of metal, often [[hexagonal]], with a hole through it with internal threading intended to fit on to a bolt.
#(''informal'') An insane person.
#(''slang'') The head.
#(''slang; rarely used in the singular'') A testicle.
====Synonyms====
*(''insane person''): [[loony]], [[nutcase]], [[nutter]]
*(''the head''): [[bonce]], [[noddle]] (See further synonyms under [[head]])
*(''a testicle''): [[ball]], [[bollock]] (''taboo slang''), [[nad]]
====Translations====
'''seed'''
{{top}}
<!--Put translations for languages from A to I here-->
*Dutch: [[noot]] ''f''
*French: ''no generic translation exists''; [[noix]] ''f'' ''is often used, but this actually means "[[walnut]]"''
*German: [[Nuss]] ''f''
*German: [[Nuss]] ''f''
*Italian: [[noce]] {{f}}
{{mid}}
<!--Put translations for languages from J to Z here-->
*Latin: [[nux]]
{{bottom}}
'''that fits on a bolt'''
{{top}}
<!--Put translations for languages from A to I here-->
*Dutch: [[moer]] ''f''
*French: [[crou]] ''m''
*German: [[Mutter]] ''f''
*Italian: [[dado]] {{m}}
{{mid}}
<!--Put translations for languages from J to Z here-->
{{bottom}}
'''informal: insane person'''
{{top}}
<!--Put translations for languages from A to I here-->
*Dutch: [[gek]] ''m'', [[gekkin]] ''f'', [[zot]] ''m'', [[zottin]] ''f''
*French: [[fou]] ''m'', [[folle]] ''f''
*German: [[Irre]] ''m/f'', [[Irrer]] ''m indef.''
{{mid}}
<!--Put translations for languages from J to Z here-->
{{bottom}}
'''slang: the head'''
{{top}}
<!--Put translations for languages from A to I here-->
*German: [[Birne]] ''f'', [[Rbe]] ''f'', [[Dtz]] ''m''
{{mid}}
<!--Put translations for languages from J to Z here-->
{{bottom}}
'''slang: testicle'''
{{top}}
<!--Put translations for languages from A to I here-->
*Dutch: [[noten]] ''m (plural)'' <!--Never heard this before-->, [[bal]] ''m'', [[teelbal]] ''m''
*French: [[couille]] ''f''
*German: [[Ei]] ''n'', ''lately:'' [[Nuss]] ''f''
{{mid}}
<!--Put translations for languages from J to Z here-->
*Spanish: [[cojone]], [[huevo]]
{{bottom}}
=====Translations to be checked=====
<!--Remove this section once all of the translations below have been moved into the tables above.-->
{{checktrans}}
The translations below need to be checked by native speakers and inserted into the appropriate table(s) above, removing any numbers. Any numbering associating translations with definitions is unreliable.
*[[Anglo-Saxon]]: [[hnutu]] ''f''
*Breton: [[krao]] ''collective noun'' [[kraoenn]] ''singular f'' (1), [[kraouenn]] ''f'' -o ''pl'' (2), [[brizh-sod]] (3), [[kell]] ''f'' divgell ''pl'' (4)
*Finnish: [[phkin]] (1), [[mutteri]] (2), [[hullu]] (3), [[ppi]] (3), [[muna#Finnish|muna]] (4), [[palli]] (4), [[kaali]] (5)
*Interlingua: [[nuce]] (1); [[matre vite]] (2); [[folle]] (3); [[teste]] (4), [[testiculo]] (4)
*Italian: [[noce]] ''f''
*Latvian: [[rieksts]] ''m'' (1), [[uzgrieznis]] ''m'' (2), [[trakais]] ''m'' (3), [[jucis]] ''m'' (3), [[pauts]] ''m'' (usually ''pl. - pauti'') (4)
*Polish: [[orzech]] ''m'' (1), [[nakrtka]] ''f'' (2), [[wir]] ''m'' (3)
*Portuguese: [[noz]] ''f'' (1); [[porca]] ''f'' (2); [[louco]] ''m'' (3), [[doido]] ''m'' (3), [[maluco]] ''m'' (3); [[bago]] ''m'' (4), [[ovo]] ''m'' (4)
*Romanian: [[nuc]] ''f'' (1), [[piuli]] ''f'' (2), [[nebun]] ''m'' (3) [[icnit]] ''m'' (3)
*Russian: [[]] ''m'' (1), [[]] (gaika/gajka) ''f'' (2), [[]] (sumasshedshij) ''m'' / [[]] (sumasshedshaya) ''f'' (3), [[]] (yaitso) ''n'' (4)
*Spanish: [[nuez]] (1), [[tuerca]] (2), [[chiflado]] (3), [[chalado]] (3)
*[[Tok Pisin]]: [[nat]] (2), [[longlongman]] (3), [[kiau]] (4), [[het]] (5)
====Related terms====
*[[coconut]]
*[[groundnut]]
*[[hazelnut]]
*[[peanut]]
*[[walnut]]
*[[nutbeam]]
*[[nutcase]]
*[[nutmeg]]
*[[NutRageous]]®
*[[nutshell]]
===Transitive verb===
'''to nut''' ('''nutting''', '''nutted''')
#(''slang'') To hit deliberately with the head; to [[headbutt]].
===Intransitive verb===
'''to nut''' ('''nutting''', '''nutted''')
#(''slang'') To [[ejaculate]] (''semen'').
----
==Dutch==
===Noun===
'''nut''' ''n''
# [[use]], [[benefit]]
[[io:nut]]
[[la:nut]]
[[Category:1000 English basic words]]
[[Category:Colors]]
[[Category:Browns]]
[[Category:Trees]]
[[category:Foods]]
""",
            'internalrep': (
                [u'1000 English basic words', u'Colors', u'Browns', u'Trees', u'Foods'],
                [u'io', 'la'],
                {
                    u'en': [
                        u'nut', None, u'nuts',
                        [
                            {'definition': u'A hard-shelled seed', 'concisedef': u'seed',
                             'trans': {'nl': u"[[noot]] ''f''", 'fr': u"""''no generic translation exists''; [[noix]] ''f'' ''is often used, but this actually means "[[walnut]]"''""", 'de': u"[[Nuss]] ''f''", 'it': u"[[noce]] {{f}}", 'la': u"[[nux]]"}},
                            {'definition': u"A piece of metal, often [[hexagonal]], with a hole through it with internal threading intended to fit on to a bolt.", 'concisedef': u'that fits on a bolt',
                             'trans': {'nl': u"[[moer]] ''f''", 'fr': u"[[crou]] ''m''", 'de': u"[[Mutter]] ''f''", 'it': u"[[dado]] {{m}}"}},
                            {'definition': u"(''informal'') An insane person.", 'concisedef': u"'''informal: insane person'''",
                             'syns': u"[[loony]], [[nutcase]], [[nutter]]",
                             'trans': {'nl': u"[[gek]] ''m'', [[gekkin]] ''f'', [[zot]] ''m'', [[zottin]] ''f''", 'fr': "[[fou]] ''m'', [[folle]] ''f''", 'de': "[[Irre]] ''m/f'', [[Irrer]] ''m indef.''"}},
                            {'definition': u"(''slang'') The head.", 'concisedef': u"'''slang: the head'''",
                             'syns': u"[[bonce]], [[noddle]] (See further synonyms under [[head]])",
                             'trans': {'de': u"[[Birne]] ''f'', [[Rbe]] ''f'', [[Dtz]] ''m''"}},
                            {'definition': u"(''slang; rarely used in the singular'') A testicle.", 'concisedef': u"'''slang: testicle'''",
                             'syns': u"[[ball]], [[bollock]] (''taboo slang''), [[nad]]",
                             'trans': {'nl': u"[[noten]] ''m (plural)'' <!--Never heard this before-->, [[bal]] ''m'', [[teelbal]] ''m''", 'fr': u"[[couille]] ''f''", 'de': u"[[Ei]] ''n'', ''lately:'' [[Nuss]] ''f''", 'es': u"[[cojone]], [[huevo]]"}},
                        ],
                    ],
                    u'nl': [
                        u'nut', 'n', None,
                        [{'definition': u'[[use]], [[benefit]]'}],
                    ],
                },
            ),
        },
        {
            'wikilang': 'en',
            'term': 'nut',
            'wikiformat': u"""[[category:Foods]]
[[category:Drinks]]""",
            'internalrep': ([u'Foods', u'Drinks'], [], {}),
        },
    )

    def _parse(self, value):
        """Build a WiktionaryPage from one ``knownvalues`` record and parse it.

        value -- a dict with 'wikilang', 'term' and 'wikiformat' keys.
        Returns the page after parseWikiPage() has been called on it.
        """
        page = wiktionary.WiktionaryPage(value['wikilang'], value['term'])
        page.parseWikiPage(value['wikiformat'])
        return page

    def testWhetherCategoriesAreParsedProperly(self):
        """Test whether Categories are parsed properly"""
        for value in self.knownvalues:
            expected_categories = value['internalrep'][0]
            apage = self._parse(value)
            self.assertEqual(apage.categories, expected_categories)

    def testWhetherLinksAreParsedProperly(self):
        """Test whether Links are parsed properly"""
        for value in self.knownvalues:
            expected_links = value['internalrep'][1]
            apage = self._parse(value)
            self.assertEqual(apage.interwikilinks, expected_links)

    def testWhetherDefsAreParsedProperly(self):
        """Test whether Definitions are parsed properly"""
        for value in self.knownvalues:
            expected_entries = value['internalrep'][2]
            apage = self._parse(value)

            for entrylang in expected_entries:
                # Index 3 of an entry record holds the list of meaning dicts.
                refdefs = [meaning['definition']
                           for meaning in expected_entries[entrylang][3]]

                # Flatten every group of parsed meanings into one list.
                meanings = apage.entries[entrylang].meanings
                resultmeanings = []
                for key in meanings.keys():
                    for resultmeaning in meanings[key]:
                        resultmeanings.append(resultmeaning.definition)

                # list.sort() sorts in place and returns None, so comparing
                # the two .sort() results always passed. sorted() returns the
                # ordered lists, making this an order-insensitive comparison.
                # NOTE(review): the first reference definition lacks the
                # trailing period found in the wiki text; a failure here may
                # point at the fixture rather than the parser -- confirm.
                self.assertEqual(sorted(resultmeanings), sorted(refdefs))
# Run every test case in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()