from germanetpy.compoundInfo import CompoundInfo, CompoundCategory, CompoundProperty
from germanetpy.lexunit import Lexunit, LexRel
from germanetpy.synset import Synset, WordCategory, WordClass
from germanetpy.utils import convert_to_boolean
# --- Names used while parsing a <lexUnit> element ---------------------------
# Attributes read directly from the lexUnit node.
LEXID = 'id'
SENSE = 'sense'
SOURCE = 'source'
NAMEDENTITY = 'namedEntity'
STYLE = 'styleMarking'
ARTIFICIAL = 'artificial'

# Orthographic-form names.
ORTHFORM = 'orthForm'
ORTHVAR = 'orthVar'
OLDORTHFORM = 'oldOrthForm'
OLDORTHVAR = 'oldOrthVar'

# Names compared against the tags of a lexUnit's child elements.
PARTICLE = 'particle'
BASEVERB = 'baseVerb'
COMMENT = 'comment'
COMPOUND = 'compound'
FRAME = 'frame'
EXAMPLE = 'example'

# Tag that marks a lexical unit among the children of a synset node.
LEXUNIT = 'lexUnit'

# --- Attributes read from a synset node --------------------------------------
SYNID = 'id'
WORDCLASS = 'class'
WORDCATEGORY = 'category'
[docs]
def get_attribute_element(attributes, element: str, enum):
    """
    Looks up an XML attribute and converts its value into a member of ``enum``.

    The attribute's string value is used as the subscript key on ``enum``
    (``enum[value]``), i.e. the member is selected by that key.

    :type attributes: XML attributes
    :param attributes: XML attributes of a certain XML node (any mapping that supports ``in`` and ``[]``)
    :param element: The name of the attribute to look up
    :param enum: The Enum class whose member should be returned
    :return: The corresponding Enum member, or None if the attribute is not present
    """
    # Guard clause: a missing attribute is not an error, it simply yields None.
    if element not in attributes:
        return None
    raw_value = attributes[element]
    return enum[raw_value]
[docs]
def get_attribute_element_without_enum(attributes, element: str):
    """
    Returns the value of an XML attribute, wrapped in a list, if the attribute exists.

    :type attributes: XML attributes
    :param attributes: XML attributes of a certain XML node (any mapping that supports ``in`` and ``[]``)
    :param element: The name of the attribute to look up
    :return: A one-element list holding the attribute value, or None if the attribute is not present
    """
    # Guard clause: absent attributes are reported as None rather than as an empty list.
    if element not in attributes:
        return None
    value = attributes[element]
    return [value]
[docs]
def _read_modifier_attributes(modifier):
    """
    Reads property, category and the three lex unit id attributes of one modifier element.

    :param modifier: the XML element of a compound modifier
    :return: a tuple ``(property, category, lexUnitId1, lexUnitId2, lexUnitId3)``; each entry is None when
        the corresponding attribute is absent
    """
    attrib = modifier.attrib
    return (
        get_attribute_element(attrib, CompoundInfo.PROPERTY, CompoundProperty),
        get_attribute_element(attrib, CompoundInfo.CATEGORY, CompoundCategory),
        get_attribute_element_without_enum(attrib, CompoundInfo.XML_LEX_UNIT_ID),
        get_attribute_element_without_enum(attrib, CompoundInfo.XML_LEX_UNIT_ID2),
        get_attribute_element_without_enum(attrib, CompoundInfo.XML_LEX_UNIT_ID3),
    )


def create_compound_info(child) -> CompoundInfo:
    """
    Creates a compound info object. This has a modifier (String) and a head (String). Each modifier and the head can
    have a property (CompoundProperty) and a category (CompoundCategory).

    The first child element is always read as the first modifier. If the element has exactly three children, the
    second is read as a second modifier and the third as the head; otherwise the second child is read as the head.
    The lex unit ids of the modifiers are handed over exactly as get_attribute_element_without_enum returns them;
    the head's lex unit id is unwrapped when that result is a list.

    :param child: the XML element
    :return: A CompoundInfo object
    """
    assert len(child) > 0, "wrong data format"

    modifier1 = child[0]
    (modifier1prop, modifier1cat,
     mod1lexUnitId1, mod1lexUnitId2, mod1lexUnitId3) = _read_modifier_attributes(modifier1)

    # Defaults for compounds that have only one modifier.
    modifier2 = modifier2prop = modifier2cat = mod2lexUnitId1 = mod2lexUnitId2 = mod2lexUnitId3 = None
    if len(child) == 3:
        modifier2 = child[1]
        head = child[2]
        # The ids of the second modifier must come from the second modifier's own attributes
        # (they were previously read from the first modifier by mistake).
        (modifier2prop, modifier2cat,
         mod2lexUnitId1, mod2lexUnitId2, mod2lexUnitId3) = _read_modifier_attributes(modifier2)
    else:
        head = child[1]

    headprop = get_attribute_element(head.attrib, CompoundInfo.PROPERTY, CompoundProperty)
    headLexUnitId = get_attribute_element_without_enum(head.attrib, CompoundInfo.XML_LEX_UNIT_ID)
    if isinstance(headLexUnitId, list):
        # Unwrap the list so that the head id is passed as a plain value.
        headLexUnitId = headLexUnitId[0]

    # NOTE(review): modifier2 is passed as the XML element itself while modifier1 and head are passed as text;
    # kept as in the original call - confirm against CompoundInfo whether ``modifier2.text`` is intended.
    compound = CompoundInfo(modifier1.text, head.text, modifier2, modifier1prop, modifier1cat, mod1lexUnitId1,
                            mod1lexUnitId2, mod1lexUnitId3, modifier2prop, modifier2cat, mod2lexUnitId1,
                            mod2lexUnitId2, mod2lexUnitId3, headprop, headLexUnitId)
    return compound
[docs]
def load_lexunits(germanet, tree):
    """
    Walks through the XML tree and creates the Synset and Lexunit objects it describes.

    Every child of the tree's root becomes a Synset that is stored in ``germanet.synsets``. Every ``lexUnit``
    child of such a node becomes a Lexunit that is stored in ``germanet.lexunits``, indexed by word category and
    word class, and attached to its synset. Finally all lexical units of one synset are linked to each other
    through the ``has_synonym`` relation.

    :type tree: Element Tree
    :type germanet: Germanet
    :param germanet: the germanet object
    :param tree: XML tree
    """
    for synset_node in tree.getroot():
        node_attributes = synset_node.attrib
        synset_id = node_attributes[SYNID]
        category = get_attribute_element(node_attributes, WORDCATEGORY, WordCategory)
        word_class = get_attribute_element(node_attributes, WORDCLASS, WordClass)

        synset = Synset(synset_id, category, word_class)
        germanet.synsets[synset.id] = synset

        for unit_node in synset_node:
            # Only lexUnit children describe lexical units; everything else is skipped.
            if unit_node.tag != LEXUNIT:
                continue
            lexunit = create_lexunit(germanet, unit_node.attrib, unit_node, synset)
            germanet.lexunits[lexunit.id] = lexunit
            germanet.wordcat2lexid[category.name].add(lexunit.id)
            germanet.wordclass2lexid[word_class.name].add(lexunit.id)
            synset.add_lexunit(lexunit)

        # Every lexical unit of the synset is a synonym of every other one (but not of itself).
        for unit in synset.lexunits:
            for other in synset.lexunits:
                if other is unit:
                    continue
                unit.relations[LexRel.has_synonym].add(other)
[docs]
def create_lexunit(germanet, attributes, lex_root, synset) -> Lexunit:
    """
    Given the XML data, creates a Lexunit object.

    The attributes of the lexUnit node are used to construct the Lexunit; its child elements are then dispatched
    by tag: compound information, orthographic forms, particle, base verb, comment, frames and examples.
    Compounds and frames are additionally registered on the germanet object.

    :type attributes: XML attributes
    :type germanet: Germanet
    :param germanet: The germanet object.
    :param attributes: The XML attributes.
    :param lex_root: The XML root
    :param synset: the corresponding synset object
    :return: a lexical unit object
    """
    lex_id = attributes[LEXID]
    lex_sense = int(attributes[SENSE])
    lex_source = attributes[SOURCE]
    lex_named_entity = convert_to_boolean(attributes[NAMEDENTITY])
    lex_artificial = convert_to_boolean(attributes[ARTIFICIAL])
    lex_style = convert_to_boolean(attributes[STYLE])
    lexunit = Lexunit(id=lex_id, sense=lex_sense, source=lex_source, named_entity=lex_named_entity, synset=synset,
                      artificial=lex_artificial, style_marking=lex_style)

    # Explicit set of orthographic-form tags instead of a substring test on the tag name,
    # so that unrelated tags can never be routed to add_orth_forms by accident.
    orth_tags = (ORTHFORM, ORTHVAR, OLDORTHFORM, OLDORTHVAR)

    for child in lex_root:
        tag = child.tag
        child_value = child.text
        if tag == COMPOUND:
            lexunit._compound_info = create_compound_info(child)
            germanet.compounds.add(lexunit)
        elif tag in orth_tags:
            add_orth_forms(germanet, lexunit, child_value, tag)
        elif tag == PARTICLE:
            lexunit._particle = child_value
        elif tag == BASEVERB:
            lexunit._base_verb = child_value
        elif tag == COMMENT:
            lexunit._comment = child_value
        elif tag == FRAME:
            lexunit.frames.append(child_value)
            # Register only the frame just read; frames read earlier for this unit
            # were already registered when they were appended.
            germanet.frames2lexunits[child_value].add(lexunit)
        elif tag == EXAMPLE:
            # The first sub-element holds the example text, an optional second one the frame it illustrates.
            example = child[0].text
            lexunit.examples.append(example)
            if len(child) == 2:
                exframe = child[1].text
                lexunit.frames2examples[exframe].add(example)
    return lexunit