import math
from collections import defaultdict
from typing import Optional

import fastenum
class ConRel(fastenum.Enum):
    """
    Conceptual relations (short: ConRel) that a synset can hold to other synsets.

    A description of the individual relations is available at
    https://uni-tuebingen.de/en/142846
    """
    has_hypernym = 1
    has_hyponym = 2
    has_component_meronym = 3
    has_component_holonym = 4
    has_member_meronym = 5
    has_member_holonym = 6
    has_substance_meronym = 7
    has_substance_holonym = 8
    has_portion_meronym = 9
    has_portion_holonym = 10
    entails = 11
    is_entailed_by = 12
    is_related_to = 13
    causes = 14

    @staticmethod
    def transitive(conrel) -> bool:
        """
        Tell whether a conceptual relation is transitive.

        :type conrel: ConRel
        :param conrel: the conceptual relation to inspect
        :return: True if the relation is transitive, False otherwise
        """
        # The transitive relations are the members numbered 1 through 10:
        # has_hypernym / has_hyponym and all meronym / holonym variants.
        return conrel.value in range(1, 11)
class WordCategory(fastenum.Enum):
    """
    The three part-of-speech tags (word categories) a Synset can have in GermaNet.

    adj = adjective, nomen = noun, verben = verb
    """
    adj = 1
    nomen = 2
    verben = 3

    @staticmethod
    def get_possible_word_classes(word_category) -> set:
        """
        Each word category can only occur with a specific set of word classes.

        :type word_category: WordCategory
        :param word_category: The word category
        :return: The set of word classes that occur with the given word category. Any category that is neither
            ``nomen`` nor ``adj`` receives the verb classes.
        """
        # Compare the numeric value in BOTH branches. Comparing the member itself with an int
        # (``word_category == 1``) never matches, which made adjectives fall through to the verb set.
        category_value = word_category.value
        if category_value == 2:  # nomen
            return {WordClass.Tops, WordClass.Artefakt, WordClass.Attribut, WordClass.Besitz, WordClass.Relation,
                    WordClass.Geschehen, WordClass.Form, WordClass.Gefuehl, WordClass.Gruppe, WordClass.Koerper,
                    WordClass.Kognition, WordClass.Kommunikation, WordClass.Menge, WordClass.Mensch, WordClass.Motiv,
                    WordClass.Nahrung, WordClass.natGegenstand, WordClass.Ort, WordClass.Pflanze, WordClass.Substanz,
                    WordClass.Tier, WordClass.Zeit}
        if category_value == 1:  # adj
            # NOTE(review): this set lists natGegenstand whereas the verb set lists natPhaenomen;
            # confirm against the GermaNet documentation which of the two applies to adjectives.
            return {WordClass.Allgemein, WordClass.Bewegung, WordClass.Relation, WordClass.Gefuehl,
                    WordClass.Gesellschaft, WordClass.Koerper, WordClass.Geist, WordClass.Menge,
                    WordClass.natGegenstand, WordClass.Ort, WordClass.Pertonym, WordClass.Substanz,
                    WordClass.Verhalten, WordClass.Perzeption, WordClass.Zeit, WordClass.privativ}
        # verben (and the fallback for anything else)
        return {WordClass.Allgemein, WordClass.Besitz, WordClass.Gefuehl, WordClass.Gesellschaft,
                WordClass.Koerperfunktion, WordClass.Kognition, WordClass.Kommunikation, WordClass.Konkurrenz,
                WordClass.Kontakt, WordClass.natPhaenomen, WordClass.Lokation, WordClass.Schoepfung,
                WordClass.Veraenderung, WordClass.Verbrauch, WordClass.Perzeption}
class WordClass(fastenum.Enum):
    """
    The semantic word classes / semantic fields a Synset can have in GermaNet.

    For a detailed description see:
    http://www.sfs.uni-tuebingen.de/GermaNet/germanet_structure.shtml#Tops
    """
    Allgemein = 1
    Bewegung = 2
    Gefuehl = 3
    Geist = 4
    Gesellschaft = 5
    Koerper = 6
    Menge = 7
    natPhaenomen = 8
    Ort = 9
    Pertonym = 10
    Perzeption = 11
    privativ = 12
    Relation = 13
    Substanz = 14
    Verhalten = 15
    Zeit = 16
    Artefakt = 17
    Attribut = 18
    Besitz = 19
    Form = 20
    Geschehen = 21
    Gruppe = 22
    Kognition = 23
    Kommunikation = 24
    Mensch = 25
    Motiv = 26
    Nahrung = 27
    natGegenstand = 28
    Pflanze = 29
    Tier = 30
    Tops = 31
    Koerperfunktion = 32
    Konkurrenz = 33
    Kontakt = 34
    Lokation = 35
    Schoepfung = 36
    Veraenderung = 37
    Verbrauch = 38

    @staticmethod
    def get_possible_word_categories(word_class):
        """
        Each word class can occur with one or several word categories.

        :rtype: set(WordCategory)
        :type word_class: WordClass
        :param word_class: the word class to get the possible word categories for
        :return: the set of word categories the given word class can occur with
        """
        # The three sets below list the same classes as WordCategory.get_possible_word_classes.
        verb_classes = {
            WordClass.Allgemein, WordClass.Besitz, WordClass.Gefuehl, WordClass.Gesellschaft,
            WordClass.Koerperfunktion, WordClass.Kognition, WordClass.Kommunikation, WordClass.Konkurrenz,
            WordClass.Kontakt, WordClass.natPhaenomen, WordClass.Lokation, WordClass.Schoepfung,
            WordClass.Veraenderung, WordClass.Verbrauch, WordClass.Perzeption,
        }
        noun_classes = {
            WordClass.Tops, WordClass.Artefakt, WordClass.Attribut, WordClass.Besitz, WordClass.Relation,
            WordClass.Geschehen, WordClass.Form, WordClass.Gefuehl, WordClass.Gruppe, WordClass.Koerper,
            WordClass.Kognition, WordClass.Kommunikation, WordClass.Menge, WordClass.Mensch, WordClass.Motiv,
            WordClass.Nahrung, WordClass.natGegenstand, WordClass.Ort, WordClass.Pflanze, WordClass.Substanz,
            WordClass.Tier, WordClass.Zeit,
        }
        # Previously this set was an exact copy of the verb classes, so every verb class was also
        # reported as an adjective class and adjective-only classes were reported as nothing.
        adj_classes = {
            WordClass.Allgemein, WordClass.Bewegung, WordClass.Relation, WordClass.Gefuehl,
            WordClass.Gesellschaft, WordClass.Koerper, WordClass.Geist, WordClass.Menge,
            WordClass.natGegenstand, WordClass.Ort, WordClass.Pertonym, WordClass.Substanz,
            WordClass.Verhalten, WordClass.Perzeption, WordClass.Zeit, WordClass.privativ,
        }

        categories = set()
        if word_class in verb_classes:
            categories.add(WordCategory.verben)
        if word_class in noun_classes:
            categories.add(WordCategory.nomen)
        if word_class in adj_classes:
            categories.add(WordCategory.adj)
        return categories
class Synset:
    """
    This class holds a Synset object. A synset in GermaNet contains several lexical units and holds specific relations
    to other synsets, for example a synset can have hypernyms or hyponyms.

    Synsets are compared and hashed by identity; the graph algorithms below rely on that.
    """

    def __init__(self, id: str, word_category: "WordCategory", word_class: "WordClass"):
        """
        :param id: Every synset has a unique identifier.
        :param word_category: Every Synset has exactly one part-of-speech
        :param word_class: Every Synset has exactly one semantic class
        """
        self._id = id
        self._word_category = word_category
        self._word_class = word_class
        self._paraphrase = ""
        self._lexunits = []
        # relation -> set of related synsets, for outgoing and incoming edges respectively
        self._relations = defaultdict(set)
        self._incoming_relations = defaultdict(set)
        # Aliases of the very same set objects stored in _relations, kept for fast access.
        self._direct_hypernyms = self._relations[ConRel.has_hypernym]
        self._direct_hyponyms = self._relations[ConRel.has_hyponym]

    def __repr__(self):
        lexunit_str = ', '.join(f'{unit.orthform}' for unit in self._lexunits)
        return f'Synset(id={self._id}, lexunits={lexunit_str})'

    def add_lexunit(self, unit):
        """
        Adds a lexical unit that is part of this synset to the list of lexical units

        :type unit: Lexunit
        :param unit: The lexUnit object to be added
        """
        self._lexunits.append(unit)

    def is_root(self) -> bool:
        """
        :return: True if this Synset is the root of the Graph (= has no hypernyms), otherwise false
        """
        return not self._direct_hypernyms

    def is_leaf(self) -> bool:
        """
        :return: True if this Synset is a leaf of the Graph (= has no hyponyms), otherwise false
        """
        return not self._direct_hyponyms

    def num_lexunits(self) -> int:
        """
        :return: The number of lexical units, contained in that synset
        """
        return len(self._lexunits)

    def hypernym_paths(self) -> list:
        """
        Recursively collect every path that connects this synset with a root node.

        :return: A list of lists. Each inner list starts with a root node and ends with this synset.
        """
        if self.is_root():
            return [[self]]
        paths = []
        for hypernym in self._direct_hypernyms:
            for path in hypernym.hypernym_paths():
                # the recursive call returns fresh lists, so extending them in place is safe
                path.append(self)
                paths.append(path)
        return paths

    def all_hypernyms(self) -> set:
        """
        This method extracts all hypernyms for this synset (the transitive closure for this synset)

        :return: a set, containing all possible hypernym nodes. it is empty if the current synset is the root node
        """
        # Plain upward traversal; visits every ancestor once instead of enumerating all root paths.
        found = set()
        pending = list(self._direct_hypernyms)
        while pending:
            node = pending.pop()
            if node not in found:
                found.add(node)
                pending.extend(node._direct_hypernyms)
        found.discard(self)
        return found

    def hyponym_paths(self) -> list:
        """
        Recursively collect every path that connects this synset with a leaf node.

        :return: A list of lists. Each inner list starts with a leaf node and ends with this synset.
        """
        if self.is_leaf():
            return [[self]]
        paths = []
        for hyponym in self._direct_hyponyms:
            for path in hyponym.hyponym_paths():
                path.append(self)
                paths.append(path)
        return paths

    def all_hyponyms(self) -> set:
        """
        This method returns all possible hyponyms of this synset.

        :return: [set(Synset)] A set of synset nodes, each constitutes a hyponym of the current synset.
        """
        # Plain downward traversal; visits every descendant once instead of enumerating all leaf paths.
        found = set()
        pending = list(self._direct_hyponyms)
        while pending:
            node = pending.pop()
            if node not in found:
                found.add(node)
                pending.extend(node._direct_hyponyms)
        found.discard(self)
        return found

    def shortest_path_to_root(self) -> list:
        """
        This method returns the shortest path to the root node.

        :return: [list(Synset)] shortest path, ordered from the root node down to this synset. If several paths
            share the minimal length, the first one produced by hypernym_paths() is returned.
        """
        return min(self.hypernym_paths(), key=len)

    def common_hypernyms(self, other) -> set:
        """
        Given another synset, this method computes shared hypernyms

        :type other: Synset
        :param other: another synset object
        :return: a set of synset nodes, that denotes the shared hypernyms between this synset and the given one.
            Neither synset counts as its own hypernym.
        """
        return self.all_hypernyms() & other.all_hypernyms()

    def min_depth(self) -> int:
        """
        :return: The length of the shortest hypernym path from this synset to the root.
        """
        # _direct_hypernyms is the same set object as _relations[ConRel.has_hypernym] (see __init__).
        hypernyms = self._direct_hypernyms
        if not hypernyms:
            return 0
        return 1 + min(h.min_depth() for h in hypernyms)

    def shortest_path_distance(self, other) -> Optional[int]:
        """
        Returns the distance of the shortest path linking the two synsets (if
        one exists). If a node is compared with itself 0 is returned. The distance is denoted by the number of edges
        that exist in the shortest path.

        :type other: Synset
        :param other: The Synset to which the shortest path will be found.
        :return: The number of edges in the shortest path connecting the two nodes, or None if no path exists.
        """
        if self == other:
            return 0
        paths = self.shortest_path(other)
        if not paths:
            return None
        return len(paths[0]) - 1

    def shortest_path(self, other) -> list:
        """
        Returns the shortest possible sequence of synset nodes that are traversed from this synset to a given other
        synset. If there are several shortest sequences, all of them are returned.

        :type other: Synset
        :param other: A synset the path should be computed to
        :return: A list of lists, each list containing the sequence of nodes traversed from this synset to the
            given other synset. Empty if the synsets share no subsumer.
        """
        shortest_paths = []
        for subsumer in self.lowest_common_subsumer(other):
            upward_paths = self.shortest_path_to_hypernym(subsumer)
            downward_paths = other.shortest_path_to_hypernym(subsumer)
            for start_path in upward_paths:
                for end_path in downward_paths:
                    # start_path runs self -> subsumer, end_path runs other -> subsumer. Walk the second
                    # one backwards and skip the subsumer so it is not listed twice.
                    tail = [synset for synset in reversed(end_path) if synset != subsumer]
                    current_path = start_path + tail
                    if current_path not in shortest_paths:
                        shortest_paths.append(current_path)
        return shortest_paths

    def shortest_path_to_hypernym(self, hypernym) -> list:
        """
        The shortest path between this synset and the given hypernym. The given synset must be this synset itself
        or one of its (direct or indirect) hypernyms.

        :type hypernym: Synset
        :param hypernym: a synset, denoting the hypernym the shortest path should be computed to
        :return: a list of lists, each list storing a shortest sequence of synset nodes traversed from self to the
            given hypernym; every distinct sequence is listed once
        :raises AssertionError: if the given synset is not a hypernym of this synset
        """
        if self == hypernym:
            return [[self]]
        candidates = []
        for path in self.hypernym_paths():
            if hypernym in path:
                # path is ordered root -> self; keep the part from the hypernym on and flip it
                upward = path[path.index(hypernym):]
                upward.reverse()
                # several root paths may share the same stretch between self and the hypernym
                if upward not in candidates:
                    candidates.append(upward)
        if not candidates:
            # Raised explicitly (instead of via ``assert``) so the check survives ``python -O``.
            raise AssertionError("given hypernym is not a hypernym of this synset")
        shortest = min(len(path) for path in candidates)
        return [path for path in candidates if len(path) == shortest]

    def lowest_common_subsumer(self, other) -> set:
        """
        Extract the lowest common subsumer(s) / lowest common ancestor(s) of the current synset and a given one.
        A synset subsumes itself, so if one synset is a hypernym of the other, that hypernym is the LCS.

        :type other: Synset
        :param other: Another synset object the LCS should be computed to.
        :return: a set, containing one or several synset objects, being the LCS between the current synset and the
            given one. Empty if the two synsets share no subsumer.
        """
        # Cheap shortcuts that avoid walking the graph.
        if other == self:
            return {self}
        # NOTE: returning a root unconditionally presumes the graph has a single root.
        if other in self._direct_hypernyms or other.is_root():
            return {other}
        if self in other._direct_hypernyms or self.is_root():
            return {self}

        own_distances = self.get_distances_hypernym_dic()
        other_distances = other.get_distances_hypernym_dic()
        # Both distance maps contain the synset itself (distance 0). Intersecting their keys therefore also
        # considers ``self`` and ``other`` as candidates, which covers indirect hypernym relations.
        lcs = set()
        best = math.inf
        for candidate in own_distances.keys() & other_distances.keys():
            total = own_distances[candidate] + other_distances[candidate]
            if total < best:
                lcs = {candidate}
                best = total
            elif total == best:
                lcs.add(candidate)
        return lcs

    def get_distances_hypernym_dic(self) -> dict:
        """
        For each hypernym, store the shortest distance between the current synset and its hypernym.

        :return: A dictionary containing this synset (distance 0) and all of its hypernyms as keys and the
            corresponding shortest distances (number of edges) as values.
        """
        # Breadth-first walk along the hypernym edges: the first time a node is reached is its shortest distance.
        distances = {self: 0}
        frontier = [self]
        depth = 0
        while frontier:
            depth += 1
            next_frontier = []
            for node in frontier:
                for hypernym in node._direct_hypernyms:
                    if hypernym not in distances:
                        distances[hypernym] = depth
                        next_frontier.append(hypernym)
            frontier = next_frontier
        return distances

    @property
    def id(self):
        return self._id

    @property
    def word_category(self):
        return self._word_category

    @property
    def word_class(self):
        return self._word_class

    @property
    def paraphrase(self):
        return self._paraphrase

    @property
    def lexunits(self):
        return self._lexunits

    @property
    def relations(self):
        return self._relations

    @property
    def incoming_relations(self):
        return self._incoming_relations

    @property
    def direct_hypernyms(self):
        return self._direct_hypernyms

    @property
    def direct_hyponyms(self):
        return self._direct_hyponyms