001 package org.maltparser.core.syntaxgraph.ds2ps;
002
003
004 import java.util.SortedMap;
005
006 import org.maltparser.core.exception.MaltChainedException;
007 import org.maltparser.core.helper.SystemLogger;
008 import org.maltparser.core.io.dataformat.ColumnDescription;
009 import org.maltparser.core.io.dataformat.DataFormatInstance;
010 import org.maltparser.core.symbol.SymbolTable;
011 import org.maltparser.core.symbol.SymbolTableHandler;
012 import org.maltparser.core.syntaxgraph.MappablePhraseStructureGraph;
013 import org.maltparser.core.syntaxgraph.edge.Edge;
014 import org.maltparser.core.syntaxgraph.headrules.HeadRules;
015 import org.maltparser.core.syntaxgraph.node.DependencyNode;
016 import org.maltparser.core.syntaxgraph.node.NonTerminalNode;
017 import org.maltparser.core.syntaxgraph.node.PhraseStructureNode;
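/**
 * Maps between a dependency graph and a phrase structure graph. The phrase
 * structure information is encoded on every dependency edge in four labels
 * (DEPREL, HEADREL, PHRASE and ATTACH), and the phrase structure is rebuilt
 * from those labels when labeled dependency edges are added.
 *
 * @author Johan Hall
 */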
023 public class LosslessMapping implements Dependency2PhraseStructure {
024 private String DEPREL = "DEPREL";
025 private String PHRASE = "PHRASE";
026 private String HEADREL = "HEADREL";
027 private String ATTACH = "ATTACH";
028 private String CAT = "CAT";
029 private String EDGELABEL;
030 private final char EMPTY_SPINE = '*';
031 private final String EMPTY_LABEL = "??";
032 private final char SPINE_ELEMENT_SEPARATOR = '|';
033 private final char LABEL_ELEMENT_SEPARATOR = '~';
034 private final char QUESTIONMARK = '?';
035 private String optionString;
036 private HeadRules headRules;
037 private DataFormatInstance dependencyDataFormatInstance;
038 private DataFormatInstance phraseStructuretDataFormatInstance;
039 private boolean lockUpdate = false;
040 private int nonTerminalCounter;
041 private StringBuilder deprel;
042 private StringBuilder headrel;
043 private StringBuilder phrase;
044
045 public LosslessMapping(DataFormatInstance dependencyDataFormatInstance, DataFormatInstance phraseStructuretDataFormatInstance) {
046 setDependencyDataFormatInstance(dependencyDataFormatInstance);
047 setPhraseStructuretDataFormatInstance(phraseStructuretDataFormatInstance);
048 deprel = new StringBuilder();
049 headrel = new StringBuilder();
050 phrase = new StringBuilder();
051
052 if (phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptionSet().size() == 1) {
053 for (ColumnDescription column : phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptionSet()) {
054 EDGELABEL = column.getName();
055 }
056 }
057
058 clear();
059 }
060
061 public void clear() {
062 nonTerminalCounter = 0;
063 }
064
065 public String getOptionString() {
066 return optionString;
067 }
068
069 public void setOptionString(String optionString) {
070 this.optionString = optionString;
071 }
072
073 public DataFormatInstance getDependencyDataFormatInstance() {
074 return dependencyDataFormatInstance;
075 }
076
077 public void setDependencyDataFormatInstance(
078 DataFormatInstance dependencyDataFormatInstance) {
079 this.dependencyDataFormatInstance = dependencyDataFormatInstance;
080 }
081
082 public DataFormatInstance getPhraseStructuretDataFormatInstance() {
083 return phraseStructuretDataFormatInstance;
084 }
085
086 public void setPhraseStructuretDataFormatInstance(
087 DataFormatInstance phraseStructuretDataFormatInstance) {
088 this.phraseStructuretDataFormatInstance = phraseStructuretDataFormatInstance;
089 }
090
091 public void update(MappablePhraseStructureGraph graph, Edge e, Object arg) throws MaltChainedException {
092 if (lockUpdate == false) {
093 // if (e.getType() == Edge.PHRASE_STRUCTURE_EDGE && e.getSource() instanceof NonTerminalNode && lockUpdate == false) {
094 // if(e.getTarget() instanceof TerminalNode) {
095 // PhraseStructureNode top = (PhraseStructureNode)e.getTarget();
096 // while (top.getParent() != null && ((NonTerminalNode)top.getParent()).getLexicalHead() == (PhraseStructureNode)e.getTarget()) {
097 // top = top.getParent();
098 // }
099 // updateDependenyGraph(graph, top);
100 // }
101 // else if (e.getSource().isRoot()) {
102 // updateDependenyGraph(graph, graph.getPhraseStructureRoot());
103 // }
104 // }
105 if (e.getType() == Edge.DEPENDENCY_EDGE && e.getSource() instanceof DependencyNode && e.getTarget() instanceof DependencyNode) {
106 if (e.isLabeled() && e.getLabelSet().size() == 4) {
107 updatePhraseStructureGraph(graph, (Edge)e, false);
108 }
109 }
110 }
111 }
112
113 public void updateDependenyGraph(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException {
114 if (graph.nTokenNode() == 1 && graph.nNonTerminals() == 0) {
115 // Special case when the root dominates direct a single terminal node
116 Edge e = graph.addDependencyEdge(graph.getDependencyRoot(), graph.getDependencyNode(1));
117 e.addLabel(graph.getSymbolTables().getSymbolTable(DEPREL), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)));
118 e.addLabel(graph.getSymbolTables().getSymbolTable(HEADREL), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(HEADREL)));
119 e.addLabel(graph.getSymbolTables().getSymbolTable(PHRASE), "*");
120 // e.addLabel(graph.getSymbolTables().getSymbolTable(PHRASE), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(PHRASE)));
121 e.addLabel(graph.getSymbolTables().getSymbolTable(ATTACH), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(ATTACH)));
122 } else {
123 updateDependencyEdges(graph, top);
124 updateDependenyLabels(graph);
125 }
126 }
127
128
129
130 private void updateDependencyEdges(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException {
131 if (top == null) {
132 return;
133 }
134 DependencyNode head = null;
135 DependencyNode dependent = null;
136 if (top instanceof NonTerminalNode) {
137 for (PhraseStructureNode node : ((NonTerminalNode)top).getChildren()) {
138 if (node instanceof NonTerminalNode) {
139 updateDependencyEdges(graph,node);
140 } else {
141 head = ((NonTerminalNode)top).getLexicalHead(headRules);
142 dependent = (DependencyNode)node;
143 if (head != null && dependent != null && head != dependent) {
144 lockUpdate = true;
145 if (!dependent.hasHead()) {
146 graph.addDependencyEdge(head, dependent);
147 }
148 else if (head != dependent.getHead()) {
149 graph.moveDependencyEdge(head, dependent);
150 }
151 lockUpdate = false;
152 }
153 }
154 }
155 }
156
157 head = null;
158 if (top.getParent() != null) {
159 head = ((NonTerminalNode)top.getParent()).getLexicalHead(headRules);
160 } else if (top.isRoot()) {
161 head = (DependencyNode)top;
162 }
163
164 if (top instanceof NonTerminalNode) {
165 dependent = ((NonTerminalNode)top).getLexicalHead(headRules);
166 } else if (!top.isRoot()) {
167 dependent = (DependencyNode)top;
168 }
169 if (head != null && dependent != null && head != dependent) {
170 lockUpdate = true;
171 if (!dependent.hasHead()) {
172 graph.addDependencyEdge(head, dependent);
173 }
174 else if (head != dependent.getHead()) {
175 graph.moveDependencyEdge(head, dependent);
176 }
177 lockUpdate = false;
178 }
179 }
180
181 private void updateDependenyLabels(MappablePhraseStructureGraph graph) throws MaltChainedException {
182 for (int index :graph.getTokenIndices()) {
183 PhraseStructureNode top = (PhraseStructureNode)graph.getTokenNode(index);
184
185 while (top != null && top.getParent() != null &&graph.getTokenNode(index) == ((NonTerminalNode)top.getParent()).getLexicalHead(headRules)) {
186 top = top.getParent();
187 }
188 lockUpdate = true;
189 labelDependencyEdge(graph, graph.getTokenNode(index).getHeadEdge(), top);
190 lockUpdate = false;
191 }
192 }
193
194
195 // private void updateDependenyLabels(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException {
196 // if (top == null) {
197 // return;
198 // }
199 // DependencyNode head = null;
200 // DependencyNode dependent = null;
201 // if (top instanceof NonTerminalNode) {
202 // for (PhraseStructureNode node : ((NonTerminalNode)top).getChildren()) {
203 // if (node instanceof NonTerminalNode) {
204 // updateDependenyLabels(graph, node);
205 // } else {
206 // head = ((NonTerminalNode)top).getLexicalHead(headRules);
207 // dependent = (DependencyNode)node;
208 // if (head != null && dependent != null && head != dependent) {
209 // lockUpdate = true;
210 // if (dependent.hasHead()) {
211 // Edge e = dependent.getHeadEdge();
212 // labelDependencyEdge(graph, e, node);
213 // }
214 // lockUpdate = false;
215 // }
216 // }
217 // }
218 // }
219 //
220 // dependent = null;
221 // if (top instanceof NonTerminalNode) {
222 // dependent = ((NonTerminalNode)top).getLexicalHead(headRules);
223 // }
224 //
225 // if (dependent != null) {
226 // lockUpdate = true;
227 // if (dependent.hasHead()) {
228 // Edge e = dependent.getHeadEdge();
229 // labelDependencyEdge(graph, e, top);
230 // }
231 // lockUpdate = false;
232 // }
233 // }
234
235 private void labelDependencyEdge(MappablePhraseStructureGraph graph, Edge e, PhraseStructureNode top) throws MaltChainedException {
236 if (e == null) {
237 return;
238 }
239 SymbolTableHandler symbolTables = graph.getSymbolTables();
240 deprel.setLength(0);
241 phrase.setLength(0);
242 headrel.setLength(0);
243
244 e.removeLabel(symbolTables.getSymbolTable(DEPREL));
245 e.removeLabel(symbolTables.getSymbolTable(HEADREL));
246 e.removeLabel(symbolTables.getSymbolTable(PHRASE));
247 e.removeLabel(symbolTables.getSymbolTable(ATTACH));
248
249 int i = 0;
250 SortedMap<String, SymbolTable> edgeLabelSymbolTables = phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelSymbolTables();
251 SortedMap<String, SymbolTable> nodeLabelSymbolTables = phraseStructuretDataFormatInstance.getPhraseStructureNodeLabelSymbolTables();
252 if (!top.isRoot()) {
253 for (String name : edgeLabelSymbolTables.keySet()) {
254 if (top.hasParentEdgeLabel(symbolTables.getSymbolTable(name))) {
255 deprel.append(top.getParentEdgeLabelSymbol(symbolTables.getSymbolTable(name)));
256 } else {
257 deprel.append(EMPTY_LABEL);
258 }
259 i++;
260 if (i < edgeLabelSymbolTables.size()) {
261 deprel.append(LABEL_ELEMENT_SEPARATOR);
262 }
263 }
264 if (deprel.length() != 0) {
265 e.addLabel(symbolTables.getSymbolTable(DEPREL), deprel.toString());
266 }
267 } else {
268 String deprelDefaultRootLabel = graph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(DEPREL));
269 if (deprelDefaultRootLabel != null) {
270 e.addLabel(symbolTables.getSymbolTable(DEPREL), deprelDefaultRootLabel);
271 } else {
272 e.addLabel(symbolTables.getSymbolTable(DEPREL), EMPTY_LABEL);
273 }
274 }
275 PhraseStructureNode tmp = (PhraseStructureNode)e.getTarget();
276 while (tmp != top && tmp.getParent() != null) { // && !tmp.getParent().isRoot()) {
277 i=0;
278 for (String name : edgeLabelSymbolTables.keySet()) {
279 if (tmp.hasParentEdgeLabel(symbolTables.getSymbolTable(name))) {
280 headrel.append(tmp.getParentEdgeLabelSymbol(symbolTables.getSymbolTable(name)));
281 } else {
282 headrel.append(EMPTY_LABEL);
283 }
284 i++;
285 if (i < edgeLabelSymbolTables.size()) {
286 headrel.append(LABEL_ELEMENT_SEPARATOR);
287 }
288 }
289 i=0;
290 headrel.append(SPINE_ELEMENT_SEPARATOR);
291 for (String name : nodeLabelSymbolTables.keySet()) {
292 if (tmp.getParent().hasLabel(symbolTables.getSymbolTable(name))) {
293 phrase.append(tmp.getParent().getLabelSymbol(symbolTables.getSymbolTable(name)));
294 } else {
295 if (tmp.getParent().isRoot()) {
296 String deprelDefaultRootLabel = graph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(PHRASE));
297 if (deprelDefaultRootLabel != null) {
298 phrase.append(deprelDefaultRootLabel);
299 } else {
300 phrase.append(EMPTY_LABEL);
301 }
302 } else {
303 phrase.append(EMPTY_LABEL);
304 }
305 }
306 i++;
307 if (i < nodeLabelSymbolTables.size()) {
308 phrase.append(LABEL_ELEMENT_SEPARATOR);
309 }
310 }
311 phrase.append(SPINE_ELEMENT_SEPARATOR);
312 tmp = tmp.getParent();
313 }
314 if (phrase.length() == 0) {
315 headrel.append(EMPTY_SPINE);
316 phrase.append(EMPTY_SPINE);
317 } else {
318 headrel.setLength(headrel.length()-1);
319 phrase.setLength(phrase.length()-1);
320 }
321 e.addLabel(symbolTables.getSymbolTable(HEADREL), headrel.toString());
322 e.addLabel(symbolTables.getSymbolTable(PHRASE), phrase.toString());
323 int a = 0;
324 tmp = (PhraseStructureNode)e.getSource();
325 while (top.getParent() != null && tmp.getParent() != null && tmp.getParent() != top.getParent()) {
326 a++;
327 tmp = tmp.getParent();
328 }
329 e.addLabel(symbolTables.getSymbolTable(ATTACH), Integer.toString(a));
330 }
331
332 public void connectUnattachedSpines(MappablePhraseStructureGraph graph) throws MaltChainedException {
333 connectUnattachedSpines(graph, graph.getDependencyRoot());
334
335 if (!graph.getPhraseStructureRoot().isLabeled()) {
336 graph.getPhraseStructureRoot().addLabel(graph.getSymbolTables().addSymbolTable(CAT), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(PHRASE)));
337
338 }
339 }
340
341 private void connectUnattachedSpines(MappablePhraseStructureGraph graph, DependencyNode depNode) throws MaltChainedException {
342 if (!depNode.isRoot()) {
343 PhraseStructureNode dependentSpine = (PhraseStructureNode)depNode;
344 while (dependentSpine.getParent() != null) {
345 dependentSpine = dependentSpine.getParent();
346 }
347 if (!dependentSpine.isRoot()) {
348 updatePhraseStructureGraph(graph,depNode.getHeadEdge(),true);
349 }
350 }
351 for (int i = 0; i < depNode.getLeftDependentCount(); i++) {
352 connectUnattachedSpines(graph, depNode.getLeftDependent(i));
353 }
354 for (int i = depNode.getRightDependentCount()-1; i >= 0 ; i--) {
355 connectUnattachedSpines(graph, depNode.getRightDependent(i));
356 }
357 }
358
359 public void updatePhraseStructureGraph(MappablePhraseStructureGraph graph, Edge depEdge, boolean attachHeadSpineToRoot) throws MaltChainedException {
360 PhraseStructureNode dependentSpine = (PhraseStructureNode)depEdge.getTarget();
361
362 if (((PhraseStructureNode)depEdge.getTarget()).getParent() == null) {
363 // Restore dependent spine
364 String phraseSpineLabel = null;
365 String edgeSpineLabel = null;
366 int empty_label = 0;
367
368 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(PHRASE))) {
369 phraseSpineLabel = depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(PHRASE));
370 }
371 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(HEADREL))) {
372 edgeSpineLabel = depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(HEADREL));
373 }
374 if (phraseSpineLabel != null && phraseSpineLabel.length() > 0 && phraseSpineLabel.charAt(0) != EMPTY_SPINE) {
375 int ps = 0, es = 0, i = 0, j = 0, n = phraseSpineLabel.length()-1, m = edgeSpineLabel.length()-1;
376 PhraseStructureNode child = (PhraseStructureNode)depEdge.getTarget();
377 while (true) {
378 while (i <= n && phraseSpineLabel.charAt(i) != SPINE_ELEMENT_SEPARATOR) {
379 if (phraseSpineLabel.charAt(i) == QUESTIONMARK) {
380 empty_label++;
381 } else {
382 empty_label = 0;
383 }
384 i++;
385 }
386 if (depEdge.getSource().isRoot() && i >= n) {
387 dependentSpine = graph.getPhraseStructureRoot();
388 } else {
389 dependentSpine = graph.addNonTerminalNode(++nonTerminalCounter);
390 }
391
392 if (empty_label != 2 && ps != i) {
393 dependentSpine.addLabel(graph.getSymbolTables().addSymbolTable(CAT), phraseSpineLabel.substring(ps,i));
394 }
395
396 empty_label = 0;
397 if (edgeSpineLabel != null) {
398 while (j <= m && edgeSpineLabel.charAt(j) != SPINE_ELEMENT_SEPARATOR) {
399 if (edgeSpineLabel.charAt(j) == QUESTIONMARK) {
400 empty_label++;
401 } else {
402 empty_label = 0;
403 }
404 j++;
405 }
406 }
407 lockUpdate = true;
408 Edge e = graph.addPhraseStructureEdge(dependentSpine, child);
409 if (empty_label != 2 && es != j && edgeSpineLabel != null && e != null) {
410 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), edgeSpineLabel.substring(es,j));
411 } else if (es == j) {
412 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), EMPTY_LABEL);
413 }
414
415 lockUpdate = false;
416 child = dependentSpine;
417 if (i >= n) { break; }
418 empty_label = 0;
419 ps = i = i + 1;
420 es = j = j + 1;
421 }
422 }
423
424 // Recursively attach the dependent spines to target node.
425 DependencyNode target = (DependencyNode)depEdge.getTarget();
426 for (int i = 0; i < target.getLeftDependentCount(); i++) {
427 updatePhraseStructureGraph(graph, target.getLeftDependent(i).getHeadEdge(), attachHeadSpineToRoot);
428 }
429 for (int i = target.getRightDependentCount()-1; i >= 0 ; i--) {
430 updatePhraseStructureGraph(graph, target.getRightDependent(i).getHeadEdge(), attachHeadSpineToRoot);
431 }
432 } else {
433 // If dependent spine already exist, then set dependentSpine to the highest nonterminal
434 // of the dependent spine.
435 while (dependentSpine.getParent() != null && !dependentSpine.getParent().isRoot()) {
436 dependentSpine = dependentSpine.getParent();
437 }
438 }
439
440
441 PhraseStructureNode headSpine = null;
442 if (((PhraseStructureNode)depEdge.getSource()).getParent() != null) {
443 // If head spine exist, then attach dependent spine to the head spine at the attachment level a.
444 int a = 0;
445 headSpine = ((PhraseStructureNode)depEdge.getSource()).getParent();
446 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(ATTACH))) {
447 try {
448 a = Integer.parseInt((depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(ATTACH))));
449 } catch (NumberFormatException e) {
450 throw new MaltChainedException(e.getMessage());
451 }
452 }
453 for (int i = 0; i < a && headSpine != null; i++) {
454 headSpine = headSpine.getParent();
455 }
456
457 if ((headSpine == null || headSpine == dependentSpine) && attachHeadSpineToRoot) {
458 headSpine = graph.getPhraseStructureRoot();
459 }
460 if (headSpine != null) {
461 lockUpdate = true;
462 Edge e = graph.addPhraseStructureEdge(headSpine, dependentSpine);
463 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(DEPREL)) && !depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)).equals(EMPTY_LABEL) & e != null) {
464 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)));
465 }
466 lockUpdate = false;
467 }
468 }
469 else if (depEdge.getSource().isRoot() && !depEdge.isLabeled()) {
470 headSpine = graph.getPhraseStructureRoot();
471 lockUpdate = true;
472 Edge e = graph.addPhraseStructureEdge(headSpine, dependentSpine);
473 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(DEPREL)) && !depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)).equals(EMPTY_LABEL) & e != null) {
474 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)));
475 } else {
476 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)));
477 }
478 lockUpdate = false;
479 // Recursively attach the dependent spines to target node.
480 DependencyNode target = (DependencyNode)depEdge.getTarget();
481 for (int i = 0; i < target.getLeftDependentCount(); i++) {
482 updatePhraseStructureGraph(graph, target.getLeftDependent(i).getHeadEdge(), attachHeadSpineToRoot);
483 }
484 for (int i = target.getRightDependentCount()-1; i >= 0 ; i--) {
485 updatePhraseStructureGraph(graph, target.getRightDependent(i).getHeadEdge(), attachHeadSpineToRoot);
486 }
487 }
488 }
489
490 public HeadRules getHeadRules() {
491 return headRules;
492 }
493
494 public void setHeadRules(HeadRules headRules) {
495 this.headRules = headRules;
496 }
497
498 public void setHeadRules(String headRulesURL) throws MaltChainedException {
499 if (headRulesURL != null && headRulesURL.length() > 0 && !headRulesURL.equals("*")) {
500 headRules = new HeadRules(SystemLogger.logger(), phraseStructuretDataFormatInstance);
501 headRules.parseHeadRules(headRulesURL);
502 }
503 }
504 }