/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 2938

     import java.util.ArrayList;
     import java.util.Arrays;
     import java.util.HashMap;
     import java.util.HashSet;
     import java.util.List;
     import java.util.Map.Entry;
-...
     public class XMLTXMWordPropertiesInjection extends XMLProcessor {
     	HashMap<String, HashMap<String, String>> rules;
     	HashSet<String> nonActivatedRules = new HashSet<String>();
     	XPathHookActivator activator;
-...
     	public void addProperty(String id, HashMap<String, String> properties) throws IOException, XMLStreamException {
     		if (rules != null) {
     			rules.put(id, properties);
     			nonActivatedRules.add(id);
+    		}
+    	}
     	public HashSet<String> getNonActivatedRules() {
     		return nonActivatedRules;
+    	}
     	/**
     	 * @param rules the keys are the identifiers (id) of the words to process; the values are hashmaps mapping each ana@type to the ana@value to inject. Warning: ana@type must be prefixed with "#"
+    	 *
-...
     	public void setProperties(HashMap<String, HashMap<String, String>> rules) throws IOException, XMLStreamException {
     		this.rules = rules;
     		nonActivatedRules.addAll(rules.keySet());
     		activator = new XPathHookActivator<>(hook, "//w");
-...
     				id = parser.getAttributeValue(null, "id");
     				if (id != null && rules.containsKey(id)) {
     					nonActivatedRules.remove(id); // the rule has been activated once
     					anaValues.clear(); // empty ana values
     					formValues.clear(); // empty form values
     					nInsertions++;

     	@Option(name = "connluResultDirectory", usage = "connluResultDirectory", widget = "Folder", required = true, def = "connlu-result-directory")
     	File connluResultDirectory;
     	@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud")
     	@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-")
     	String propertiesPrefix;
     	@Option(name = "separator", usage = "Options", widget = "Separator", required = true, def = "options")
-...
     	@Option(name = "insertParagraphs", usage = "Insert paragraph marks in the Connlu corpus", widget = "Boolean", required = true, def = "true")
     	Boolean insertParagraphs = false;
     	// @Option(name = "detectGap", usage = "Insert gap comment using the CQP 'gap' property", widget = "Boolean", required = true, def = "true")
     	@Option(name = "detectGap", usage = "Insert gap comment using the CQP 'gap' property", widget = "Boolean", required = true, def = "true")
     	Boolean detectGap = false;
     	@Option(name = "formCorrPropertyName", usage = "optional CQP property to fix the missing 'form' ud property", widget = "String", required = false, def = "")
-...
     	@Option(name = "xposCorrPropertyName", usage = "optional CQP property to fix the missing 'xpos' ud property", widget = "String", required = false, def = "")
     	String xposCorrPropertyName;
     	@Option(name = "punctStrong", usage = "optional strong punct tag to fix sentence limits", widget = "String", required = true, def = "PONfrt")
     	String punctStrong;
     	@Option(name = "separator2", usage = "Options", widget = "Separator", required = true, def = "sentence fix options")
     	Boolean separator2 = false;
     	// @Option(name = "punctStrong", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "[?!\\.]")
     	// String punctStrong;
     	// Regular expression tested against word forms (String.matches) when fixing sentence limits:
     	// a matching form at a sentence edge is treated as opening punctuation.
     	@Option(name = "openingPunct", usage = "regular expression matching the opening punctuation forms used to fix sentence limits", widget = "String", required = true, def = "[\\-–«‘“\\(]")
     	String openingPunct;
-...
     		try {
     			return exportAnnotationsAsCorpus(mainCorpus, connluResultDirectory, propertiesPrefix, openingPunct,
     					formCorrPropertyName, lemmaCorrPropertyName, uposCorrPropertyName, xposCorrPropertyName,
     					formCorrPropertyName, lemmaCorrPropertyName, uposCorrPropertyName, xposCorrPropertyName, punctStrong,
     					detectGap, insertParagraphs, insertNoSpaceAfter);
+    		}
     		catch (Exception e) {
-...
     	 * @throws InvalidCqpIdException
     	 */
     	public static int exportAnnotationsAsCorpus(MainCorpus mainCorpus, File conlluResultDirectory, String prefix, String openingPunct, String formCorrPropertyName, String lemmaCorrPropertyName,
     			String uposCorrPropertyName, String xposCorrPropertyName, boolean detectGap, boolean insertParagraphs, boolean insertNoSpaceAfter) throws UnexpectedAnswerException,
     			String uposCorrPropertyName, String xposCorrPropertyName, String punctStrongRegex, boolean detectGap, boolean insertParagraphs, boolean insertNoSpaceAfter) throws UnexpectedAnswerException,
     			IOException,
     			CqiServerError,
     			CqiClientException, InvalidCqpIdException {
-...
     			// fixing sentences
     			for (int s = 0; s < sentences.size(); s++) {
     				// fix only ud sentences limits
     				ArrayList<Integer> sentence = sentences.get(s);
     				if (sentidStartPositions.get(sentence.get(0)) == null) {
     					continue; // this is not a UD sentence
+    				}
     				int max = -1;
     				int imax = 0;
     				for (int ip = 0; ip < sentence.size(); ip++) {
-...
+    				}
+    			}
     			// fixing sentences
     			for (int s = 0; s < sentences.size(); s++) {
     				ArrayList<Integer> sentence = sentences.get(s);
     				if (s > 0 && formValues[sentence.get(0)].matches(openingPunct)) {
     					System.out.println("FIXING: first position " + formValues[sentence.get(0)] + "in " + s);
     					int p = sentence.remove(0);
     					sentences.get(s - 1).add(p);
+    				}
     				if (s + 1 < sentences.size() && formValues[sentence.get(sentence.size() - 1)].matches(openingPunct)) {
     					System.out.println("FIXING: last position " + formValues[sentence.get(sentence.size() - 1)] + "in " + s);
     					int p = sentence.remove(sentence.size() - 1);
     					sentences.get(s + 1).add(0, p);
+    				}
     				// int c = 0;
     				// ArrayList<Integer> sentence = sentences.get(s);
     				// for (int ip = 0 ; ip < sentence.size() ; ip++) {
     				//
     				// int p = sentence.get(ip);
     				//
     				// if (idValues[p].equals("__UNDEF__")) {
     				// c++;
     				// }
     				// }
     				// if (c == 0) { // all is fine
     				//
     				// } else if (c )
+    			}
     			if (tmpSentence.size() > 0) { // add last sentence
     				sentences.add(new ArrayList<>(tmpSentence));
+    			}
-...
+    							}
+    						}
+    					}
+    				}
     				// fixing sentence punct limits
     //					while (sentence.size() > 0 && iSentence > 0 && xpos[0].matches(punctStrongRegex)) {
     //						System.out.println("FIXING: first punctStrong position " + xposValues[sentence.get(0)] + " in " + iSentence);
     //						int p2 = sentence.remove(0);
     //						sentences.get(iSentence - 1).add(p2);
     //					}
     					while (sentence.size() > 0 && iSentence > 0 && formValues[sentence.get(0)].matches("\\p{P}") && !formValues[sentence.get(0)].matches(openingPunct)) {
     						System.out.println("FIXING: first non-openingPunct position " + formValues[sentence.get(0)] + " in " + iSentence);
     						int p2 = sentence.remove(0);
     						sentences.get(iSentence - 1).add(p2);
+    					}
     //
     					while (sentence.size() > 0 && iSentence + 1 < sentences.size() && formValues[sentence.get(sentence.size() - 1)].matches(openingPunct)) {
     						System.out.println("FIXING: last openingPunct position " + formValues[sentence.get(sentence.size() - 1)] + " in " + iSentence);
     						int p2 = sentence.remove(sentence.size() - 1);
     						sentences.get(iSentence + 1).add(0, p2);
+    					}
     				if (sentence.size() == 0) { // sentence was depleted after fixing it
     					sentences.remove(iSentence);
     					iSentence--;
     					continue;
+    				}
     				// fixing head and set missing head to 0 and root
     				for (int ip = 0; ip < sentence.size(); ip++) {
     					int p = sentence.get(ip);
-...
     					iParagraph++;
+    				}
     				for (int p : sentence) {
     					if (gap != null && gap[p] != null) writer.println("# gap");
     				for (int ip = 0 ; ip < sentence.size() ; ip++) {
     					int p = sentence.get(ip);
     					// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
     					writer.println(idValues[p] + "\t" + formValues[p] + "\t" + lemmaValues[p] + "\t" + uposValues[p]
     							+ "\t" + xposValues[p] + "\t" + featsValues[p] + "\t" + headValues[p] + "\t" + deprelValues[p]
     							+ "\t" + depsValues[p] + "\t" + miscValues[p]);
     					if (gap != null && gap[ip].equals("next")) writer.println("# gap");
+    				}
     				writer.println("");
     				numberOfSentencesWritten++;

     import javax.xml.stream.XMLStreamException;
     import org.apache.commons.lang.StringUtils;
     import org.eclipse.core.commands.AbstractHandler;
     import org.eclipse.core.commands.ExecutionEvent;
     import org.eclipse.core.commands.ExecutionException;
-...
     	@Option(name = "connluDirectory", usage = "connluDirectory", widget = "Folder", required = true, def = "connlu-directory")
     	File connluDirectory;
     	@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud")
     	@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-")
     	String propertiesPrefix;
     	/*
-...
+    				}
     				if (sent_id != null) {
     					properties.put("#ud-sentid", sent_id);
     					sent_id = null;
     					sent_id = ""; // reset value for next sentence
     				} else {
     					properties.put("#ud-sentid", "");
+    				}
     				if (newdoc_id != null) {
     					properties.put("#ud-newdocid", newdoc_id);
     					newdoc_id = null;
     					newdoc_id = null; // reset value for next sentence
     				} else {
     					properties.put("#ud-newdocid", "");
+    				}
     				if (newpar_id != null) {
     					properties.put("#ud-newparid", newpar_id);
     					newpar_id = null;
     					newpar_id = null; // reset value for next sentence
     				} else {
     					properties.put("#ud-newparid", "");
+    				}
     				processor.addProperty(id, properties);
     				nWords2++;
-...
     			if (processor.process(xmltxmUpdatedFile)) {
     				if (xmltxmFile.delete() && FileCopy.copy(xmltxmUpdatedFile, xmltxmFile)) {
     					if (processor.getNonActivatedRules().size() > 0) {
     						Log.warning("Warning: some words were not imported: "+StringUtils.join(processor.getNonActivatedRules(), ", "));
+    					}
+    				}
     				else {
     					Log.warning("** Warning: annotation import failed for replace the corpus XML-TXM file: " + xmltxmFile + ". TEMP file: " + xmltxmUpdatedFile);

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 2938