Révision 3752

TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/function/ImportCoNLLUAnnotations.java (revision 3752)
24 24
	
25 25
	public static final String[] UD_PROPERTY_NAMES = { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
26 26
	
27
	public static int _importAnnotations(File coonluFile, MainCorpus mainCorpus, String propertiesPrefix, String textId, Boolean normalize_word_ids, Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException, XMLStreamException {
27
	public static int _importAnnotations(File coonluFile, MainCorpus mainCorpus, 
28
			String propertiesPrefix, String textId, 
29
			Boolean normalize_word_ids, 
30
			Boolean importCommentProperties,
31
			Set<String> headPropertiesToProject, 
32
			Set<String> depsPropertiesToProject, 
33
			Set<String> udPropertiesToImport) throws IOException, XMLStreamException {
28 34
		
29 35
		if (textId == null || textId.length() == 0) { // no text id provided, using the conllu file name
30 36
			textId = coonluFile.getName().substring(0, coonluFile.getName().length() - 7);
......
118 124
			}
119 125
			
120 126
			if (sent_id != null) {
121
				properties.put("#"+propertiesPrefix+"sentid", sent_id);
127
				if (importCommentProperties) properties.put("#"+propertiesPrefix+"sentid", sent_id);
122 128
				sent_id = ""; // reset value for next sentence
123 129
			}
124 130
			else {
125
				properties.put("#"+propertiesPrefix+"sentid", "");
131
				if (importCommentProperties) properties.put("#"+propertiesPrefix+"sentid", "");
126 132
			}
127 133
			
128 134
			if (newdoc_id != null) {
129
				properties.put("#"+propertiesPrefix+"newdocid", newdoc_id);
135
				if (importCommentProperties) properties.put("#"+propertiesPrefix+"newdocid", newdoc_id);
130 136
				newdoc_id = null; // reset value for next sentence
131 137
			}
132 138
			else {
133
				properties.put("#"+propertiesPrefix+"newdocid", "");
139
				if (importCommentProperties) properties.put("#"+propertiesPrefix+"newdocid", "");
134 140
			}
135 141
			
136 142
			if (newpar_id != null) {
137
				properties.put("#"+propertiesPrefix+"newparid", newpar_id);
143
				if (importCommentProperties) properties.put("#"+propertiesPrefix+"newparid", newpar_id);
138 144
				newpar_id = null; // reset value for next sentence
139 145
			}
140 146
			else {
141
				properties.put("#"+propertiesPrefix+"newparid", "");
147
				if (importCommentProperties) properties.put("#"+propertiesPrefix+"newparid", "");
142 148
			}
143 149
			
144 150
			if (wId == null) {
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromFile.java (revision 3752)
73 73
	@Option(name = "normalize_word_ids", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false")
74 74
	Boolean normalize_word_ids;
75 75
	
76
	@Option(name = "import_conll_comment_properties", usage = "if set sentid, newdocid and parid will be imported in CQP properties", widget = "Boolean", required = true, def = "false")
77
	Boolean import_conll_comment_properties;
78
	
76 79
	@Option(name = "headPropertiesToProject", usage = "to create the headXYZ properties", widget = "StringArrayMultiple", metaVar="form	lemma	upos	xpos	feats	head	deprel	deps	misc", required = true, def = "deprel,upos")
77 80
	String headPropertiesToProject;
78 81
	
......
117 120
				return null;
118 121
			}
119 122
			
120
			return ImportCoNLLUAnnotationsFromDirectory.importAnnotationsFromCoNLLUFile(mainCorpus, conlluFile, propertiesPrefix, textId, normalize_word_ids, 
123
			return ImportCoNLLUAnnotationsFromDirectory.importAnnotationsFromCoNLLUFile(mainCorpus, conlluFile, propertiesPrefix, textId, normalize_word_ids, import_conll_comment_properties,
121 124
					new HashSet<String>(Arrays.asList(headPropertiesToProject.split(","))), 
122 125
					new HashSet<String>(Arrays.asList(depsPropertiesToProject.split(","))),
123 126
					new HashSet<String>(Arrays.asList(udPropertiesToImport.split(","))));
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromDirectory.java (revision 3752)
80 80
	
81 81
	@Option(name = "normalize_word_ids", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false")
82 82
	Boolean normalize_word_ids;
83
	
84
	@Option(name = "import_conll_comment_properties", usage = "if set sentid, newdocid and parid will be imported in CQP properties", widget = "Boolean", required = true, def = "false")
85
	Boolean import_conll_comment_properties;
83 86
		
84 87
	@Option(name = "headPropertiesToProject", usage = "to create the head-XYZ properties from the word head", widget = "StringArrayMultiple", metaVar="form	lemma	upos	xpos	feats	head	deprel	deps	misc", required = true, def = "deprel,upos")
85 88
	String headPropertiesToProject;
......
114 117
		MainCorpus mainCorpus = corpus.getMainCorpus();
115 118
		
116 119
		try {
117
			HashSet<String> test = new HashSet<>();
118
			for (String p : ImportCoNLLUAnnotations.UD_PROPERTY_NAMES) {
120
			HashSet<String> test = new HashSet<>(); // will contain the CQP properties to update
121
			for (String p : udPropertiesToImport.split(",")) { // test the properties to import
119 122
				if (mainCorpus.getProperty(propertiesPrefix + p) != null) {
120 123
					test.add(propertiesPrefix + p);
121 124
				}
......
125 128
				return null;
126 129
			}
127 130
			
128
			return importAnnotations(mainCorpus, conlluDirectory, propertiesPrefix, normalize_word_ids, 
131
			return importAnnotations(mainCorpus, conlluDirectory, propertiesPrefix, normalize_word_ids, import_conll_comment_properties, 
129 132
					new HashSet<String>(Arrays.asList(headPropertiesToProject.split(","))), 
130 133
					new HashSet<String>(Arrays.asList(depsPropertiesToProject.split(","))),
131 134
					new HashSet<String>(Arrays.asList(udPropertiesToImport.split(","))));
......
151 154
	 * @throws IOException
152 155
	 * @throws XMLStreamException
153 156
	 */
154
	public static int importAnnotations(MainCorpus mainCorpus, File conlluDirectory, String propertiesPrefix, Boolean normalizeWordIds, 
157
	public static int importAnnotations(MainCorpus mainCorpus, File conlluDirectory, String propertiesPrefix, Boolean normalizeWordIds, Boolean importCommentProperties,
155 158
			Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException, CqiServerError, CqiClientException, XMLStreamException {
156 159
		
157 160
		Log.info(TXMCoreMessages.bind("Importing CONLL-U annotations of {0} in {1} using the ''{2}'' prefix...", conlluDirectory, mainCorpus, propertiesPrefix));
......
174 177
		
175 178
		for (File coonluFile : files) {
176 179
			cpb.tick();
177
			nWordsInserted += ImportCoNLLUAnnotations._importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null, normalizeWordIds, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport);
180
			nWordsInserted += ImportCoNLLUAnnotations._importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null, normalizeWordIds, importCommentProperties, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport);
178 181
			nTextProcessed++;
179 182
		}
180 183
		cpb.done();
......
216 219
	 * @throws IOException
217 220
	 * @throws XMLStreamException
218 221
	 */
219
	public static int importAnnotationsFromCoNLLUFile(MainCorpus mainCorpus, File conlluFile, String propertiesPrefix, String textId, Boolean normalize_word_ids, 
222
	public static int importAnnotationsFromCoNLLUFile(MainCorpus mainCorpus, File conlluFile, String propertiesPrefix, String textId, Boolean normalize_word_ids, Boolean importCommentProperties,
220 223
			Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException,
221 224
			CqiServerError, CqiClientException, XMLStreamException {
222 225
		Log.info(TXMCoreMessages.bind("Importing CONLL-u annotations of {0} in {1} using the ''{2}'' prefix...", conlluFile, mainCorpus, propertiesPrefix));
223 226
		
224
		int nWordsInserted = ImportCoNLLUAnnotations._importAnnotations(conlluFile, mainCorpus, propertiesPrefix, textId, normalize_word_ids, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport);
227
		int nWordsInserted = ImportCoNLLUAnnotations._importAnnotations(conlluFile, mainCorpus, propertiesPrefix, textId, normalize_word_ids, importCommentProperties, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport);
225 228
		
226 229
		if (nWordsInserted == 0) {
227 230
			Log.warning("** No annotation imported. Aborting.");

Formats disponibles : Unified diff