Révision 3283

TXM/branches/eltec/org.txm.groovy.core/src/groovy/org/txm/scripts/filters/Tokeniser/ChunkTokenizerXml.groovy (revision 3283)
41 41
// $LastChangedRevision:$
42 42
// $LastChangedBy:$
43 43
//
44
package org.txm.tokenizer;
44
package org.txm.scripts.filters.Tokeniser;
45 45

  
46 46
import static groovy.transform.TypeCheckingMode.SKIP
47 47
import groovy.transform.CompileStatic
......
52 52

  
53 53
import javax.xml.stream.*
54 54

  
55
import org.eclipse.ui.internal.dialogs.NewContentTypeDialog
55 56
import org.txm.importer.PersonalNamespaceContext
56 57
import org.txm.tokenizer.StringTokenizer
57 58
import org.txm.tokenizer.SimpleStringTokenizer
......
63 64
import org.txm.xml.XPathHookActivator
64 65
import org.w3c.dom.Node
65 66

  
66
@CompileStatic
67 67
public class ChunkTokenizerXml extends XMLProcessor {
68 68
	
69 69
	XPathHookActivator activator;
......
119 119
	/** The prefix. */
120 120
	String prefix;
121 121
	String filename;
122
	int wordcount = 0;
122 123
	
123 124
	Pattern regLN;
124 125
	Pattern regCTRL;
......
139 140
	 */
140 141
	public ChunkTokenizerXml(File infile, TokenizerClasses tc) {
141 142
		super(infile)
143
		
142 144
		this.lang = tc.lang;
143 145
		this.stringTokenizer = new SimpleStringTokenizer(lang);
144 146
		
......
158 160
		regLN = Pattern.compile("/\n/");
159 161
		regCTRL = Pattern.compile("/\\p{C}/");
160 162
		
161
		activator = new XPathHookActivator<>(hook, "//div|p|ab");
163
		activator = new XPathHookActivator<>(hook, "//(div|p|ab|note|s|list|head|front|body|back|text)");
162 164
		
163 165
		hook = new DOMIdentityHook("in_text_hook", activator, this) {
164
			
165
			String id;
166
			
167
			boolean inAna = false;
168
			
169
			boolean inForm = false;
170
			
171
			boolean inW = false;
172
			
173
			ArrayList<String[]> anaValues = new ArrayList<>();
174
			
175
			ArrayList<String[]> formValues = new ArrayList<>();
176
			
177
			StringBuilder value = new StringBuilder();
178
			
179
			String resp = "";
180
			
181
			String type = "";
182
			
183
			/**
166
					
167
					String id;
168
					
169
					boolean inAna = false;
170
					
171
					boolean inForm = false;
172
					
173
					boolean inW = false;
174
					
175
					ArrayList<String[]> anaValues = new ArrayList<>();
176
					
177
					ArrayList<String[]> formValues = new ArrayList<>();
178
					
179
					StringBuilder value = new StringBuilder();
180
					
181
					String resp = "";
182
					
183
					String type = "";
184
					
185
					/**
184 186
			 * extends this method to process the DOM before it is written
185 187
			 */
186
			public void processDom() {
187
				
188
				ArrayList<Node> textNodes = getTextNodes(dom);
189
				if (textNodes.size() == 0) return; // easy
190
				
191
				StringBuilder buffer = new StringBuilder(); // build a string to tokenize
192
				for (Node textNode : textNodes) {
193
					buffer.append(" "+textNode.getTextContent());
194
				}
195
				
196
				int nNode = 0;
197
				Node currentTextNode = textNodes.get(0);
198
				String currentText = currentTextNode.getTextContent();
199
				int curentTextIndex = 0;
200
				StringBuilder currentNewText = new StringBuilder()
201
				ArrayList<String> currentWords = new ArrayList<String>()
202
				List<List<String>> sentences = stringTokenizer.processText(buffer.toString());
203
				//println "text="+buffer.toString()
204
				println "sentences=$sentences"
205
				for (List<String> sent : sentences) {
206
					if (nNode >= textNodes.size()) { // all nodes are updated
207
						break;
208
					}
209
					
210
					for (String word : sent) {
211
						if (nNode >= textNodes.size()) { // all nodes are updated
212
							break;
188
					public void processDom() {
189
						
190
						//println "Processing DOM with $stringTokenizer"
191
						ArrayList<Node> textNodes = getTextNodes(dom);
192
						if (textNodes.size() == 0) return; // easy
193
						
194
						StringBuilder buffer = new StringBuilder(); // build a string to tokenize
195
						for (Node textNode : textNodes) {
196
							buffer.append(" "+textNode.getTextContent());
213 197
						}
214 198
						
215
						int idx = currentText.indexOf(word, curentTextIndex);
216
						if (idx >= 0) {
217
							curentTextIndex = idx + word.length();
218
						} else {
219
							println "HOUSTON: word=$word nNode=$nNode currentText=$currentText index=$curentTextIndex words=$currentWords"
220
							currentTextNode.setTextContent("");
221
							for (String w : currentWords) {
222
								Node newChild = dom.getOwnerDocument().createElementNS(null, "w");
223
								newChild.setAttribute("id", "W_ID")
224
								newChild.setTextContent(w);
199
						int nNode = 0;
200
						Node currentTextNode = textNodes.get(0);
201
						String currentText = currentTextNode.getTextContent();
202
						int curentTextIndex = 0;
203
						StringBuilder currentNewText = new StringBuilder()
204
						ArrayList<String> currentWords = new ArrayList<String>()
205
						//println "text="+buffer.toString()
206
						List<List<List<String>>> sentences = stringTokenizer.processText(buffer.toString());
207
						//println "sentences=$sentences"
208
						for (List<List<String>> sent : sentences) {
209
							//println "sent: $sent"
210
							for (int iWord = 0 ; iWord < sent.size() ; iWord++) {
225 211
								
226
								currentTextNode.getParentNode().insertBefore(newChild, currentTextNode)
212
								List<String> word = sent.get(iWord)
213
								int idx = currentText.indexOf(word[0], curentTextIndex);
214
								//println "TEST: $word at $curentTextIndex in $currentText"
215
								if (idx >= 0) { // the whole word is in the current node text content
216
									curentTextIndex = idx + word[0].length();
217
									currentWords.add(word)
218
									continue // NEXT WORD
219
								}
220
								
221
								// see if the word is partially in the current text node
222
//								String partialWord = ""
223
//								for (int c = 0 ; c < word.length() - 1 ; c++) {
224
//									idx = currentText.indexOf(word.substring(0, word.length() - c), curentTextIndex);
225
//									if (idx >= 0) { // the partial word is in the current node text content
226
//										partialWord = word.substring(0, word.length() - c)
227
//										break;
228
//									}
229
//								}
230
//								if (partialWord.length() > 0) { // yes, the word is partially in the text node
231
//									//println "partialword=$partialWord"
232
//									def sword = word.substring(partialWord.length())
233
//									//println "sword=$sword"
234
//									if (sword.length() > 0) { // should always happen
235
//										sent.set(iWord, sword)									
236
//										//iWord-- // to re-process the word
237
//									}
238
//									curentTextIndex = idx + partialWord.length()
239
//									currentWords.add(partialWord.toString())
240
//								}
241

  
242
								if (currentWords.size() > 0) {
243
									writeWords(nNode, currentText, curentTextIndex, currentTextNode, currentWords)
244
								}
245
								
246
								if (nNode < textNodes.size() - 1) {
247
									currentNewText = new StringBuilder()
248
									curentTextIndex = 0;
249
									nNode++;
250
									
251
									currentTextNode = textNodes.get(nNode);
252
									currentText = currentTextNode.getTextContent();
253
									
254
									iWord--; //
255
								} else {
256
									currentWords.add(word)
257
								}
227 258
							}
228
							currentTextNode.getParentNode().removeChild(currentTextNode)
229 259
							
230
							currentNewText = new StringBuilder()
231
							currentWords.clear();
232
							curentTextIndex = 0;
233
							nNode++;
234
							if (nNode < textNodes.size()) {
235
								currentTextNode = textNodes.get(nNode);
236
								currentText = currentTextNode.getTextContent();
260
							if (currentWords.size() > 0) {
261
								writeWords(nNode, currentText, curentTextIndex, currentTextNode, currentWords)
237 262
							}
238 263
							
264
							if (stringTokenizer.doSentences()) {
265
								Node newChild = dom.getOwnerDocument().createProcessingInstruction("txm", "</s>")
266
								currentTextNode.getParentNode().insertBefore(newChild, currentTextNode)//appendChild(newChild)//
267
							}
239 268
						}
240
						
241
						currentWords.add(word)
242 269
					}
243
				}
244
			}
245
			
246
			public ArrayList<Node> getTextNodes(Node element) {
247
				def children = element.getChildNodes()
248
				ArrayList<Node> texts = new ArrayList<Node>()
249
				for (int i = 0 ; i < children.getLength() ; i++) {
250
					def node = children.item(i);
251
					if (node.getNodeType() == Node.TEXT_NODE) {
252
						texts.add(node)
253
					} else if (node.getNodeType() == Node.ELEMENT_NODE) {
254
						if (node.getLocalName().equals("w")) {
255
							texts.add(node)
256
						} else {
257
							texts.addAll(getTextNodes(node));
270
					
271
					/**
					 * Writes the pending tokens as {@code w} elements in front of a text node.
					 *
					 * For every entry of {@code currentWords} a new {@code w} element is created,
					 * given an id of the form {@code "w_" + filename + "_" + wordcount}
					 * (the enclosing tokenizer's counter is incremented once per word), filled
					 * with the additional properties declared by the string tokenizer, and
					 * inserted before {@code currentTextNode} in its parent. The text node itself
					 * is emptied but left in the tree.
					 *
					 * @param nNode index of the text node; only used by the commented-out debug trace
					 * @param currentText original text of the node; only used by the commented-out debug trace
					 * @param curentTextIndex read position in the text; only used by the commented-out debug trace
					 * @param currentTextNode the text node being replaced; used as insertion point
					 * @param currentWords the tokens to write; each token is indexable, with the form
					 *        at index 0 and the additional property values from index 1 onwards.
					 *        The list is cleared before returning.
					 */
					public void writeWords(def nNode, def currentText, def curentTextIndex, def currentTextNode, def currentWords) {
272
						//println "WRITING: nNode=$nNode currentText='${currentText.replace("\\n", " ")}' index=$curentTextIndex words=$currentWords"
273
						currentTextNode.setTextContent(""); // empty the node; it stays in place as the insertion anchor
274
						for (def w : currentWords) {
275
							Node newChild = dom.getOwnerDocument().createElementNS(null, "w");
276
							wordcount++
277
							newChild.setAttribute("id", "w_"+filename+"_"+wordcount)
278
							for (int i = 0 ; i < stringTokenizer.getAdditionalProperties().size() ; i++) {
279
								//println "write att: "+stringTokenizer.getAdditionalProperties()[i]+"="+w[i+1]
280
								newChild.setAttribute(stringTokenizer.getAdditionalProperties()[i], w[i+1]) // property i is stored at w[i+1]; w[0] is the form
281
							}
282
							newChild.setTextContent(w[0]);
283
							
284
							currentTextNode.getParentNode().insertBefore(newChild, currentTextNode)
258 285
						}
286
						currentWords.clear(); // the caller reuses the same list for the next text node
259 287
					}
260
				}
261
				return texts;
262
			}
263
		};
288
					
289
					/**
					 * Recursively collects the text nodes found below an element, in child order.
					 *
					 * A text node is kept only if its content is not empty once trimmed.
					 * Child elements are handled as follows:
					 * - {@code w} elements are skipped (see the note in the body);
					 * - elements whose local name matches {@code reg_outside_text_tags_keep_content}
					 *   or {@code reg_outside_text_tags} are skipped together with their content;
					 * - any other element is explored recursively.
					 *
					 * NOTE(review): org.w3c.dom.Node#getLocalName() can return null for nodes
					 * created without namespace support; confirm the DOM handed to this hook is
					 * namespace-aware, otherwise the equals(...) call below would throw.
					 *
					 * @param element the node whose children are scanned
					 * @return the collected text nodes (possibly empty, never null)
					 */
					public ArrayList<Node> getTextNodes(Node element) {
290
						def children = element.getChildNodes()
291
						ArrayList<Node> texts = new ArrayList<Node>()
292
						for (int i = 0 ; i < children.getLength() ; i++) {
293
							def node = children.item(i);
294
							if (node.getNodeType() == Node.TEXT_NODE && node.getTextContent().trim().length() > 0) {
295
								texts.add(node)
296
							} else if (node.getNodeType() == Node.ELEMENT_NODE) {
297
								if (node.getLocalName().equals("w")) {
298
									if (retokenize) {
299
										// the add is commented out, so existing w elements are never collected and the retokenize flag has no effect here
										//texts.add(node)
300
									}
301
								} else if (reg_outside_text_tags_keep_content != null && reg_outside_text_tags_keep_content.matcher(node.getLocalName()).matches()) {
302
									//texts.add(node)
303
								} else if (reg_outside_text_tags != null && reg_outside_text_tags.matcher(node.getLocalName()).matches()) { // ignore the tag and its content of the tag
304
									//texts.add(node)
305
								} else {
306
									texts.addAll(getTextNodes(node));
307
								}
308
							}
309
						}
310
						return texts;
311
					}
312
				};
264 313
	}
265 314
	
266 315
	/**
......
411 460
	}
412 461
	
413 462
	
414
	
415
	/** The wordcount. */
416
	int wordcount = 0;
417
	
418 463
	/** The ignorecontent. */
419 464
	boolean ignorecontent = true; // tokenization starts with element name matching $reg_startTag
420 465
	boolean insideword = false;
......
628 673
		text = regLN.matcher(text).replaceAll(WHITESPACE);
629 674
		text = regCTRL.matcher(text).replaceAll(EMPTY);						// remove ctrl characters
630 675
		
676
		println "tokenize text"
631 677
		def sentences = stringTokenizer.processText(text);
632 678
		for (def words : sentences) {
633 679
			for (def word : words) {
634 680
				wordcount++;
635 681
				writer.writeStartElement(word_element_to_create);
636 682
				writeWordAttributes();// id
637
				writer.writeCharacters(word);
683
				for (int i = 0 ; i < stringTokenizer.getAdditionalProperties() ; i++) {
684
					String att = stringTokenizer.getAdditionalProperties().get(i)
685
					writer.writeAttribute(att, word[i+1])
686
				}
687
				writer.writeCharacters(word[0]);
638 688
				writer.writeEndElement();
639 689
				writer.writeCharacters("\n");
690
				println "WRITE WORD: "+word[0]+" in "+writer
640 691
			}
641 692
			if (stringTokenizer.doSentences())  {
642 693
				writer.writeProcessingInstruction("txm", "</s>")
TXM/branches/eltec/org.txm.groovy.core/src/groovy/org/txm/scripts/filters/Tokeniser/SimpleTokenizerXml.groovy (revision 3283)
518 518
				wordcount++;
519 519
				writer.writeStartElement(word_element_to_create);
520 520
				writeWordAttributes();// id
521
				writer.writeCharacters(word);
521
				for (int i = 0 ; i < stringTokenizer.getAdditionalProperties() ; i++) {
522
					String att = stringTokenizer.getAdditionalProperties().get(i)
523
					writer.writeAttribute(att, word[i+1])
524
				}
525
				writer.writeCharacters(word[0]);
522 526
				writer.writeEndElement();
523 527
				writer.writeCharacters("\n");
524 528
			}
TXM/branches/eltec/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 3283)
1 1
package org.txm.scripts.importer.xtz
2 2

  
3
import org.txm.scripts.filters.Tokeniser.SimpleTokenizerXml
3
import org.txm.scripts.filters.Tokeniser.ChunkTokenizerXml
4 4

  
5 5
import java.io.File;
6 6

  
......
390 390
				cpb.tick()
391 391
				File infile = f;
392 392
				File outfile = new File(module.getBinaryDirectory(),"tokenized/"+f.getName());
393
				SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(infile, outfile, TokenizerClasses.newTokenizerClasses(project.getPreferencesScope(), lang))
394
				if (module.getProject().getAnnotate()) { // an annotation will be done, does the annotation engine needs another tokenizer ?
393
				ChunkTokenizerXml tokenizer = new ChunkTokenizerXml(infile, TokenizerClasses.newTokenizerClasses(project.getPreferencesScope(), lang))
394
				//if (module.getProject().getAnnotate()) { // an annotation will be done, does the annotation engine needs another tokenizer ?
395 395
					String engineName = module.getProject().getImportParameters().node("annotate").get("engine", "TreeTagger")
396 396
					def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName)
397 397
					def stringTokenizer = engine.getStringTokenizer(lang)
398 398
					if (stringTokenizer != null) {
399 399
						tokenizer.setStringTokenizer(stringTokenizer)
400 400
					}
401
				}
401
				//}
402 402
				tokenizer.setRetokenize(retokenize)
403 403
				if (outSideTextTagsRegex != null && outSideTextTagsRegex.trim().length() > 0) {
404 404
					tokenizer.setOutSideTextTags(outSideTextTagsRegex)
......
413 413
				}
414 414
				
415 415
				// tokenize !
416
				if (!tokenizer.process()) {
416
				if (!tokenizer.process(outfile)) {
417 417
					println("Failed to process "+f)
418 418
					outfile.delete()
419 419
				}
TXM/branches/eltec/org.txm.udpipe.core/src/org/txm/udpipe/core/UDPipeEngine.java (revision 3283)
20 20
import org.txm.utils.logger.Log;
21 21
import org.txm.xml.xmltxm.XMLTXMWordPropertiesInjection;
22 22

  
23
import cz.cuni.mff.ufal.udpipe.InputFormat;
24
import cz.cuni.mff.ufal.udpipe.Model;
23
import cz.cuni.mff.ufal.udpipe.MultiwordToken;
24
import cz.cuni.mff.ufal.udpipe.MultiwordTokens;
25 25
import cz.cuni.mff.ufal.udpipe.Sentence;
26 26
import cz.cuni.mff.ufal.udpipe.Sentences;
27 27
import cz.cuni.mff.ufal.udpipe.Version;
......
118 118
	 */
119 119
	public static String[] propNames = { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
120 120
	
121
	
121
	/**
122
	 * TODO re-think the process. The UDPStringTokenizer does the job
123
	 */
122 124
	@Override
123 125
	public boolean processFile(File xmlFile, File binaryCorpusDirectory, HashMap<String, Object> parameters) {
124 126
		if (!isRunning()) return false;
125 127
		
126
		String lang = null;
127
		Object ps = parameters.get("langs");
128
		Object p = parameters.get("lang");
128
//		String lang = null;
129
//		Object ps = parameters.get("langs");
130
//		Object p = parameters.get("lang");
131
//		
132
//		if (p == null && ps == null) {
133
//			Log.warning("Warning: can't annotate. No 'lang' (String) or 'langs' (Map<String, String>) parameter specified in " + parameters);
134
//			return false;
135
//		}
136
//		
137
//		if (ps != null && ps instanceof Map) {
138
//			Map<?, ?> map = (Map<?, ?>) ps;
139
//			String text_id = xmlFile.getName();
140
//			if (map.get(text_id) != null) {
141
//				lang = map.get(text_id).toString().toLowerCase();
142
//				if (!canAnnotateLang(lang)) {
143
//					Log.warning("Warning: can't annotate text_id=${text_id} with $lang, will use the default lang=$p");
144
//					return false;
145
//				}
146
//			}
147
//		}
148
//		
149
//		if (lang == null && p == null) {
150
//			System.out.println(NLS.bind("** Error: no 'lang' parameter given: {0}. Aborting TreeTagger annotation.", parameters));
151
//			return false;
152
//		}
153
//		else {
154
//			lang = p.toString();
155
//		}
156
//		
157
//		if (!canAnnotateLang(lang)) {
158
//			return false;
159
//		}
160
//		
161
//		File modelsDirectory = new File(UDPipePreferences.getInstance().getString(UDPipePreferences.MODELS_PATH)); // default models directory is set in the Toolbox
162
//		File modelFile = new File(modelsDirectory, lang + ".udpipe");
163
//		//System.out.println("model="+modelFile.getAbsolutePath());
164
//		
165
//		try {
166
//			long time = System.currentTimeMillis();
167
//			// get words
168
//			XMLTXMToUDPipeXMLParser wparser = new XMLTXMToUDPipeXMLParser(xmlFile.toURI().toURL());
169
//			if (!wparser.process(null)) {
170
//				Log.warning("Error while parsing: " + xmlFile);
171
//				return false;
172
//			}
173
//			//System.out.println("words built in: "+(System.currentTimeMillis() - time));
174
//			Sentences sentences = wparser.getSentences();
175
//			
176
//			time = System.currentTimeMillis();
177
//			// System.out.println("SENTENCES PARSED: " + UDPipeJavaUtils.toString(sentences));
178
//			// tag
179
//			UDPipeJavaUtils.processSentences(modelFile.getAbsolutePath(), sentences);
180
//			//System.out.println("sentences parsed in: "+(System.currentTimeMillis() - time));
181
//			// System.out.println("SENTENCES RESULT: " + UDPipeJavaUtils.toString(sentences));
182
//			
183
//			// update the XML-TXM file
184
//			time = System.currentTimeMillis();
185
//			XMLTXMWordPropertiesInjection injector = new XMLTXMWordPropertiesInjection(xmlFile);
186
//			HashMap<String, HashMap<String, String>> rules = new HashMap<>();
187
//			for (int iSentence = 0; iSentence < sentences.size(); iSentence++) {
188
//				Sentence sentence = sentences.get(iSentence);
189
//				Words words = sentence.getWords();
190
//				MultiwordTokens mwt = sentence.getMultiwordTokens();
191
//				HashMap<Integer, MultiwordToken> firstWordToToken = new HashMap<Integer, MultiwordToken>();
192
//				HashMap<Integer, MultiwordToken> lastWordToToken = new HashMap<Integer, MultiwordToken>();
193
//				for (int i = 0 ; i < mwt.size(); i++) {
194
//					MultiwordToken mw = mwt.get(i);
195
//					firstWordToToken.put(mw.getIdFirst(), mw);
196
//					lastWordToToken.put(mw.getIdLast(), mw);
197
//				}
198
//				
199
//				for (int iWord = 0; iWord < words.size(); iWord++) {
200
//					Word word = words.get(iWord);
201
//					String form = word.getForm();
202
//					if (Sentence.getRootForm().equals(form)) continue;
203
//					
204
//					if (firstWordToToken.containsKey(word.getId())) {
205
//						MultiwordToken mw = firstWordToToken.get(word.getId());
206
//						
207
//						String misc = mw.getMisc();
208
//						int idx = misc.indexOf(XMLIDMISC);
209
//						int idx2 = misc.indexOf("|", idx + 6);
210
//						if (idx2 < 0) idx2 = misc.length();
211
//						String id = misc.substring(idx + 6, idx2);
212
//						if (id != null && id.length() > 0) {
213
//							HashMap<String, String> properties = UDPipeJavaUtils.wordToHashMap(sentence, mw, "#ud-");
214
//							rules.put(id, properties);
215
//						}
216
//					} else if (lastWordToToken.containsKey(word.getId())) {
217
//						// already written
218
//					} else {
219
//						
220
//						String misc = word.getMisc();
221
//						int idx = misc.indexOf(XMLIDMISC);
222
//						int idx2 = misc.indexOf("|", idx + 6);
223
//						if (idx2 < 0) idx2 = misc.length();
224
//						String id = misc.substring(idx + 6, idx2);
225
//						if (id != null && id.length() > 0) {
226
//							HashMap<String, String> properties = UDPipeJavaUtils.wordToHashMap(word, "#ud-");
227
//							rules.put(id, properties);
228
//						}
229
//					}
230
//				}
231
//			}
232
//			File outFile = new File(binaryCorpusDirectory, xmlFile.getName() + ".tmp");
233
//			injector.setProperties(rules);
234
//			if (injector.process(outFile) && outFile.exists()) {
235
//				xmlFile.delete();
236
//				outFile.renameTo(xmlFile);
237
//			}
238
//			else {
239
//				Log.warning("Error while processing: " + xmlFile);
240
//			}
241
//			System.out.println("ud properties injected in: "+(System.currentTimeMillis() - time));
242
//		}
243
//		catch (IOException | XMLStreamException e) {
244
//			// TODO Auto-generated catch block
245
//			e.printStackTrace();
246
//		}
129 247
		
130
		if (p == null && ps == null) {
131
			Log.warning("Warning: can't annotate. No 'lang' (String) or 'langs' (Map<String, String>) parameter specified in " + parameters);
132
			return false;
133
		}
134
		
135
		if (ps != null && ps instanceof Map) {
136
			Map<?, ?> map = (Map<?, ?>) ps;
137
			String text_id = xmlFile.getName();
138
			if (map.get(text_id) != null) {
139
				lang = map.get(text_id).toString().toLowerCase();
140
				if (!canAnnotateLang(lang)) {
141
					Log.warning("Warning: can't annotate text_id=${text_id} with $lang, will use the default lang=$p");
142
					return false;
143
				}
144
			}
145
		}
146
		
147
		if (lang == null && p == null) {
148
			System.out.println(NLS.bind("** Error: no 'lang' parameter given: {0}. Aborting TreeTagger annotation.", parameters));
149
			return false;
150
		}
151
		else {
152
			lang = p.toString();
153
		}
154
		
155
		if (!canAnnotateLang(lang)) {
156
			return false;
157
		}
158
		
159
		File modelsDirectory = new File(UDPipePreferences.getInstance().getString(UDPipePreferences.MODELS_PATH)); // default models directory is set in the Toolbox
160
		File modelFile = new File(modelsDirectory, lang + ".udpipe");
161
		//System.out.println("model="+modelFile.getAbsolutePath());
162
		
163
		try {
164
			// get words
165
			XMLTXMToUDPipeXMLParser wparser = new XMLTXMToUDPipeXMLParser(xmlFile.toURI().toURL());
166
			if (!wparser.process(null)) {
167
				Log.warning("Error while parsing: " + xmlFile);
168
				return false;
169
			}
170
			
171
			Sentences sentences = wparser.getSentences();
172
						
173
			// System.out.println("SENTENCES PARSED: " + UDPipeJavaUtils.toString(sentences));
174
			// tag
175
			UDPipeJavaUtils.processSentences(modelFile.getAbsolutePath(), sentences);
176
			
177
			// System.out.println("SENTENCES RESULT: " + UDPipeJavaUtils.toString(sentences));
178
			
179
			// update XML-TXM files
180
			XMLTXMWordPropertiesInjection injector = new XMLTXMWordPropertiesInjection(xmlFile);
181
			HashMap<String, HashMap<String, String>> rules = new HashMap<>();
182
			for (int iSentence = 0; iSentence < sentences.size(); iSentence++) {
183
				Sentence sentence = sentences.get(iSentence);
184
				Words words = sentence.getWords();
185
				for (int iWord = 0; iWord < words.size(); iWord++) {
186
					Word word = words.get(iWord);
187
					String form = word.getForm();
188
					if ("<root>".equals(form)) continue;
189
					
190
					String misc = word.getMisc();
191
					int idx = misc.indexOf(XMLIDMISC);
192
					int idx2 = misc.indexOf("|", idx + 6);
193
					if (idx2 < 0) idx2 = misc.length();
194
					String id = misc.substring(idx + 6, idx2);
195
					if (id != null && id.length() > 0) {
196
						HashMap<String, String> properties = UDPipeJavaUtils.wordToHashMap(word, "#ud-");
197
						rules.put(id, properties);
198
					}
199
				}
200
			}
201
			File outFile = new File(binaryCorpusDirectory, xmlFile.getName() + ".tmp");
202
			injector.setProperties(rules);
203
			if (injector.process(outFile) && outFile.exists()) {
204
				xmlFile.delete();
205
				outFile.renameTo(xmlFile);
206
			}
207
			else {
208
				Log.warning("Error while processing: " + xmlFile);
209
			}
210
		}
211
		catch (IOException | XMLStreamException e) {
212
			// TODO Auto-generated catch block
213
			e.printStackTrace();
214
		}
215
		
216 248
		// update xml-txm files
217 249
		return true;
218 250
	}
TXM/branches/eltec/org.txm.udpipe.core/src/org/txm/udpipe/core/XMLTXMToUDPipeXMLParser.java (revision 3283)
173 173
									flagform = false;
174 174
									form = form.trim();
175 175
									form = form.replace("\n", "").replace("<", "&lt;");
176
									Word word = new Word();
177
									word.setForm(form);
176
									
177
									sentence.addWord(form);
178
									Word word = sentence.getWords().get((int) (sentence.getWords().size() - 1));
178 179
									word.setMisc(UDPipeEngine.XMLIDMISC + wordId);
179
									sentence.getWords().add(word);
180 180
								}
181 181
								break;
182 182
							
......
195 195
				}
196 196
			}
197 197
			
198
			if (sentence != null && sentence.getWords().size() > 0) {
198
			if (sentence != null && sentence.getWords().size() > 1) { // a sent contains at least <root>
199 199
				sentences.add(sentence);
200 200
			}
201 201
			
TXM/branches/eltec/org.txm.udpipe.core/src/org/txm/udpipe/core/UDPipeJavaUtils.java (revision 3283)
10 10

  
11 11
import cz.cuni.mff.ufal.udpipe.InputFormat;
12 12
import cz.cuni.mff.ufal.udpipe.Model;
13
import cz.cuni.mff.ufal.udpipe.MultiwordToken;
14
import cz.cuni.mff.ufal.udpipe.MultiwordTokens;
13 15
import cz.cuni.mff.ufal.udpipe.OutputFormat;
14 16
import cz.cuni.mff.ufal.udpipe.Sentence;
15 17
import cz.cuni.mff.ufal.udpipe.Sentences;
......
42 44
			
43 45
			model.parse(sent, "");
44 46
			model.tag(sent, "");
45
//			System.out.println(toString(sent));
47
			//			System.out.println(toString(sent));
46 48
		}
47 49
		
48 50
		return sentences;
......
60 62
	public static String toString(Sentence sent) {
61 63
		StringBuilder buffer = new StringBuilder();
62 64
		Words words = sent.getWords();
63
		for (int i = 0; i < words.size(); i++) {
65
		
66
		MultiwordTokens mwt = sent.getMultiwordTokens();
67
		HashMap<Integer, MultiwordToken> firstWordToToken = new HashMap<Integer, MultiwordToken>();
68
		HashMap<Integer, MultiwordToken> lastWordToToken = new HashMap<Integer, MultiwordToken>();
69
		for (int i = 0 ; i < mwt.size(); i++) {
70
			MultiwordToken mw = mwt.get(i);
71
			firstWordToToken.put(mw.getIdFirst(), mw);
72
			lastWordToToken.put(mw.getIdLast(), mw);
73
		}
74
		
75
		for (int i = 0 ; i < words.size(); i++) {
76
			//			MultiwordToken mw = mwt.get(i);
77
			//			Word w = words.get(mw.getIdFirst());
78
			
64 79
			Word w = words.get(i);
80
			
65 81
			if (i > 0) buffer.append(" ");
66 82
			
67
			buffer.append(w.getForm());
68
			buffer.append("/" + w.getLemma());
69
			buffer.append("/" + w.getUpostag());
70
			buffer.append("/" + w.getXpostag());
71
			buffer.append("/" + w.getFeats());
72
			buffer.append("/" + w.getDeps());
73
			buffer.append("/" + w.getDeprel());
74
			buffer.append("/" + w.getHead());
75
			buffer.append("/" + w.getMisc());
83
			if (firstWordToToken.containsKey(w.getId())) {
84
				MultiwordToken mw = firstWordToToken.get(w.getId());
85
				Word w2 = words.get(mw.getIdLast());
86
				
87
				buffer.append(""+w.getId()+"+"+w2.getId());
88
				buffer.append("/" + w.getForm()+"+"+w2.getForm());
89
				buffer.append("/" + w.getLemma()+"+"+w2.getLemma());
90
				buffer.append("/" + w.getUpostag()+"+"+w2.getUpostag());
91
				buffer.append("/" + w.getXpostag()+"+"+w2.getXpostag());
92
				buffer.append("/" + w.getFeats()+"+"+w2.getFeats());
93
				buffer.append("/" + w.getDeps()+"+"+w2.getDeps());
94
				buffer.append("/" + w.getDeprel()+"+"+w2.getDeprel());
95
				buffer.append("/" + w.getHead()+"+"+w2.getHead());
96
				buffer.append("/" + w.getMisc()+"+"+w2.getMisc());
97
			} else if (lastWordToToken.containsKey(w.getId())) {
98
				// already written
99
			} else {
100
			
101
				buffer.append(w.getId());
102
				buffer.append("/" + w.getForm());
103
				buffer.append("/" + w.getLemma());
104
				buffer.append("/" + w.getUpostag());
105
				buffer.append("/" + w.getXpostag());
106
				buffer.append("/" + w.getFeats());
107
				buffer.append("/" + w.getDeps());
108
				buffer.append("/" + w.getDeprel());
109
				buffer.append("/" + w.getHead());
110
				buffer.append("/" + w.getMisc());
111
			}
76 112
		}
77 113
		return buffer.toString();
78 114
	}
......
126 162
	}
127 163
	
128 164
	public static void main(String[] args) {
129
//		try {
130
//			toConnluFile(new File("/tmp/result.connlu"), new File("/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-gsd-ud-2.4-190531.udpipe"),
131
//					"Et un petit test... En deux phrases ? ou trois.");
165
		//		try {
166
		//			toConnluFile(new File("/tmp/result.connlu"), new File("/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-gsd-ud-2.4-190531.udpipe"),
167
		//					"Et un petit test... En deux phrases ? ou trois.");
168
		//		}
169
		//		catch (UnsupportedEncodingException | FileNotFoundException e) {
170
		//			// TODO Auto-generated catch block
171
		//			e.printStackTrace();
172
		//		}
173
		UDPipeEngine.initializeUDLib(new File("/home/mdecorde/workspace047/org.txm.libs.udpipe"));
174
//		System.out.println("VERSION: "+Version.current().toString());
175
//		// /usr/lib/UDPipe/models/fr.udpipe
176
//		// /home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-gsd-ud-2.4-190531.udpipe
177
//		for (String p : new String[] {"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-gsd-ud-2.4-190531.udpipe",
178
//				"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-partut-ud-2.4-190531.udpipe",
179
//				"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-sequoia-ud-2.4-190531.udpipe",
180
//		"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-spoken-ud-2.4-190531.udpipe"}) {
181
//			System.out.println("M="+p);
182
//			System.out.println(toString(process(p, "Une plaine, des champs cultivés que traverse une grande route.")));
132 183
//		}
133
//		catch (UnsupportedEncodingException | FileNotFoundException e) {
134
//			// TODO Auto-generated catch block
135
//			e.printStackTrace();
136
//		}
137
		UDPipeEngine.initializeUDLib(new File("/home/mdecorde/workspace047/org.txm.libs.udpipe"));
138
		System.out.println("VERSION: "+Version.current().toString());
139
		// /usr/lib/UDPipe/models/fr.udpipe
140
		// /home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-gsd-ud-2.4-190531.udpipe
141
		for (String p : new String[] {"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-gsd-ud-2.4-190531.udpipe",
142
				"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-partut-ud-2.4-190531.udpipe",
143
				"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-sequoia-ud-2.4-190531.udpipe",
144
				"/home/mdecorde/SOFTWARE/udpipe-1.2.0-bin/bin-linux64/french-spoken-ud-2.4-190531.udpipe"}) {
145
			System.out.println("M="+p);
146
		process(p, "Et un petit test... En deux phrases ? ou trois.");
147
		}
184
		
185
		Sentence sentence = new Sentence();
186
		sentence.addWord("je");
187
		sentence.addWord("suis");
188
		sentence.addWord(".");
189
		System.out.println("SENT="+toString(sentence));
190
		
148 191
	}
149 192
	
150 193
	public static void processSentences(String modelPath, Sentences sentences) {
151
		processSentences(Model.load(modelPath), sentences);
194
		long time = System.currentTimeMillis();
195
		Model m = Model.load(modelPath);
196
		//System.out.println("Model load in: "+(System.currentTimeMillis()-time));
197
		
198
		time = System.currentTimeMillis();
199
		processSentences(m, sentences);
200
		//System.out.println("sent processed in: "+(System.currentTimeMillis()-time));
152 201
	}
153 202
	
154 203
	public static void processSentences(Model model, Sentences sentences) {
......
156 205
		for (int iSentence = 0; iSentence < sentences.size(); iSentence++) {
157 206
			Sentence sent = sentences.get(iSentence);
158 207
			
208
			model.parse(sent, "");
159 209
			model.tag(sent, "");
160
			model.parse(sent, "");
210
			
161 211
			//System.out.println(toString(sent));
162 212
		}
163 213
	}
......
186 236
		
187 237
		return properties;
188 238
	}
239
	
240
	/**
241
	 * fill a map with values of "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc"
242
	 * 
243
	 * @param word
244
	 * @param prefix
245
	 * @return
246
	 */
247
	public static HashMap<String, String> wordToHashMap(Sentence sentence, MultiwordToken mword, String prefix) {
248
		if (prefix == null) prefix = "";
249
		
250
		Word word = sentence.getWords().get(mword.getIdFirst());
251
		Word word2 = sentence.getWords().get(mword.getIdFirst());
252
		
253
		HashMap<String, String> properties = new HashMap<>();
254
		properties.put(prefix + "id", Integer.toString(word.getId()) + "+" + Integer.toString(word2.getId()));
255
		properties.put(prefix + "form", word.getForm() + "+" + word2.getForm());
256
		properties.put(prefix + "lemma", word.getLemma() + "+" + word2.getLemma());
257
		properties.put(prefix + "upos", word.getUpostag() + "+" + word2.getUpostag());
258
		properties.put(prefix + "xpos", word.getXpostag() + "+" + word2.getXpostag());
259
		properties.put(prefix + "feats", word.getFeats() + "+" + word2.getFeats());
260
		properties.put(prefix + "head", Integer.toString(word.getHead()) + "+" + Integer.toString(word2.getId()));
261
		properties.put(prefix + "deprel", word.getDeprel() + "+" + word2.getDeprel());
262
		properties.put(prefix + "deps", word.getDeps() + "+" + word2.getDeps());
263
		properties.put(prefix + "misc", word.getMisc() + "+" + word2.getMisc());
264
		
265
		return properties;
266
	}
189 267
}
TXM/branches/eltec/org.txm.udpipe.core/src/org/txm/udpipe/core/UDStringTokenizer.java (revision 3283)
2 2

  
3 3
import java.io.File;
4 4
import java.util.ArrayList;
5
import java.util.Arrays;
6
import java.util.HashMap;
5 7
import java.util.List;
6 8

  
7 9
import org.txm.tokenizer.StringTokenizer;
8 10

  
9 11
import cz.cuni.mff.ufal.udpipe.InputFormat;
10 12
import cz.cuni.mff.ufal.udpipe.Model;
13
import cz.cuni.mff.ufal.udpipe.MultiwordToken;
14
import cz.cuni.mff.ufal.udpipe.MultiwordTokens;
11 15
import cz.cuni.mff.ufal.udpipe.Sentence;
16
import cz.cuni.mff.ufal.udpipe.Word;
12 17
import cz.cuni.mff.ufal.udpipe.Words;
13 18

  
14 19

  
......
29 34
		tokenizer = model.newTokenizer("");
30 35
	}
31 36
	
37
	public final static List<String> ADDITIONAL_PROPERTIES = Arrays.asList("ud-id", "ud-form", "ud-lemma", "ud-upos", "ud-xpos", "ud-feats", "ud-head", "ud-deprel", "ud-deps", "ud-misc");
38
	public List<String> getAdditionalProperties() {
39
		return ADDITIONAL_PROPERTIES;
40
	}
41
	
32 42
	@Override
33
	public List<List<String>> processText(String text) {
34
		ArrayList<List<String>> result = new ArrayList<>();
43
	public ArrayList<ArrayList<ArrayList<String>>> processText(String text) {
44
		ArrayList<ArrayList<ArrayList<String>>> result = new ArrayList<>();
35 45
		
36
		
37 46
		tokenizer.setText(text);
38 47
		Sentence sent = new Sentence();
48
		
49
		
50
		
39 51
		while (tokenizer.nextSentence(sent)) {
40
			List<String> sresult = new ArrayList<>();
52
			
53
			model.parse(sent, "");
54
			model.tag(sent, "");
55
			
56
			ArrayList<ArrayList<String>> sresult = new ArrayList<>();
41 57
			Words words = sent.getWords();
58
			
59
			MultiwordTokens mwt = sent.getMultiwordTokens();
60
			HashMap<Integer, MultiwordToken> firstWordToToken = new HashMap<Integer, MultiwordToken>();
61
			HashMap<Integer, MultiwordToken> lastWordToToken = new HashMap<Integer, MultiwordToken>();
62
			for (int i = 0 ; i < mwt.size(); i++) {
63
				MultiwordToken mw = mwt.get(i);
64
				firstWordToToken.put(mw.getIdFirst(), mw);
65
				lastWordToToken.put(mw.getIdLast(), mw);
66
			}
67
			
68
			// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" };
42 69
			for (int iWord = 0; iWord < words.size(); iWord++) {
43
				sresult.add(words.get(iWord).getForm());
70
				Word word = words.get(iWord);
71
				ArrayList<String> properties = new ArrayList<String>();
72
				
73
				if (firstWordToToken.containsKey(word.getId())) {
74
					MultiwordToken mw = firstWordToToken.get(word.getId());
75
					Word word2 = words.get(mw.getIdLast());
76
					
77
					properties.add(mw.getForm());
78
					properties.add(""+word.getId());
79
					properties.add(word.getForm() + "+" + word2.getForm());
80
					properties.add(word.getLemma() + "+" + word2.getLemma());
81
					properties.add(word.getUpostag() + "+" + word2.getUpostag());
82
					properties.add(word.getXpostag() + "+" + word2.getXpostag());
83
					properties.add(word.getFeats() + "+" + word2.getFeats());
84
					properties.add(Integer.toString(word.getHead()) + "+" + word2.getHead());
85
					properties.add(word.getDeprel() + "+" + word2.getDeprel());
86
					properties.add(word.getDeps() + "+" + word2.getDeps());
87
					properties.add(mw.getMisc() + "+" + word2.getMisc());
88
					
89
					sresult.add(properties);
90
				} else if (lastWordToToken.containsKey(word.getId())) {
91
					// already written
92
				} else {
93
					properties.add(word.getForm());
94
					properties.add(""+word.getId());
95
					properties.add(word.getForm());
96
					properties.add(word.getLemma());
97
					properties.add(word.getUpostag());
98
					properties.add(word.getXpostag());
99
					properties.add(word.getFeats());
100
					properties.add(Integer.toString(word.getHead()));
101
					properties.add(word.getDeprel());
102
					properties.add(word.getDeps());
103
					properties.add(word.getMisc());
104
									
105
					sresult.add(properties);
106
				}
44 107
			}
45 108
			if (sresult.size() > 0) {
46 109
				sresult.remove(0); // remove the <root> element
......
59 122
		return true;
60 123
	}
61 124
}
125

  
TXM/branches/eltec/org.txm.tokenizer.core/src/org/txm/tokenizer/SimpleStringTokenizer.groovy (revision 3283)
270 270
			println "Result : "+tokenizer.processText(text).collect{"<"+it+">"}
271 271
		}
272 272
	}
273

  
274
	/**
	 * Returns an empty list: this tokenizer declares no additional properties.
	 *
	 * @return an empty list of property names
	 */
	@Override
	public List<String> getAdditionalProperties() {
		List<String> noProperties = Arrays.asList()
		return noProperties
	}
273 278
}
TXM/branches/eltec/org.txm.tokenizer.core/src/org/txm/tokenizer/StringTokenizer.java (revision 3283)
1 1
package org.txm.tokenizer;
2 2

  
3
import java.util.ArrayList;
3 4
import java.util.List;
4 5

  
5 6
public interface StringTokenizer {
6 7
	
7
	List<List<String>> processText(String text);
8
	ArrayList<ArrayList<ArrayList<String>>> processText(String text);
8 9
	
9 10
	boolean doSentences();
11
	
12
	List<String> getAdditionalProperties();
10 13
}
TXM/branches/eltec/org.txm.internalview.rcp/src/org/txm/internalview/rcp/editors/InternalViewEditor.java (revision 3283)
9 9
import org.eclipse.jface.viewers.ISelectionChangedListener;
10 10
import org.eclipse.jface.viewers.IStructuredContentProvider;
11 11
import org.eclipse.jface.viewers.SelectionChangedEvent;
12
import org.eclipse.jface.viewers.StructuredSelection;
12 13
import org.eclipse.jface.viewers.TableViewer;
13 14
import org.eclipse.jface.viewers.TableViewerColumn;
14 15
import org.eclipse.jface.viewers.Viewer;
......
37 38
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
38 39
import org.txm.searchengine.cqp.corpus.CQPCorpus;
39 40
import org.txm.searchengine.cqp.corpus.Property;
41
import org.txm.searchengine.cqp.corpus.StructuralUnit;
40 42
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty;
41 43
import org.txm.searchengine.cqp.corpus.WordProperty;
42 44
import org.txm.searchengine.cqp.corpus.query.Match;
......
114 116
		structComboLabel.setText(InternalViewUIMessages.structure);
115 117
		structComboLabel.setLayoutData(new GridData(GridData.CENTER, GridData.CENTER, false, true));
116 118
		this.structuralUnitsComboViewer = new StructuralUnitsComboViewer(parametersArea, this, true);
119
		
117 120
		// Listener
118 121
		this.structuralUnitsComboViewer.addSelectionChangedListener(new ISelectionChangedListener() {
119 122
			
......
124 127
			}
125 128
		});
126 129
		
130
		
127 131
		// Word properties selector
128 132
		propertiesSelector = new PropertiesSelector<>(parametersArea);
129 133
		propertiesSelector.setLayoutData(new GridData(GridData.CENTER, GridData.CENTER, false, true));
TXM/branches/eltec/org.txm.core/src/java/org/txm/xml/StaxDomConstructor.java (revision 3283)
2 2

  
3 3
import java.io.Reader;
4 4
import java.io.StringReader;
5
import java.util.Arrays;
5 6

  
6 7
import javax.xml.parsers.ParserConfigurationException;
7 8
import javax.xml.stream.XMLInputFactory;
......
77 78
					elements++;
78 79
					break;
79 80
				case XMLStreamConstants.CHARACTERS:
81
					//char[] tmp = Arrays.copyOfRange(parser.getTextCharacters(), parser.getTextStart(), parser.getTextStart()+parser.getTextLength());
82
					//System.out.println("T='"+Arrays.toString(tmp)+"'");
80 83
					Text textNode = doc.createTextNode(parser.getText());
81 84
					currentElement.appendChild(textNode);
82 85
					break;
TXM/branches/eltec/org.txm.core/src/java/org/txm/xml/DOMIdentityHook.java (revision 3283)
73 73
			parentParser.writer.writeCharacters("\n");
74 74
			Element e = (Element) node;
75 75
			NodeList children = e.getChildNodes();
76
			String ns = e.getNamespaceURI();
77
			String ln = e.getLocalName();
76 78
			if (children.getLength() > 0) {
77
				String ns = e.getNamespaceURI();
78
				String ln = e.getLocalName();
79 79
				if (ns == null) {
80 80
					parentParser.writer.writeStartElement(ln);
81 81
				} else {
......
83 83
				}
84 84
			}
85 85
			else {
86
				parentParser.writer.writeEmptyElement(e.getNamespaceURI(), e.getLocalName());
86
				if (ns == null) {
87
					parentParser.writer.writeEmptyElement(ln);
88
				} else {
89
					parentParser.writer.writeEmptyElement(ns, ln);
90
				}
87 91
			}
88 92
			
89 93
			for (int i = 0; i < e.getAttributes().getLength(); i++) {
......
102 106
			}
103 107
			if (children.getLength() > 0) {
104 108
				parentParser.writer.writeEndElement();
105
				parentParser.writer.writeCharacters("\n");
109
				//parentParser.writer.writeCharacters("\n");
106 110
			}
107 111
		}
108 112
		else if (node.getNodeType() == Node.TEXT_NODE) {
109 113
			parentParser.writer.writeCharacters(node.getTextContent());
114
		} else if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
115
			parentParser.writer.writeProcessingInstruction(node.getNodeName(), node.getNodeValue());
116
		} else if (node.getNodeType() == Node.CDATA_SECTION_NODE) {
117
			parentParser.writer.writeCData(node.getNodeValue());
118
		} else if (node.getNodeType() == Node.COMMENT_NODE) {
119
			parentParser.writer.writeComment(node.getNodeValue());
110 120
		}
111 121
	}
112 122
	
TXM/branches/eltec/org.txm.internalview.core/src/org/txm/internalview/core/functions/InternalView.java (revision 3283)
3 3
import java.io.File;
4 4
import java.io.PrintWriter;
5 5
import java.util.ArrayList;
6
import java.util.Arrays;
6 7
import java.util.Collection;
7 8
import java.util.HashMap;
8 9
import java.util.LinkedHashMap;
......
91 92
	public boolean loadParameters() throws Exception {
92 93
		try {
93 94
			String str = this.getStringParameterValue(TXMPreferences.STRUCTURAL_UNIT);
94
			this.pStructuralUnit = this.getCorpus().getStructuralUnit(str);
95
			if (str.length() == 0) {
96
				try {
97
					StructuralUnit struct = this.getCorpus().getStructuralUnit("text");
98
					if (struct != null) {
99
						this.pStructuralUnit = struct;
100
					}
101
					struct = this.getCorpus().getStructuralUnit("div");
102
					if (struct != null) {
103
						this.pStructuralUnit = struct;
104
					}
105
					struct = this.getCorpus().getStructuralUnit("p");
106
					if (struct != null) {
107
						this.pStructuralUnit = struct;
108
					}
109
				}
110
				catch (CqiClientException e1) {
111
					// TODO Auto-generated catch block
112
					e1.printStackTrace();
113
				}
114
			} else {
115
				this.pStructuralUnit = this.getCorpus().getStructuralUnit(str);
116
			}
117
			
95 118
		}
96 119
		catch (Exception e) {
97 120
			Log.printStackTrace(e);
......
105 128
		}
106 129
		try {
107 130
			String str = this.getStringParameterValue(TXMPreferences.STRUCTURAL_UNIT_PROPERTIES);
108
			this.pStructuralUnitsProperties = StructuralUnitProperty.stringToProperties(this.getCorpus(), str);
131
			if (str.length() == 0) {
132
				StructuralUnitProperty structP = pStructuralUnit.getProperty("id");
133
				if (structP != null) {
134
					this.pStructuralUnitsProperties = Arrays.asList(structP);
135
				}
136
				structP = pStructuralUnit.getProperty("n");
137
				if (structP != null) {
138
					this.pStructuralUnitsProperties = Arrays.asList(structP);
139
				}
140
			} else {
141
				this.pStructuralUnitsProperties = StructuralUnitProperty.stringToProperties(this.getCorpus(), str);
142
			}
109 143
		}
110 144
		catch (Exception e3) {
111 145
			Log.printStackTrace(e3);
......
365 399
		this.setDirty();
366 400
	}
367 401
	
368
	
369
	
370
	
371 402
	@Override
372 403
	public boolean setParameters(TXMParameters parameters) {
373 404
		System.err.println("InternalView.setParameters(): not yet implemented.");
TXM/branches/eltec/org.txm.internalview.core/src/org/txm/internalview/core/preferences/InternalViewPreferences.java (revision 3283)
37 37
		super.initializeDefaultPreferences();
38 38
		Preferences preferences = this.getDefaultPreferencesNode();
39 39
		
40
		preferences.put(STRUCTURAL_UNIT, TBXPreferences.DEFAULT_STRUCTURAL_UNIT);
40
		preferences.put(STRUCTURAL_UNIT, "");
41 41
		preferences.put(UNIT_PROPERTIES, TBXPreferences.DEFAULT_UNIT_PROPERTY);
42
		preferences.put(STRUCTURAL_UNIT_PROPERTIES, TBXPreferences.DEFAULT_STRUCTURAL_UNIT_PROPERTY);
42
		preferences.put(STRUCTURAL_UNIT_PROPERTIES, "");
43 43
		preferences.putInt(CURRENT_PAGE, 0);
44 44
	}
45 45
}

Formats disponibles : Unified diff