Révision 2905

tmp/org.txm.analec.rcp/src/org/txm/annotation/urs/commands/ImportGlozzCorpus.java (revision 2905)
11 11
import org.eclipse.core.runtime.IProgressMonitor;
12 12
import org.eclipse.core.runtime.IStatus;
13 13
import org.eclipse.core.runtime.Status;
14
import org.eclipse.jface.dialogs.MessageDialog;
14 15
import org.eclipse.osgi.util.NLS;
16
import org.eclipse.swt.widgets.Display;
17
import org.eclipse.swt.widgets.MessageBox;
15 18
import org.kohsuke.args4j.Option;
16 19
import org.txm.Toolbox;
17 20
import org.txm.annotation.urs.URSAnnotationReIndexer;
......
19 22
import org.txm.annotation.urs.Messages;
20 23
import org.txm.objects.Project;
21 24
import org.txm.rcp.handlers.scripts.ExecuteImportScript;
25
import org.txm.rcp.messages.TXMUIMessages;
22 26
import org.txm.rcp.swt.widget.parameters.ParametersDialog;
23 27
import org.txm.rcp.utils.JobHandler;
24 28
import org.txm.searchengine.cqp.CQPSearchEngine;
......
33 37
import visuAnalec.fichiers.FichiersGlozz;
34 38
import visuAnalec.fichiers.FichiersJava;
35 39

  
36
public class ImportGlozzCorpus  extends AbstractHandler {
37

  
38

  
39

  
40
	//	@Option(name="aafile",usage="an example file", widget="File", required=true, def="text.aa")
41
	//	File aafile;
42

  
43
	//	@Option(name="acfile",usage="an example file", widget="File", required=true, def="annotations.ac")
44
	//	File acfile
45

  
46
	@Option(name="glozzDirectory",usage="A folder containing the Glozz files: aa ac and aam", widget="Folder", required=true, def="glozz")
40
public class ImportGlozzCorpus extends AbstractHandler {
41
	
42
	
43
	
44
	// @Option(name="aafile",usage="an example file", widget="File", required=true, def="text.aa")
45
	// File aafile;
46
	
47
	// @Option(name="acfile",usage="an example file", widget="File", required=true, def="annotations.ac")
48
	// File acfile
49
	
50
	@Option(name = "glozzDirectory", usage = "A folder containing the Glozz files: aa ac and aam", widget = "Folder", required = true, def = "glozz")
47 51
	File glozzDirectory;
48

  
49
	@Option(name="aamfile",usage="The aam file to use", widget="File", required=true, def="model.aam")
52
	
53
	@Option(name = "aamfile", usage = "The aam file to use", widget = "File", required = true, def = "model.aam")
50 54
	File aamfile;
55
	
51 56
	private String name;
52

  
53
	/* (non-Javadoc)
57
	
58
	/*
59
	 * (non-Javadoc)
54 60
	 * @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent)
55 61
	 */
56 62
	@Override
......
58 64
		try {
59 65
			// Open the parameters input dialog box
60 66
			if (!ParametersDialog.open(this)) return null;
61

  
67
			
62 68
			// END OF PARAMETERS
63 69
			if (!glozzDirectory.exists()) {
64 70
				System.out.println(Messages.ImportGlozzCorpus_0);
65 71
				return null;
66 72
			}
67

  
73
			
68 74
			name = glozzDirectory.getName();
69 75
			name = AsciiUtils.buildId(name);
70

  
76
			String newCorpusName = name.toUpperCase();
77
			if (Toolbox.workspace.getProject(newCorpusName) != null) {
78
				boolean b = MessageDialog.openConfirm(Display.getCurrent().getActiveShell(), TXMUIMessages.warning, NLS.bind(TXMUIMessages.theP0CorpusDirectoryAlreadyExistsDoYouWantToReplaceIt,
79
						newCorpusName));
80
				if (!b) {
81
					Log.info("Import aborted.");
82
					return null;
83
				}
84
			}
85
			
71 86
			final File srcDir = new File(glozzDirectory, name);
72 87
			System.out.println(NLS.bind(Messages.ImportGlozzCorpus_1, srcDir));
73 88
			DeleteDir.deleteDirectory(srcDir);
74 89
			srcDir.mkdir();
75

  
90
			
76 91
			final File ecDir = new File(glozzDirectory, "ec"); //$NON-NLS-1$
77 92
			System.out.println(NLS.bind(Messages.ImportGlozzCorpus_3, ecDir));
78 93
			DeleteDir.deleteDirectory(ecDir);
79 94
			ecDir.mkdir();
80

  
81

  
95
			
96
			
82 97
			if (!srcDir.exists()) {
83 98
				System.out.println(Messages.ImportGlozzCorpus_4);
84 99
				return null;
85 100
			}
86

  
101
			
87 102
			if (!ecDir.exists()) {
88 103
				System.out.println(Messages.ImportGlozzCorpus_5);
89 104
				return null;
90 105
			}
91

  
106
			
92 107
			// write the TXT file WITH paragraphs
93 108
			File[] aaFiles = glozzDirectory.listFiles(new FilenameFilter() {
109
				
94 110
				@Override
95 111
				public boolean accept(File dir, String name) {
96 112
					return name.endsWith(".aa"); //$NON-NLS-1$
97 113
				}
98 114
			});
99 115
			File[] acFiles = glozzDirectory.listFiles(new FilenameFilter() {
116
				
100 117
				@Override
101 118
				public boolean accept(File dir, String name) {
102 119
					return name.endsWith(".ac"); //$NON-NLS-1$
103 120
				}
104 121
			});
105

  
122
			
106 123
			if (aaFiles == null || aaFiles.length == 0) {
107 124
				System.out.println(NLS.bind(Messages.ImportGlozzCorpus_8, glozzDirectory));
108 125
				return false;
......
115 132
				System.out.println(NLS.bind(Messages.ImportGlozzCorpus_10, glozzDirectory));
116 133
				return false;
117 134
			}
118

  
135
			
119 136
			System.out.println(NLS.bind(Messages.ImportGlozzCorpus_11, glozzDirectory));
120 137
			Arrays.sort(acFiles);
121 138
			Arrays.sort(aaFiles);
122
			for (int i = 0 ; i < aaFiles.length ; i++) {
139
			for (int i = 0; i < aaFiles.length; i++) {
123 140
				File acFile = acFiles[i];
124 141
				File aaFile = aaFiles[i];
125 142
				String name = acFile.getName();
126 143
				int idx = name.indexOf("."); //$NON-NLS-1$
127 144
				if (idx > 0) name = name.substring(0, idx);
128

  
129
				File txtFile = new File(srcDir, name+".txt"); //$NON-NLS-1$
130
				File ecFile = new File(ecDir, name+".ec"); //$NON-NLS-1$
145
				
146
				File txtFile = new File(srcDir, name + ".txt"); //$NON-NLS-1$
147
				File ecFile = new File(ecDir, name + ".ec"); //$NON-NLS-1$
131 148
				Corpus tmpAnalecCorpus = new Corpus(); // need a temporary corpus
132

  
149
				
133 150
				if (!FichiersGlozz.importerGlozz(tmpAnalecCorpus, acFile, aaFile)) {
134 151
					System.out.println(Messages.ImportGlozzCorpus_15);
135 152
					return null;
136 153
				}
137

  
154
				
138 155
				final String texte = tmpAnalecCorpus.getTexte();
139 156
				int debParag = 0;
140 157
				Integer[] finPars = tmpAnalecCorpus.getFinParagraphes();
141 158
				StringBuffer newTexte = new StringBuffer(texte.length() + finPars.length);
142
				for (int iParagraph = 0 ; iParagraph < finPars.length ; iParagraph++) {
159
				for (int iParagraph = 0; iParagraph < finPars.length; iParagraph++) {
143 160
					newTexte.append(texte.substring(debParag, finPars[iParagraph])).append("\n"); //$NON-NLS-1$
144 161
					debParag = finPars[iParagraph];
145 162
				}
146

  
163
				
147 164
				IOUtils.write(txtFile, newTexte.toString()); // write the TXT file for TXM TXT import module
148 165
				FichiersJava.enregistrerCorpus(tmpAnalecCorpus, ecFile); // write for later
149

  
166
				
150 167
				if (!txtFile.exists()) {
151 168
					System.out.println(Messages.ImportGlozzCorpus_17);
152 169
					return null;
153 170
				}
154 171
			}
155

  
156
			Project project = Toolbox.workspace.getProject(name.toUpperCase());
172
			
173
			Project project = Toolbox.workspace.getProject(newCorpusName);
157 174
			if (project != null) {
158
//				CQPSearchEngine.getEngine().stop();
175
				// CQPSearchEngine.getEngine().stop();
159 176
				project.delete();
160 177
			}
161
			project = new Project(Toolbox.workspace, name.toUpperCase());
162
			project.setName(name.toUpperCase());
178
			project = new Project(Toolbox.workspace, newCorpusName);
179
			project.setName(newCorpusName);
163 180
			project.setSourceDirectory(srcDir.getAbsolutePath());
164 181
			project.setImportModuleName("txt");
165 182
			project.getEditionDefinition("default").setBuildEdition(true);
......
169 186
				System.out.println(Messages.ImportGlozzCorpus_23);
170 187
				return null;
171 188
			}
172

  
189
			
173 190
			JobHandler job2 = new JobHandler(Messages.ImportGlozzCorpus_24) {
174

  
191
				
175 192
				@Override
176 193
				protected IStatus run(IProgressMonitor monitor) {
177 194
					try {
......
180 197
							System.out.println("The CQP corpus was not created. Aborting.");
181 198
							return Status.CANCEL_STATUS;
182 199
						}
200
						corpus.compute(false);
183 201
						Corpus analecCorpus = URSCorpora.getCorpus(corpus);
184 202
						analecCorpus.clearAll(); // remove all : annotations, structure
185 203
						File[] ecFiles = ecDir.listFiles(IOUtils.HIDDENFILE_FILTER);
......
187 205
						for (File ecFile : ecFiles) {
188 206
							FichiersJava.concatener(ecFile, analecCorpus);
189 207
						}
190

  
208
						
191 209
						System.out.println(NLS.bind(Messages.ImportGlozzCorpus_25, aamfile));
192 210
						if (!FichiersGlozz.importerModeleGlozz(analecCorpus, aamfile)) {
193 211
							System.out.println(Messages.ImportGlozzCorpus_26);
194 212
							return Status.CANCEL_STATUS;
195 213
						}
196
						//					
197
						//					System.out.println("Importing Glozz corpus from: "+acfile+" and "+aafile);
198
						//					if (!FichiersGlozz.importerGlozz(analecCorpus, acfile, aafile)) {
199
						//						System.out.println("Error while importing Glozz corpus.");
200
						//						return null;
201
						//					}
202

  
214
						//
215
						// System.out.println("Importing Glozz corpus from: "+acfile+" and "+aafile);
216
						// if (!FichiersGlozz.importerGlozz(analecCorpus, acfile, aafile)) {
217
						// System.out.println("Error while importing Glozz corpus.");
218
						// return null;
219
						// }
220
						
203 221
						System.out.println(Messages.ImportGlozzCorpus_27);
204 222
						URSCorpora.removeCorpus(corpus); // remove old corpus if any
205

  
223
						
206 224
						URSAnnotationReIndexer aari = new URSAnnotationReIndexer(corpus, analecCorpus);
207 225
						if (!aari.process()) {
208 226
							System.out.println(Messages.ImportGlozzCorpus_28);
209 227
							return Status.CANCEL_STATUS;
210 228
						}
211

  
229
						
212 230
						System.out.println(Messages.ImportGlozzCorpus_29);
213 231
						URSCorpora.saveCorpus(analecCorpus);
214 232
						URSCorpora.getVue(analecCorpus).retablirVueParDefaut();
......
216 234
						analecCorpus.setTexte(""); // free memory //$NON-NLS-1$
217 235
						DeleteDir.deleteDirectory(ecDir); // cleaning
218 236
						DeleteDir.deleteDirectory(srcDir); // cleaning
219

  
237
						
220 238
						return Status.OK_STATUS;
221
					} catch (Exception e) {
239
					}
240
					catch (Exception e) {
222 241
						System.out.println(NLS.bind(Messages.ImportGlozzCorpus_32, e.getLocalizedMessage()));
223 242
						Log.printStackTrace(e);
224 243
						return Status.CANCEL_STATUS;
225 244
					}
226 245
				}
227 246
			};
228
			job2.startJob(true); // wait for the TXT import job to finish
247
			job.join(0, null);
248
			if (job.getResult() == Status.OK_STATUS) {
249
				job2.startJob(true); // wait for the TXT import job to finish
250
			}
251
			else {
252
				Log.warning("Export could not be finished since the corpus import failed.");
253
			}
229 254
			return null;
230
		} catch (Exception e) {
255
		}
256
		catch (Exception e) {
231 257
			// TODO Auto-generated catch block
232 258
			e.printStackTrace();
233 259
		}
tmp/org.txm.analec.rcp/src/org/txm/annotation/urs/commands/ExportGlozzCorpus.java (revision 2905)
21 21
import org.txm.searchengine.cqp.serverException.CqiServerError;
22 22
import org.txm.utils.logger.Log;
23 23

  
24
public class ExportGlozzCorpus  extends AbstractHandler {
25

  
26
	@Option(name="exportDirectory",usage="the result directory ", widget="Folder", required=true, def="result directory")
24
public class ExportGlozzCorpus extends AbstractHandler {
25
	
26
	@Option(name = "exportDirectory", usage = "the result directory ", widget = "Folder", required = true, def = "result directory")
27 27
	File exportDirectory;
28
	@Option(name="unit_type",usage="A unit type to export", widget="String", required=true, def="MENTION")
28
	
29
	@Option(name = "unit_type", usage = "A unit type to export", widget = "String", required = true, def = "MENTION")
29 30
	String unit_type;
30

  
31
	/* (non-Javadoc)
31
	
32
	/*
33
	 * (non-Javadoc)
32 34
	 * @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent)
33 35
	 */
34 36
	@Override
......
36 38
		try {
37 39
			// Open the parameters input dialog box
38 40
			if (!ParametersDialog.open(this)) return null;
39

  
41
			
40 42
			Object first = CorporaView.getFirstSelectedObject();
41 43
			if (!(first instanceof org.txm.searchengine.cqp.corpus.CQPCorpus)) {
42 44
				System.out.println(NLS.bind(Messages.ExportGlozzCorpus_0, first));
43 45
				return null;
44 46
			}
45

  
46
			final MainCorpus mainCorpus = ((org.txm.searchengine.cqp.corpus.CQPCorpus)first).getMainCorpus();
47

  
47
			
48
			final MainCorpus mainCorpus = ((org.txm.searchengine.cqp.corpus.CQPCorpus) first).getMainCorpus();
49
			
48 50
			JobHandler job = new JobHandler(NLS.bind(Messages.ExportGlozzCorpus_1, mainCorpus.getName())) {
49

  
51
				
50 52
				@Override
51 53
				protected IStatus run(IProgressMonitor monitor) {
52 54
					this.runInit(monitor);
53 55
					try {
56
						if (exportDirectory == null) {
57
							Log.warning("No export directory set. aBorting");
58
							return Status.CANCEL_STATUS;
59
						}
60
						exportDirectory.mkdirs();
61
						
54 62
						export(exportDirectory, mainCorpus, unit_type);
55
					} catch (ThreadDeath e) {
63
					}
64
					catch (ThreadDeath e) {
56 65
						System.out.println(Messages.ExportGlozzCorpus_2);
57
					} catch (Throwable e) {
66
					}
67
					catch (Throwable e) {
58 68
						System.out.println(NLS.bind(Messages.ExportGlozzCorpus_3, e.getLocalizedMessage()));
59 69
						Log.printStackTrace(e);
60 70
						return Status.CANCEL_STATUS;
......
63 73
				}
64 74
			};
65 75
			job.schedule();
66

  
76
			
67 77
			return null;
68
		} catch (Throwable e) {
78
		}
79
		catch (Throwable e) {
69 80
			System.out.println(NLS.bind(Messages.ExportGlozzCorpus_3, e.getLocalizedMessage()));
70 81
			Log.printStackTrace(e);
71 82
		}
tmp/org.txm.analec.rcp/src/org/txm/annotation/urs/URSAnnotationReIndexer.java (revision 2905)
26 26
 *
27 27
 */
28 28
public class URSAnnotationReIndexer {
29
	
29 30
	MainCorpus corpus;
31
	
30 32
	Corpus analecCorpus;
33
	
31 34
	File aafile;
35
	
32 36
	public boolean debug = false;
33

  
37
	
34 38
	public URSAnnotationReIndexer(MainCorpus corpus, Corpus analecCorpus) {
35 39
		this.corpus = corpus;
36 40
		this.analecCorpus = analecCorpus;
37 41
	}
38

  
42
	
39 43
	public boolean process() throws CqiClientException, IOException, CqiServerError {
40

  
44
		
41 45
		int corpusSize = corpus.getSize();
42

  
46
		
43 47
		String text = analecCorpus.getTexte();
44 48
		int isearch = 0; // the current search start position
45

  
49
		
46 50
		AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
47 51
		Property word = corpus.getProperty("word");
48

  
52
		
49 53
		int positions[] = new int[corpusSize];
50
		for (int i = 0 ; i < corpusSize ; i++) positions[i] = i;
54
		for (int i = 0; i < corpusSize; i++)
55
			positions[i] = i;
51 56
		
52 57
		int positionsCorrespondances[] = new int[corpusSize];
53 58
		String strs[] = CQI.cpos2Str(word.getQualifiedName(), positions);
54

  
55
		for (int i = 0 ; i < corpusSize ; i++) {
59
		
60
		for (int i = 0; i < corpusSize; i++) {
56 61
			int idx = text.indexOf(strs[i], isearch);
57 62
			if (idx < 0) {
58
				System.out.println("Error: cannot find word='"+strs[i]+"' (word with position in CQP corpus="+positions[i]+") in text with current carret="+isearch+". Aborting.");
63
				System.out.println("Error: cannot find word='" + strs[i] + "' (word with position in CQP corpus=" + positions[i] + ") in text with current carret=" + isearch + ". Aborting.");
59 64
				
60 65
				System.out.println("Current text slice is (-20, +20 characters): ");
61
				System.out.println("* before: "+text.substring(Math.max(0, isearch-20), Math.min(isearch+20, isearch)));
62
				System.out.println("* after: "+text.substring(isearch, Math.min(isearch+20, text.length()-1)));
66
				System.out.println("* before: " + text.substring(Math.max(0, isearch - 20), Math.min(isearch + 20, isearch)));
67
				System.out.println("* after: " + text.substring(isearch, Math.min(isearch + 20, text.length() - 1)));
63 68
				
64 69
				return false;
65 70
			}
......
69 74
		
70 75
		if (debug) System.out.println(Arrays.toString(positions));
71 76
		if (debug) System.out.println(Arrays.toString(positionsCorrespondances));
72

  
77
		
73 78
		isearch = 0;
74 79
		ArrayList<Unite> unites = analecCorpus.getToutesUnites();
75 80
		if (debug) System.out.println("units not-sorted: ");
76 81
		if (debug) printUnits(unites);
77 82
		Collections.sort(unites, new Comparator<Unite>() {
83
			
78 84
			@Override
79 85
			public int compare(Unite o1, Unite o2) {
80
				return o1.getDeb() - o2.getDeb();
86
				int d = o1.getDeb() - o2.getDeb();
87
				if (d == 0) return o1.getFin() - o2.getFin();
88
				else return d;
81 89
			}
82 90
		});
83 91
		if (debug) System.out.println("units sorted: ");
......
93 101
			boolean startFound = false;
94 102
			boolean endFound = false;
95 103
			int i = 0; // or not : unites are sorted by start position, we don't need to browse all words \o/
96
			for (; i < positionsCorrespondances.length ; i++) {
104
			for (; i < positionsCorrespondances.length; i++) {
97 105
				if (startFound && endFound) break; // no need to go further
98
				if (debug) System.out.println("i="+i+" positionsCorrespondances[i]="+positionsCorrespondances[i]);
106
				if (debug) System.out.println("i=" + i + " positionsCorrespondances[i]=" + positionsCorrespondances[i]);
99 107
				if (!startFound && start < positionsCorrespondances[i]) {
100
					unite.setDeb(i-1);
108
					unite.setDeb(i - 1);
101 109
					startFound = true;
102 110
				}
103 111
				if (!endFound && end <= positionsCorrespondances[i]) {
104
					unite.setFin(i-1);
112
					unite.setFin(i - 1);
105 113
					endFound = true;
106 114
				}
107 115
			}
108 116
			
109 117
			if (!endFound && i == positionsCorrespondances.length) {
110
				unite.setFin(i-1);
118
				unite.setFin(i - 1);
111 119
				endFound = true;
112 120
			}
113 121
			if (!startFound && i == positionsCorrespondances.length) {
114
				unite.setDeb(i-1);
122
				unite.setDeb(i - 1);
115 123
				startFound = true;
116 124
			}
117 125
			
118 126
			if (!(startFound && endFound)) {
119
				String s = "Error: cannot find words positions for unite of type="+unite.getType()+" and unit carret positions=["+start+", "+end+"]. Aborting";
127
				String s = "Error: cannot find words positions for unite of type=" + unite.getType() + " and unit carret positions=[" + start + ", " + end + "]. Aborting";
120 128
				System.out.println(s);
121 129
				
122 130
				System.out.println("5 last found units are: ");
123
				for (int j = 4 ; j >= 0 ; j--) {
124
					if (iunite-j >= 0) {
125
						printUnite(unites.get(iunite-j));
131
				for (int j = 4; j >= 0; j--) {
132
					if (iunite - j >= 0) {
133
						printUnite(unites.get(iunite - j));
126 134
					}
127 135
				}
128 136
				return false;
129 137
			}
130
			//if (i > 0) i--; // restart at previous word
138
			// if (i > 0) i--; // restart at previous word
131 139
			iunite++;
132 140
		}
133 141
		if (debug) System.out.println("units updated: ");
......
136 144
	}
137 145
	
138 146
	public static void printUnite(Unite unite) {
139
		System.out.print(unite.getType()+ "["+unite.getDeb()+", "+unite.getFin()+"]");
147
		System.out.print(unite.getType() + "[" + unite.getDeb() + ", " + unite.getFin() + "]");
140 148
	}
141 149
	
142 150
	public static void printUnits(List<Unite> units) {

Formats disponibles : Unified diff