Révision 2923
tmp/org.txm.searchengine.cqp.rcp/src/org/txm/searchengine/cqp/rcp/dialogs/ExportCorpusDialog.java (revision 2923) | ||
---|---|---|
42 | 42 |
* |
43 | 43 |
*/ |
44 | 44 |
public class ExportCorpusDialog extends Dialog { |
45 |
|
|
45 |
|
|
46 | 46 |
/** |
47 | 47 |
* list of the default files to include in the corpus binary |
48 | 48 |
*/ |
49 | 49 |
private List<String> defaultFiles = Arrays.asList("HTML", "txm", "data", "registry", "annotations", "analec", "temporary_annotations", "xsl", "css", ".settings", ".project"); |
50 |
|
|
50 |
|
|
51 | 51 |
private File zipFile; |
52 |
|
|
52 |
|
|
53 | 53 |
private LinkedHashMap<File, Boolean> selectedFiles; |
54 |
|
|
54 |
|
|
55 | 55 |
private CQPCorpus corpus; |
56 |
|
|
56 |
|
|
57 | 57 |
private TableViewer treeViewer; |
58 |
|
|
58 |
|
|
59 | 59 |
private TableViewerColumn nameColumn; |
60 |
|
|
60 |
|
|
61 | 61 |
private Text textField; |
62 |
|
|
62 |
|
|
63 | 63 |
private Button selectButton; |
64 |
|
|
64 |
|
|
65 | 65 |
public final static String ID = ExportCorpusDialog.class.getName(); |
66 |
|
|
66 |
|
|
67 | 67 |
/** |
68 | 68 |
* |
69 | 69 |
* @param parentShell |
... | ... | |
75 | 75 |
super(parentShell); |
76 | 76 |
this.corpus = corpus; |
77 | 77 |
} |
78 |
|
|
78 |
|
|
79 | 79 |
@Override |
80 | 80 |
protected void configureShell(Shell newShell) { |
81 | 81 |
super.configureShell(newShell); |
82 | 82 |
newShell.setText(NLS.bind("Exporting {0}...", corpus.getName())); |
83 | 83 |
newShell.setMinimumSize(400, 300); |
84 | 84 |
} |
85 |
|
|
85 |
|
|
86 | 86 |
@Override |
87 | 87 |
protected Control createDialogArea(Composite parent) { |
88 | 88 |
Composite comp = (Composite) super.createDialogArea(parent); |
89 |
|
|
89 |
|
|
90 | 90 |
GridLayout layout = (GridLayout) comp.getLayout(); |
91 | 91 |
layout.numColumns = 3; |
92 |
|
|
92 |
|
|
93 | 93 |
Label l = new Label(comp, SWT.LEFT); |
94 | 94 |
l.setText("Output file"); |
95 | 95 |
l.setLayoutData(new GridData(GridData.BEGINNING, GridData.CENTER, false, false)); |
96 |
|
|
96 |
|
|
97 | 97 |
textField = new Text(comp, SWT.SINGLE | SWT.BORDER); |
98 | 98 |
textField.setLayoutData(new GridData(GridData.FILL, GridData.CENTER, true, false)); |
99 | 99 |
String previousPath = TXMPreferences.getString("path", ExportCorpusDialog.class.getName()); |
100 | 100 |
if (previousPath == null || previousPath.length() == 0) { |
101 |
previousPath = System.getProperty("user.home"); ////$NON-NLS-0$
|
|
101 |
previousPath = System.getProperty("user.home"); // |
|
102 | 102 |
} |
103 | 103 |
textField.setText(previousPath + "/" + corpus.getName() + "-" + Toolbox.dateformat.format(Calendar.getInstance().getTime()) + ".txm"); //$NON-NLS-1$ |
104 |
|
|
104 |
|
|
105 | 105 |
selectButton = new Button(comp, SWT.PUSH); |
106 | 106 |
selectButton.setText("..."); |
107 | 107 |
selectButton.addSelectionListener(new SelectionListener() { |
108 |
|
|
108 |
|
|
109 | 109 |
@Override |
110 | 110 |
public void widgetSelected(SelectionEvent e) { |
111 | 111 |
FileDialog dialog = new FileDialog(selectButton.getShell(), SWT.SAVE); |
112 |
|
|
112 |
|
|
113 | 113 |
String[] exts = { "*.txm", "*.zip" }; //$NON-NLS-1$ //$NON-NLS-2$ |
114 | 114 |
dialog.setFilterExtensions(exts); |
115 | 115 |
String path = textField.getText(); |
... | ... | |
117 | 117 |
File p = new File(path); |
118 | 118 |
if (p.isDirectory()) { |
119 | 119 |
dialog.setFilterPath(textField.getText()); |
120 |
} else { |
|
120 |
} |
|
121 |
else { |
|
121 | 122 |
dialog.setFilterPath(p.getParent()); |
122 | 123 |
dialog.setFileName(p.getName()); |
123 | 124 |
} |
124 | 125 |
} |
125 | 126 |
// |
126 |
|
|
127 |
|
|
127 | 128 |
path = dialog.open(); |
128 | 129 |
if (path != null) { |
129 | 130 |
textField.setText(path); |
... | ... | |
132 | 133 |
TXMPreferences.put(ExportCorpusDialog.class.getName(), "path", dir.getAbsolutePath()); |
133 | 134 |
} |
134 | 135 |
} |
135 |
|
|
136 |
|
|
136 | 137 |
@Override |
137 | 138 |
public void widgetDefaultSelected(SelectionEvent e) {} |
138 | 139 |
}); |
139 |
|
|
140 |
|
|
140 | 141 |
selectedFiles = new LinkedHashMap<>(); |
141 | 142 |
Project project = corpus.getProject(); |
142 | 143 |
File directory = project.getProjectDirectory(); |
143 | 144 |
if (directory.exists()) { |
144 |
|
|
145 |
|
|
145 | 146 |
File[] files = directory.listFiles(new FileFilter() { |
146 |
|
|
147 |
|
|
147 | 148 |
@Override |
148 | 149 |
public boolean accept(File f) { |
149 | 150 |
return !f.getName().startsWith(".") && !f.isHidden() && f.isDirectory() && !defaultFiles.contains(f.getName()); |
150 | 151 |
} |
151 | 152 |
}); |
152 | 153 |
Arrays.sort(files, new Comparator<File>() { |
153 |
|
|
154 |
|
|
154 | 155 |
@Override |
155 | 156 |
public int compare(File arg0, File arg1) { |
156 | 157 |
if (arg0.isDirectory() && arg1.isDirectory()) { |
... | ... | |
171 | 172 |
}); |
172 | 173 |
for (File f : files) { |
173 | 174 |
if (!f.isHidden() && f.isDirectory()) { |
174 |
|
|
175 |
|
|
175 | 176 |
if (defaultFiles.contains(f.getName())) { |
176 | 177 |
selectedFiles.put(f, true); |
177 | 178 |
} |
... | ... | |
180 | 181 |
} |
181 | 182 |
} |
182 | 183 |
} |
183 |
|
|
184 |
|
|
184 | 185 |
if (files.length > 0) { |
185 | 186 |
treeViewer = new TableViewer(comp, SWT.CHECK | SWT.BORDER | SWT.V_SCROLL | SWT.H_SCROLL); |
186 |
|
|
187 |
|
|
187 | 188 |
treeViewer.setContentProvider(new ArrayContentProvider()); |
188 |
|
|
189 |
|
|
189 | 190 |
treeViewer.getTable().addListener(SWT.Selection, event -> { |
190 |
|
|
191 |
|
|
191 | 192 |
if (event.detail == SWT.CHECK) { |
192 | 193 |
TableItem item = (TableItem) event.item; |
193 | 194 |
selectedFiles.put((File) event.item.getData(), item.getChecked()); |
194 | 195 |
} |
195 | 196 |
}); |
196 |
|
|
197 |
|
|
197 | 198 |
treeViewer.getTable().setLayoutData(new GridData(GridData.FILL, GridData.FILL, true, true, 3, 1)); |
198 | 199 |
treeViewer.getTable().setHeaderVisible(true); |
199 | 200 |
treeViewer.getTable().setLinesVisible(true); |
200 |
|
|
201 |
|
|
201 | 202 |
nameColumn = new TableViewerColumn(treeViewer, SWT.NONE); |
202 |
nameColumn.getColumn().setText("Extra files");
|
|
203 |
nameColumn.getColumn().setText("Add extra files");
|
|
203 | 204 |
nameColumn.getColumn().pack(); |
204 | 205 |
nameColumn.setLabelProvider(new CellLabelProvider() { |
205 |
|
|
206 |
|
|
206 | 207 |
@Override |
207 | 208 |
public void update(ViewerCell cell) { |
208 | 209 |
Object element = cell.getElement(); |
209 |
|
|
210 |
|
|
210 | 211 |
if (element instanceof File) { |
211 | 212 |
File f = (File) element; |
212 | 213 |
cell.setText(f.getName()); |
... | ... | |
216 | 217 |
} |
217 | 218 |
} |
218 | 219 |
}); |
219 |
|
|
220 |
|
|
220 | 221 |
treeViewer.setInput(files); |
221 | 222 |
for (int i = 0; i < files.length; i++) { |
222 | 223 |
treeViewer.getTable().getItem(i).setChecked(selectedFiles.get(files[i])); |
223 | 224 |
} |
224 |
|
|
225 |
|
|
225 | 226 |
Button selectAllButton = new Button(comp, SWT.PUSH); |
226 | 227 |
selectAllButton.setText("Select All"); |
227 | 228 |
selectAllButton.addSelectionListener(new SelectionListener() { |
228 |
|
|
229 |
|
|
229 | 230 |
@Override |
230 | 231 |
public void widgetSelected(SelectionEvent e) { |
231 | 232 |
massSelect(true); |
232 | 233 |
} |
233 |
|
|
234 |
|
|
234 | 235 |
@Override |
235 | 236 |
public void widgetDefaultSelected(SelectionEvent e) {} |
236 | 237 |
}); |
237 |
|
|
238 |
|
|
238 | 239 |
Button clearAllButton = new Button(comp, SWT.PUSH); |
239 | 240 |
clearAllButton.setText("Clear selection"); |
240 | 241 |
clearAllButton.addSelectionListener(new SelectionListener() { |
241 |
|
|
242 |
|
|
242 | 243 |
@Override |
243 | 244 |
public void widgetSelected(SelectionEvent e) { |
244 | 245 |
massSelect(false); |
245 | 246 |
} |
246 |
|
|
247 |
|
|
247 | 248 |
@Override |
248 | 249 |
public void widgetDefaultSelected(SelectionEvent e) {} |
249 | 250 |
}); |
250 | 251 |
} |
251 | 252 |
} |
252 |
|
|
253 |
|
|
253 | 254 |
return comp; |
254 | 255 |
} |
255 |
|
|
256 |
|
|
256 | 257 |
protected void massSelect(boolean b) { |
257 | 258 |
for (int i = 0; i < treeViewer.getTable().getItems().length; i++) { |
258 | 259 |
treeViewer.getTable().getItem(i).setChecked(b); |
... | ... | |
261 | 262 |
selectedFiles.put(f, b); |
262 | 263 |
} |
263 | 264 |
} |
264 |
|
|
265 |
|
|
265 | 266 |
@Override |
266 | 267 |
protected boolean isResizable() { |
267 | 268 |
return true; |
268 | 269 |
} |
269 |
|
|
270 |
|
|
270 | 271 |
@Override |
271 | 272 |
protected void buttonPressed(int buttonId) { |
272 | 273 |
if (buttonId == Dialog.OK) { |
... | ... | |
274 | 275 |
} |
275 | 276 |
super.buttonPressed(buttonId); |
276 | 277 |
} |
277 |
|
|
278 |
|
|
278 | 279 |
public File getZipFile() { |
279 | 280 |
return zipFile; |
280 | 281 |
} |
281 |
|
|
282 |
|
|
282 | 283 |
public LinkedHashMap<File, Boolean> getFileSelection() { |
283 | 284 |
return selectedFiles; |
284 | 285 |
} |
285 |
|
|
286 |
|
|
286 | 287 |
public HashSet<String> getIgnoreNames() { |
287 | 288 |
HashSet<String> ignore = new HashSet<>(); |
288 | 289 |
for (File f : selectedFiles.keySet()) { |
tmp/org.txm.ruby.core/plugin.xml (revision 2923) | ||
---|---|---|
7 | 7 |
class="org.txm.ruby.core.RubyScriptEngine" |
8 | 8 |
name="Ruby"> |
9 | 9 |
</ScriptEngine> |
10 |
<ScriptEngine |
|
11 |
class="org.txm.python.core.RubyScriptEngine" |
|
12 |
name="Ruby"> |
|
13 |
</ScriptEngine> |
|
14 | 10 |
</extension> |
15 | 11 |
|
16 | 12 |
</plugin> |
tmp/org.txm.treetagger.core/src/org/txm/importer/xmltxm/Annotate.groovy (revision 2923) | ||
---|---|---|
52 | 52 |
*/ |
53 | 53 |
class Annotate { |
54 | 54 |
boolean cancelNow = false; |
55 |
|
|
55 |
|
|
56 | 56 |
/** The report file. */ |
57 | 57 |
File reportFile;//contains the txm:application tag content |
58 |
|
|
58 |
|
|
59 | 59 |
/** The resp person. */ |
60 | 60 |
String respPerson; |
61 |
|
|
61 |
|
|
62 | 62 |
/** The resp id. */ |
63 | 63 |
String respId; |
64 |
|
|
64 |
|
|
65 | 65 |
/** The resp desc. */ |
66 | 66 |
String respDesc; |
67 |
|
|
67 |
|
|
68 | 68 |
/** The resp date. */ |
69 | 69 |
String respDate; |
70 |
|
|
70 |
|
|
71 | 71 |
/** The resp when. */ |
72 | 72 |
String respWhen; |
73 |
|
|
73 |
|
|
74 | 74 |
/** The app ident. */ |
75 | 75 |
String appIdent; |
76 |
|
|
76 |
|
|
77 | 77 |
/** The app version. */ |
78 | 78 |
String appVersion; |
79 |
|
|
79 |
|
|
80 | 80 |
/** The distributor. */ |
81 | 81 |
String distributor; |
82 |
|
|
82 |
|
|
83 | 83 |
/** The publi stmt. */ |
84 | 84 |
String publiStmt; |
85 |
|
|
85 |
|
|
86 | 86 |
/** The source stmt. */ |
87 | 87 |
String sourceStmt; |
88 |
|
|
88 |
|
|
89 | 89 |
/** The types. */ |
90 | 90 |
def types; |
91 |
|
|
91 |
|
|
92 | 92 |
/** The types title. */ |
93 | 93 |
def typesTITLE; |
94 |
|
|
94 |
|
|
95 | 95 |
/** The types desc. */ |
96 | 96 |
def typesDesc; |
97 |
|
|
97 |
|
|
98 | 98 |
/** The types tagset. */ |
99 | 99 |
def typesTAGSET; |
100 |
|
|
100 |
|
|
101 | 101 |
/** The types web. */ |
102 | 102 |
def typesWEB; |
103 |
|
|
103 |
|
|
104 | 104 |
/** The idform. */ |
105 | 105 |
String idform; |
106 |
|
|
106 |
|
|
107 | 107 |
/** The debug. */ |
108 | 108 |
boolean debug = false; |
109 |
|
|
109 |
|
|
110 | 110 |
File modelsDirectory; |
111 |
|
|
111 |
|
|
112 | 112 |
public Annotate() { |
113 | 113 |
modelsDirectory = new File(TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.MODELS_PATH)); // default models directory is set in the Toolbox |
114 | 114 |
} |
115 |
|
|
115 |
|
|
116 | 116 |
/** |
117 | 117 |
* Sets the debug. |
118 | 118 |
*/ |
119 | 119 |
public void setDebug() { |
120 | 120 |
debug = true; |
121 | 121 |
} |
122 |
|
|
122 |
|
|
123 | 123 |
String id; |
124 | 124 |
/** |
125 | 125 |
* Inits the tt outfile infos. |
... | ... | |
131 | 131 |
{ |
132 | 132 |
initTTOutfileInfos(rootDirFile, modelfile, modelfilename, null); |
133 | 133 |
} |
134 |
|
|
134 |
|
|
135 | 135 |
/** |
136 | 136 |
* Inits the tt outfile infos. |
137 | 137 |
* |
... | ... | |
145 | 145 |
String[] split = id.split("\\."); |
146 | 146 |
if (split.length > 0) id = split[0]; |
147 | 147 |
if (id.equals("??")) id = "xx" |
148 |
|
|
148 |
|
|
149 | 149 |
reportFile = new File(rootDirFile,"NLPToolsParameters.xml"); |
150 |
|
|
150 |
|
|
151 | 151 |
respPerson = System.getProperty("user.name"); |
152 | 152 |
respId = "txm"; |
153 | 153 |
respDesc = "NLP annotation tool"; |
154 | 154 |
respDate = DateFormat.getDateInstance(DateFormat.SHORT, Locale.UK).format(new Date()); |
155 | 155 |
respWhen = DateFormat.getDateInstance(DateFormat.FULL, Locale.UK).format(new Date()); |
156 |
|
|
156 |
|
|
157 | 157 |
appIdent = "TreeTagger"; |
158 | 158 |
appVersion = "3.2"; |
159 |
|
|
159 |
|
|
160 | 160 |
distributor = ""; |
161 | 161 |
publiStmt = """"""; |
162 | 162 |
sourceStmt = """"""; |
163 |
|
|
163 |
|
|
164 | 164 |
if (properties != null && properties.length == 2) { |
165 |
types = [properties[0],properties[1]]; |
|
166 |
typesTITLE = [properties[0],properties[1]]; |
|
165 |
types = [properties[0], properties[1]];
|
|
166 |
typesTITLE = [properties[0], properties[1]];
|
|
167 | 167 |
} else { |
168 |
types = [id+"pos",id+"lemma"]; |
|
169 |
typesTITLE = [id+"pos",id+"lemma"]; |
|
168 |
types = [id+"pos", id+"lemma"];
|
|
169 |
typesTITLE = [id+"pos", id+"lemma"];
|
|
170 | 170 |
} |
171 |
|
|
171 |
|
|
172 | 172 |
//TODO: the tagset, website and description should be referenced in the model catalog |
173 | 173 |
if(modelfile.getName() == "rgaqcj.par") { |
174 |
typesDesc = ["CATTEX pos tagset built with BFM texts","fr lemma of the model "+modelfile+" - "] |
|
175 |
typesTAGSET = ["http://bfm.ens-lyon.fr/IMG/pdf/Cattex2009_Manuel.pdf",""] |
|
176 |
typesWEB = ["http://bfm.ens-lyon.fr/",""] |
|
174 |
typesDesc = [ |
|
175 |
"CATTEX pos tagset built with BFM texts", |
|
176 |
"fr lemma of the model "+modelfile+" - " |
|
177 |
] |
|
178 |
typesTAGSET = [ |
|
179 |
"http://bfm.ens-lyon.fr/IMG/pdf/Cattex2009_Manuel.pdf", |
|
180 |
"" |
|
181 |
] |
|
182 |
typesWEB = ["http://bfm.ens-lyon.fr/", ""] |
|
177 | 183 |
} else { |
178 |
typesDesc = ["pos tagset built from model "+modelfile,id+" lemma of the model "+modelfile+" - "] |
|
179 |
typesTAGSET = ["",""] |
|
180 |
typesWEB = ["",""] |
|
184 |
typesDesc = [ |
|
185 |
"pos tagset built from model "+modelfile, |
|
186 |
id+" lemma of the model "+modelfile+" - " |
|
187 |
] |
|
188 |
typesTAGSET = ["", ""] |
|
189 |
typesWEB = ["", ""] |
|
181 | 190 |
} |
182 |
|
|
191 |
|
|
183 | 192 |
idform ="w"; |
184 | 193 |
} |
185 |
|
|
194 |
|
|
186 | 195 |
/** |
187 | 196 |
* Apply tt. |
188 | 197 |
* |
... | ... | |
192 | 201 |
* @return true, if successful |
193 | 202 |
*/ |
194 | 203 |
public boolean applyTT(File ttsrcfile, File ttoutfile, File modelfile) { |
195 |
applyTT(ttsrcfile, ttoutfile, modelfile, null) |
|
204 |
return applyTT(ttsrcfile, ttoutfile, modelfile, null)
|
|
196 | 205 |
} |
197 |
|
|
206 |
|
|
198 | 207 |
/** |
199 | 208 |
* Apply tt. |
200 | 209 |
* |
... | ... | |
209 | 218 |
try { |
210 | 219 |
File infile = ttsrcfile; |
211 | 220 |
File outfile = ttoutfile; |
212 |
|
|
221 |
|
|
213 | 222 |
def tt = new TreeTagger(TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.INSTALL_PATH)+"/bin/", options); |
214 | 223 |
tt.settoken(); |
215 | 224 |
tt.setlemma(); |
... | ... | |
250 | 259 |
} |
251 | 260 |
return true; |
252 | 261 |
} |
253 |
|
|
262 |
|
|
254 | 263 |
/** |
255 | 264 |
* Write standoff file. |
256 | 265 |
* |
... | ... | |
270 | 279 |
transfo.setInfos(distributor, publiStmt, sourceStmt); |
271 | 280 |
return transfo.process( ttoutfile, posfile, encoding ); |
272 | 281 |
} |
273 |
|
|
282 |
|
|
274 | 283 |
/** |
275 | 284 |
* Run step by step : build TT src files, run TT, build xml-standoff files, inject standoff annotations |
276 | 285 |
* |
... | ... | |
287 | 296 |
println "TREETAGGER INSTALL PATH : "+TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.INSTALL_PATH); |
288 | 297 |
println "TREETAGGER MODELS PATH : "+TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.MODELS_PATH) |
289 | 298 |
} |
290 |
|
|
299 |
|
|
291 | 300 |
//test if the Toolbox know TreeTagger |
292 | 301 |
if (!new File(TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.INSTALL_PATH)+"/bin/").exists()) { |
293 | 302 |
println("Could not find TreeTagger binaries in "+TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.INSTALL_PATH)+"/bin/") |
... | ... | |
299 | 308 |
langAll = new LangDetector(binDir).getLang(); |
300 | 309 |
println "General lang $langAll" |
301 | 310 |
} |
302 |
|
|
311 |
|
|
303 | 312 |
//cleaning |
304 | 313 |
new File(binDir, "annotations").deleteDir(); |
305 | 314 |
new File(binDir, "annotations").mkdir(); |
306 | 315 |
new File(binDir, "treetagger").deleteDir(); |
307 | 316 |
new File(binDir, "treetagger").mkdir(); |
308 |
|
|
317 |
|
|
309 | 318 |
ArrayList<String> milestones = []; |
310 |
|
|
319 |
|
|
311 | 320 |
//BUILD TT FILE READY TO BE TAGGED |
312 | 321 |
List<File> files = txmDir.listFiles(IOUtils.HIDDENFILE_FILTER) |
313 |
|
|
322 |
|
|
314 | 323 |
println("Building TT source files ("+files.size()+") from directory "+txmDir) |
315 | 324 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.size()) |
316 | 325 |
for (File f : files) { |
... | ... | |
323 | 332 |
if (!ttsrcbuilder.process(resultfile, null)) |
324 | 333 |
System.out.println("Failed to build tt src file of "+srcfile); |
325 | 334 |
} |
326 |
|
|
335 |
|
|
327 | 336 |
if (cancelNow) return; |
328 |
|
|
337 |
|
|
329 | 338 |
File modelDirectory = new File(TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.MODELS_PATH)); |
330 | 339 |
if (!modelDirectory.exists()) { |
331 | 340 |
println "Skipping ANNOTATE: TreeTagger language model file directory not found: "+modelDirectory.getAbsolutePath(); |
... | ... | |
336 | 345 |
} |
337 | 346 |
println("") |
338 | 347 |
//Convert encoding if needed |
339 |
|
|
348 |
|
|
340 | 349 |
//APPLY TREETAGGER |
341 | 350 |
files = new File(binDir, "treetagger").listFiles(IOUtils.HIDDENFILE_FILTER) |
342 | 351 |
println("Applying $modelfilename TreeTagger model on dir: "+new File(binDir, "treetagger")+ " ("+files.size()+" files)") |
... | ... | |
357 | 366 |
modelfile = new File(modelsDirectory, tmpModelFileName); |
358 | 367 |
if (debug) |
359 | 368 |
println "model file : "+modelfile; |
360 |
|
|
369 |
|
|
361 | 370 |
File |
362 |
|
|
371 |
|
|
363 | 372 |
if (!modelfile.exists()) { |
364 | 373 |
println "Skipping ANNOTATE: '$modelfile' TreeTagger language model file not found." |
365 | 374 |
if(System.getProperty("os.name").startsWith("Windows") || System.getProperty("os.name").startsWith("Mac")) |
... | ... | |
369 | 378 |
println "Skipping ANNOTATE: impossible to access the '$modelfile' TreeTagger language model file." |
370 | 379 |
return false; |
371 | 380 |
} |
372 |
|
|
381 |
|
|
373 | 382 |
// if (modelfile.getName().equals("sp.par")) {//UTF >> Latin1 |
374 | 383 |
// if(debug) |
375 | 384 |
// println "fix encoding for model "+modelfile |
376 | 385 |
// new EncodingConverter(f, "UTF-8", "ISO-8859-1") |
377 | 386 |
// } |
378 |
|
|
387 |
|
|
379 | 388 |
cpb.tick() |
380 | 389 |
File infile = f; |
381 | 390 |
File outfile = new File(f.getParent(),f.getName()+"-out.tt"); |
382 | 391 |
if (!applyTT(infile, outfile, modelfile)) { |
383 | 392 |
return false; |
384 | 393 |
} |
385 |
|
|
394 |
|
|
386 | 395 |
// //Reconvert encoding if needed |
387 | 396 |
// if (modelfile.getName().equals("sp.par")) { |
388 | 397 |
// if(debug) |
... | ... | |
391 | 400 |
// } |
392 | 401 |
} |
393 | 402 |
println("") |
394 |
|
|
403 |
|
|
395 | 404 |
if (cancelNow) return; |
396 |
|
|
405 |
|
|
397 | 406 |
//BUILD STAND-OFF FILES |
398 | 407 |
if (modelfile == null) { |
399 | 408 |
println "no model applied" |
400 | 409 |
return false; |
401 | 410 |
} |
402 |
|
|
411 |
|
|
403 | 412 |
initTTOutfileInfos(binDir, modelfile, modelfilename); |
404 | 413 |
files = new File(binDir, "treetagger").listFiles(IOUtils.HIDDENFILE_FILTER)// now contains the result files of TT |
405 | 414 |
println "Building stdoff files ("+files.size()+") from dir:"+new File(binDir, "treetagger")+" to "+new File(binDir, "annotations"); |
... | ... | |
413 | 422 |
println("Failed to build standoff file of "+ttrezfile); |
414 | 423 |
} |
415 | 424 |
println("") |
416 |
|
|
425 |
|
|
417 | 426 |
if (cancelNow) return; |
418 |
|
|
427 |
|
|
419 | 428 |
//INJECT ANNOTATIONS |
420 | 429 |
List<File> interpfiles = new File(binDir, "annotations").listFiles(IOUtils.HIDDENFILE_FILTER); |
421 | 430 |
List<File> txmfiles = txmDir.listFiles(IOUtils.HIDDENFILE_FILTER); |
... | ... | |
439 | 448 |
return false; |
440 | 449 |
} |
441 | 450 |
builder = null; |
442 |
|
|
451 |
|
|
443 | 452 |
//println "renaming files..." |
444 | 453 |
if (!(srcfile.delete() && temp.renameTo(srcfile))) |
445 | 454 |
println "Warning can't rename file "+temp+" to "+srcfile |
... | ... | |
448 | 457 |
println("") |
449 | 458 |
return true; |
450 | 459 |
} |
451 |
|
|
460 |
|
|
452 | 461 |
public void setModelsDirectory(File modelsDirectory) { |
453 | 462 |
this.modelsDirectory = modelsDirectory; |
454 | 463 |
} |
455 |
|
|
464 |
|
|
456 | 465 |
/** |
457 | 466 |
* Run file by file. Allow to have one different lang per file. Default behavior add new word properties |
458 | 467 |
* |
... | ... | |
465 | 474 |
{ |
466 | 475 |
return run(binDir, txmDir, langs, false, new String[0], new String[0]); |
467 | 476 |
} |
468 |
|
|
477 |
|
|
469 | 478 |
/** |
470 | 479 |
* Run file by file. Allow to have one different lang per file |
471 | 480 |
* |
... | ... | |
481 | 490 |
println("Path to TreeTagger is wrong "+TreeTaggerPreferences.getInstance().getString(TreeTaggerPreferences.INSTALL_PATH)+"/bin/") |
482 | 491 |
return true; |
483 | 492 |
} |
484 |
|
|
493 |
|
|
485 | 494 |
List<File> listfiles = txmDir.listFiles(IOUtils.HIDDENFILE_FILTER); |
486 |
|
|
495 |
|
|
487 | 496 |
//cleaning |
488 | 497 |
File annotDir = new File(binDir,"annotations"); |
489 | 498 |
annotDir.deleteDir(); |
... | ... | |
494 | 503 |
File treetaggerDir = new File(binDir,"treetagger"); |
495 | 504 |
treetaggerDir.deleteDir(); |
496 | 505 |
treetaggerDir.mkdir(); |
497 |
|
|
506 |
|
|
498 | 507 |
int cores = Runtime.getRuntime().availableProcessors() |
499 | 508 |
int coresToUse = Math.max(1.0, cores * 0.7) |
500 | 509 |
ExecutorService pool = Executors.newFixedThreadPool(coresToUse) |
501 |
|
|
510 |
|
|
502 | 511 |
def files = txmDir.listFiles(IOUtils.HIDDENFILE_FILTER) |
503 | 512 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.size()) |
504 | 513 |
for (File teiFile : files) { |
505 | 514 |
int counter = 1; |
506 | 515 |
ThreadFile t = new ThreadFile("TT_"+counter++, teiFile) { |
507 |
|
|
516 |
|
|
508 | 517 |
public void run() { |
509 |
|
|
518 |
|
|
510 | 519 |
if (cancelNow) return; |
511 | 520 |
if (langs.get(f.getName()) == null) { |
512 | 521 |
println "Error: no lang defined for file $f" |
513 | 522 |
return; |
514 | 523 |
} |
515 |
|
|
524 |
|
|
516 | 525 |
String lang = langs.get(f.getName()); |
517 | 526 |
run(f, lang, binDir, txmDir, replace, properties, options, annotDir, ptreetaggerDir, treetaggerDir) |
518 |
|
|
527 |
|
|
519 | 528 |
cpb.tick(); |
520 | 529 |
} |
521 | 530 |
}; |
522 |
|
|
531 |
|
|
523 | 532 |
pool.execute(t) |
524 | 533 |
} |
525 |
|
|
534 |
|
|
526 | 535 |
pool.shutdown() |
527 | 536 |
pool.awaitTermination(10, TimeUnit.HOURS) |
528 | 537 |
println "" |
529 | 538 |
return true; |
530 | 539 |
} |
531 | 540 |
public boolean run(File f, String lang, File binDir, File txmDir) { |
532 |
|
|
541 |
|
|
533 | 542 |
File annotDir = new File(binDir,"annotations"); |
534 | 543 |
annotDir.mkdir(); |
535 | 544 |
File ptreetaggerDir = new File(binDir,"ptreetagger"); |
536 | 545 |
ptreetaggerDir.mkdir(); |
537 | 546 |
File treetaggerDir = new File(binDir,"treetagger"); |
538 | 547 |
treetaggerDir.mkdir(); |
539 |
|
|
548 |
|
|
540 | 549 |
return run(f, lang, binDir, txmDir, false, new String[0], new String[0], annotDir, ptreetaggerDir, treetaggerDir) |
541 | 550 |
} |
542 |
|
|
543 |
|
|
551 |
|
|
552 |
|
|
544 | 553 |
public boolean run(File f, String lang, File binDir, File txmDir, boolean replace, String[] properties, String[] options, File annotDir, File ptreetaggerDir, File treetaggerDir) { |
545 |
|
|
554 |
|
|
546 | 555 |
File modelfile = new File(modelsDirectory, lang+".par"); |
547 | 556 |
if (!"??".equals(lang) && !modelfile.exists()) { |
548 | 557 |
println "Error: No Modelfile available for lang "+modelfile+". Continue import process "; |
... | ... | |
552 | 561 |
File ttsrcfile = new File(ptreetaggerDir, f.getName()+"-src.tt"); |
553 | 562 |
File ttrezfile = new File(treetaggerDir, f.getName()+"-out.tt"); |
554 | 563 |
//println ("TT with $model "+f+"+"+annotfile+" > "+ttsrcfile+" > "+ttrezfile); |
555 |
|
|
564 |
|
|
556 | 565 |
//BUILD TT FILE READY TO BE TAGGED |
557 | 566 |
def builder = new BuildTTSrc(f.toURL()); |
558 | 567 |
builder.process(ttsrcfile, null); |
559 |
|
|
568 |
|
|
560 | 569 |
String tmpModelFileName = modelfile.getName() |
561 | 570 |
if (tmpModelFileName.startsWith("??")) { |
562 | 571 |
if (f.length() > LangDetector.MINIMALSIZE) { |
... | ... | |
569 | 578 |
|
570 | 579 |
//Apply TT |
571 | 580 |
applyTT(ttsrcfile, ttrezfile, modelfile, options); |
572 |
|
|
581 |
|
|
573 | 582 |
//CREATE STANDOFF FILES |
574 | 583 |
initTTOutfileInfos(binDir, modelfile, lang, properties); |
575 | 584 |
writeStandoffFile(ttrezfile, annotfile) |
576 |
|
|
585 |
|
|
577 | 586 |
//INJECT ANNOTATIONS |
578 | 587 |
File tmpFile = new File(txmDir, "temp_"+f.getName()) |
579 | 588 |
builder = new AnnotationInjection(f.toURL(), annotfile.toURL(), replace); |
580 | 589 |
builder.process(tmpFile); |
581 | 590 |
if (!(f.delete() && tmpFile.renameTo(f))) println "Warning can't rename file "+tmpFile+" to "+f |
582 |
|
|
591 |
|
|
583 | 592 |
return f.exists(); |
584 | 593 |
} |
585 |
|
|
594 |
|
|
586 | 595 |
public void setCancelNow() { |
587 | 596 |
cancelNow = true; |
588 | 597 |
} |
589 |
|
|
598 |
|
|
590 | 599 |
public class ThreadFile extends Thread { |
591 | 600 |
File f; |
592 | 601 |
public ThreadFile(String name, File f) { |
Formats disponibles : Unified diff