Révision 1115
tmp/org.txm.rcp/src/main/java/org/txm/rcp/handlers/scripts/ExecuteImportScript.java (revision 1115) | ||
---|---|---|
61 | 61 |
import org.txm.rcp.messages.TXMUIMessages; |
62 | 62 |
import org.txm.rcp.utils.JobHandler; |
63 | 63 |
import org.txm.rcp.views.corpora.CorporaView; |
64 |
import org.txm.searchengine.core.SearchEnginesManager; |
|
64 | 65 |
import org.txm.searchengine.cqp.CQPSearchEngine; |
65 | 66 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
66 | 67 |
import org.txm.tokenizer.TokenizerClasses; |
... | ... | |
170 | 171 |
e.printStackTrace(); |
171 | 172 |
} finally { |
172 | 173 |
System.gc(); |
174 |
if (!CQPSearchEngine.isInitialized()) { |
|
175 |
try { |
|
176 |
SearchEnginesManager.getCQPSearchEngine().start(monitor); |
|
177 |
} catch (Exception e) { |
|
178 |
// TODO Auto-generated catch block |
|
179 |
e.printStackTrace(); |
|
180 |
} |
|
181 |
} |
|
173 | 182 |
syncExec(new Runnable() { |
174 | 183 |
@Override |
175 | 184 |
public void run() { |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/commands/ImportSelectedText.java (revision 1115) | ||
---|---|---|
37 | 37 |
import org.eclipse.core.commands.AbstractHandler; |
38 | 38 |
import org.eclipse.core.commands.ExecutionEvent; |
39 | 39 |
import org.eclipse.core.commands.ExecutionException; |
40 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
41 | 40 |
import org.eclipse.jface.dialogs.MessageDialog; |
42 | 41 |
import org.eclipse.osgi.util.NLS; |
43 | 42 |
import org.eclipse.swt.widgets.Shell; |
44 | 43 |
import org.txm.Toolbox; |
45 |
import org.txm.core.engines.ImportEngine; |
|
46 |
import org.txm.core.engines.ImportEngines; |
|
47 |
import org.txm.core.engines.ScriptedImportEngine; |
|
48 | 44 |
import org.txm.core.preferences.TBXPreferences; |
49 | 45 |
import org.txm.objects.Project; |
50 | 46 |
import org.txm.rcp.handlers.scripts.ExecuteImportScript; |
51 | 47 |
import org.txm.rcp.messages.TXMUIMessages; |
52 | 48 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
53 | 49 |
import org.txm.utils.LangDetector; |
54 |
// TODO: Auto-generated Javadoc |
|
50 |
|
|
55 | 51 |
/** |
56 | 52 |
* Import the text in the Clipboard as a corpus. The loader used is the |
57 | 53 |
* QuickLoader |
... | ... | |
76 | 72 |
System.out.println(TXMUIMessages.ImportSelectedText_0); |
77 | 73 |
|
78 | 74 |
try { |
79 |
String corpusName = TXMUIMessages.ImportSelectedText_1+(nextclipcorpus++); |
|
80 |
while (Toolbox.workspace.getProject(corpusName) != null) |
|
81 |
corpusName = TXMUIMessages.ImportSelectedText_1+(nextclipcorpus++); |
|
75 |
String corpusName = TXMUIMessages.ImportSelectedText_1.toUpperCase()+(nextclipcorpus++); |
|
76 |
while (new File(Toolbox.workspace.getLocation(), corpusName).exists()) { |
|
77 |
corpusName = TXMUIMessages.ImportSelectedText_1.toUpperCase()+(nextclipcorpus++); |
|
78 |
} |
|
82 | 79 |
importText(result, corpusName); |
80 |
|
|
83 | 81 |
} catch (Exception e) { |
84 | 82 |
// TODO Auto-generated catch block |
85 | 83 |
org.txm.rcp.utils.Logger.printStackTrace(e); |
... | ... | |
96 | 94 |
* @throws Exception |
97 | 95 |
*/ |
98 | 96 |
public static Object importText(String text, String basename) throws Exception { |
99 |
|
|
97 |
basename = basename.toUpperCase(); |
|
100 | 98 |
if (text.length() == 0) { |
101 | 99 |
System.err.println(TXMUIMessages.ImportSelectedText_2); |
102 | 100 |
return null; |
... | ... | |
129 | 127 |
|
130 | 128 |
|
131 | 129 |
// configure project |
132 |
Project project = new Project(Toolbox.workspace, clipboardDirectory.getName()); |
|
130 |
Project project = Toolbox.workspace.getProject(clipboardDirectory.getName()); |
|
131 |
if (project == null) { |
|
132 |
project = new Project(Toolbox.workspace, clipboardDirectory.getName()); |
|
133 |
} |
|
133 | 134 |
project.setSourceDirectory(clipboardDirectory.getAbsolutePath()); |
134 | 135 |
project.setImportModuleName("txt"); |
135 |
project.setEncoding("UTF-8"); |
|
136 |
|
|
136 |
project.setEncoding("UTF-8"); |
|
137 |
project.getEditionDefinition("default").setBuildEdition(true); |
|
138 |
|
|
137 | 139 |
String lang = TBXPreferences.getInstance().getString(TBXPreferences.IMPORT_DEFAULT_LANG); |
138 | 140 |
if (lang.length() == 0) lang = Locale.getDefault().getLanguage(); |
139 | 141 |
if ("??".equals(lang)) { //$NON-NLS-1$ |
... | ... | |
146 | 148 |
|
147 | 149 |
project.compute(); |
148 | 150 |
|
149 |
new ExecuteImportScript().executeScript(project);
|
|
151 |
ExecuteImportScript.executeScript(project);
|
|
150 | 152 |
return null; |
151 | 153 |
} |
152 | 154 |
} |
tmp/org.txm.concordance.core/src/org/txm/concordance/core/functions/Concordance.java (revision 1115) | ||
---|---|---|
313 | 313 |
this.nLines = queryResult.getNMatch(); |
314 | 314 |
this.lines = new ArrayList<Line>(Collections.nCopies(nLines, (Line) null));// lines are lazily fetched; we force an |
315 | 315 |
|
316 |
return lines.size() > 0; |
|
316 |
return true;//lines.size() > 0;
|
|
317 | 317 |
} |
318 | 318 |
|
319 | 319 |
/** |
tmp/org.txm.core/src/java/org/txm/importer/xtz/ImportModule.java (revision 1115) | ||
---|---|---|
74 | 74 |
|
75 | 75 |
|
76 | 76 |
this.sourceDirectory = project.getSrcdir(); |
77 |
this.binaryDirectory = new File(Toolbox.getTxmHomePath(), "corpora/"+corpusName.toUpperCase()); |
|
78 |
|
|
79 |
if (!updateCorpus) { // clean directories only if it's a new import |
|
80 |
DeleteDir.deleteDirectory(binaryDirectory); |
|
81 |
binaryDirectory.mkdir(); |
|
77 |
this.binaryDirectory = project.getProjectDirectory(); |
|
82 | 78 |
|
79 |
if (!updateCorpus) { // clean directories only if it's a new import |
|
83 | 80 |
File txmDir = new File(binaryDirectory, "txm"); |
84 |
txmDir.mkdir(); |
|
81 |
DeleteDir.deleteDirectory(binaryDirectory); |
|
82 |
txmDir.mkdirs(); |
|
85 | 83 |
} |
86 | 84 |
} |
87 | 85 |
|
... | ... | |
211 | 209 |
return isSuccessful; |
212 | 210 |
} |
213 | 211 |
|
214 |
public Project getParameters() {
|
|
212 |
public Project getProject() {
|
|
215 | 213 |
return project; |
216 | 214 |
} |
217 | 215 |
|
tmp/org.txm.core/src/java/org/txm/Toolbox.java (revision 1115) | ||
---|---|---|
305 | 305 |
|
306 | 306 |
//IScopeContext projectScope = new ProjectScope(projects[i]); |
307 | 307 |
|
308 |
ArrayList<String> resultNodePaths = TXMPreferences.getAllResultsNodePaths("project/" + projects[i].getName() + "/");
|
|
308 |
ArrayList<String> resultNodePaths = TXMPreferences.getAllResultsNodePaths("P/" + projects[i].getName() + "/");
|
|
309 | 309 |
|
310 | 310 |
Log.info("Toolbox.initialize(): loading project " + projects[i].getName() + "..."); |
311 | 311 |
Log.info("Toolbox.initialize(): " + resultNodePaths.size() + " node(s) found in project " + projects[i].getName() + "..."); |
tmp/org.txm.core/src/java/org/txm/core/results/TXMResult.java (revision 1115) | ||
---|---|---|
183 | 183 |
} |
184 | 184 |
if (parametersNodePath == null) { |
185 | 185 |
this.uniqueID = createUUID() + "_" + this.getClass().getSimpleName(); //$NON-NLS-1$ |
186 |
if(this.getProject() != null) { |
|
186 |
if (this.getProject() != null) {
|
|
187 | 187 |
parametersNodePath = this.getProject().getParametersNodeRootPath(); |
188 | 188 |
} |
189 | 189 |
this.parametersNodePath = parametersNodePath + this.uniqueID; |
... | ... | |
231 | 231 |
// retrieving parent from UUID |
232 | 232 |
String parentUUID = this.getStringParameterValue(TXMPreferences.PARENT_UUID); |
233 | 233 |
|
234 |
System.out.println("TXMResult.TXMResult(): parent UUID = " + parentUUID); |
|
234 |
//System.out.println("TXMResult.TXMResult(): parent UUID = " + parentUUID);
|
|
235 | 235 |
|
236 | 236 |
if (!("ROOT".equals(this.uniqueID)) && // search for parent only if UUID != "ROOT" |
237 | 237 |
parent == null && |
... | ... | |
275 | 275 |
if (this.parent == null) { |
276 | 276 |
Log.warning("Warning: the TXMResult of " + this.getClass() + " is attached to no parent. (uuid = " + this.getUUID() + ")"); |
277 | 277 |
} |
278 |
|
|
279 | 278 |
} |
280 | 279 |
|
281 | 280 |
public void setUserName(String name) { |
... | ... | |
1005 | 1004 |
// FIXME: debug |
1006 | 1005 |
// System.err.println("TXMResult.delete()"); |
1007 | 1006 |
|
1008 |
// delete the local node |
|
1009 |
TXMPreferences.delete(this); |
|
1007 |
// START WITH CHILDREN |
|
1010 | 1008 |
|
1011 | 1009 |
// remove children and clean resources |
1012 |
for (int i = 0; i < this.children.size(); i++) {
|
|
1013 |
this.children.get(i).clean();
|
|
1014 |
TXMPreferences.delete(this.children.get(i)); |
|
1015 |
this.removeChild(i); |
|
1010 |
while (this.children.size() > 0) {
|
|
1011 |
this.children.get(0).delete(); // should call parent.removeResult(child)
|
|
1012 |
// TXMPreferences.delete(this.children.get(i));
|
|
1013 |
// this.removeChild(i);
|
|
1016 | 1014 |
} |
1017 |
|
|
1015 |
this.children.clear(); |
|
1016 |
|
|
1017 |
//THEN FINISH WITH THIS |
|
1018 |
// delete the local node |
|
1019 |
TXMPreferences.delete(this); |
|
1020 |
|
|
1018 | 1021 |
// specific cleaning |
1019 | 1022 |
this.clean(); |
1020 | 1023 |
|
tmp/org.txm.core/src/java/org/txm/objects/Edition.java (revision 1115) | ||
---|---|---|
63 | 63 |
|
64 | 64 |
@Parameter(key=TBXPreferences.NAMES) |
65 | 65 |
ArrayList<String> pPageNames = new ArrayList<String>(); |
66 |
|
|
66 |
|
|
67 | 67 |
@Parameter(key="pages_wordids") |
68 | 68 |
ArrayList<String> pPageFirstWordIds = new ArrayList<String>(); |
69 | 69 |
|
... | ... | |
79 | 79 |
this.visible = false; |
80 | 80 |
this.internalPersistable = true; |
81 | 81 |
} |
82 |
|
|
82 |
|
|
83 | 83 |
/** |
84 | 84 |
* Instantiates a new edition. |
85 | 85 |
* |
... | ... | |
194 | 194 |
} catch (PatternSyntaxException pse) { } |
195 | 195 |
return r; |
196 | 196 |
} |
197 |
|
|
197 |
|
|
198 | 198 |
public void addPage(String id, String wordid) { |
199 | 199 |
this.pPageNames.add(id); |
200 | 200 |
this.pPageFirstWordIds.add(wordid); |
... | ... | |
361 | 361 |
} else { |
362 | 362 |
pPageFirstWordIds = new ArrayList<>(); |
363 | 363 |
} |
364 |
|
|
365 |
if (pages == null) { |
|
366 |
pages = new ArrayList<Page>(); |
|
367 |
} else { |
|
368 |
pages.clear(); |
|
369 |
} |
|
370 | 364 |
|
371 |
for (int i = 0 ; i < pPageNames.size() && i < pPageFirstWordIds.size() ; i++) { |
|
372 |
pages.add(new Page(this, pPageNames.get(i), pPageFirstWordIds.get(i))); |
|
373 |
} |
|
374 |
|
|
375 | 365 |
return true; |
376 | 366 |
} |
377 | 367 |
|
... | ... | |
411 | 401 |
@Override |
412 | 402 |
protected boolean _compute() throws Exception { |
413 | 403 |
//System.out.println("USE THE PROJECT IMPORTMODULE TO BUILD THE EDITION="+pName); |
404 |
if (pages == null) { |
|
405 |
pages = new ArrayList<Page>(); |
|
406 |
} else { |
|
407 |
pages.clear(); |
|
408 |
} |
|
409 |
|
|
410 |
if (pages.size() != pPageNames.size()) { |
|
411 |
for (int i = 0 ; i < pPageNames.size() && i < pPageFirstWordIds.size() ; i++) { |
|
412 |
pages.add(new Page(this, pPageNames.get(i), pPageFirstWordIds.get(i))); |
|
413 |
} |
|
414 |
} |
|
415 |
|
|
414 | 416 |
return true; |
415 | 417 |
} |
416 | 418 |
|
tmp/org.txm.core/src/java/org/txm/objects/Project.java (revision 1115) | ||
---|---|---|
175 | 175 |
* @throws Exception |
176 | 176 |
*/ |
177 | 177 |
public Project(Workspace workspace, String name) throws Exception { |
178 |
super(workspace); |
|
178 |
super("project/" + name + "/" + createUUID() + "_Project", workspace);
|
|
179 | 179 |
this.internalPersistable = true; |
180 | 180 |
this.pName = name; |
181 | 181 |
this.dirty = false; |
... | ... | |
648 | 648 |
* @return the base directory |
649 | 649 |
*/ |
650 | 650 |
public File getProjectDirectory() { |
651 |
if (this.rcpProject == null || this.rcpProject.getLocation() == null) { |
|
652 |
return new File(Toolbox.workspace.getLocation(), this.pName); |
|
653 |
} |
|
651 | 654 |
return this.rcpProject.getLocation().toFile(); |
652 | 655 |
} |
653 | 656 |
|
... | ... | |
1003 | 1006 |
} |
1004 | 1007 |
return null; |
1005 | 1008 |
} |
1006 |
|
|
1009 |
|
|
1007 | 1010 |
@SuppressWarnings("unchecked") |
1008 | 1011 |
private List<CorpusBuild> getCorpusBuilds() { |
1009 | 1012 |
return (List<CorpusBuild>) getChildren(CorpusBuild.class); |
tmp/org.txm.utils/src/org/txm/utils/StreamHog.java (revision 1115) | ||
---|---|---|
28 | 28 |
package org.txm.utils; |
29 | 29 |
|
30 | 30 |
import java.io.BufferedReader; |
31 |
import java.io.IOException; |
|
32 | 31 |
import java.io.InputStream; |
33 | 32 |
import java.io.InputStreamReader; |
34 | 33 |
import java.util.ArrayList; |
... | ... | |
104 | 103 |
lastline = line; |
105 | 104 |
|
106 | 105 |
} |
107 |
} catch (IOException e) {
|
|
108 |
System.out.println("ERROR: R logging is broken: "+e);
|
|
106 |
} catch (Exception e) { |
|
107 |
System.out.println("ERROR: broken process logging : "+e);
|
|
109 | 108 |
org.txm.utils.logger.Log.printStackTrace(e); |
110 | 109 |
} |
111 | 110 |
} |
tmp/org.txm.links.rcp/src/org/txm/links/rcp/handlers/SendSelectionToQueryable.java (revision 1115) | ||
---|---|---|
46 | 46 |
*/ |
47 | 47 |
public abstract class SendSelectionToQueryable extends BaseAbstractHandler { |
48 | 48 |
|
49 |
|
|
50 |
|
|
51 | 49 |
/** |
52 | 50 |
* Creates the query. |
53 | 51 |
* @param isel |
... | ... | |
94 | 92 |
} |
95 | 93 |
return null; |
96 | 94 |
} |
97 |
|
|
98 | 95 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/Partition.java (revision 1115) | ||
---|---|---|
278 | 278 |
* @see org.txm.objects.TxmObject#load() |
279 | 279 |
*/ |
280 | 280 |
protected boolean _load(Element e) { |
281 |
|
|
282 |
// partition already persisted |
|
283 |
if (this.getUUID().length() > 0) { |
|
284 |
return true; |
|
285 |
} |
|
286 | 281 |
|
287 | 282 |
// partition already computed |
288 | 283 |
if (this.hasBeenComputedOnce) { |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPCorpus.java (revision 1115) | ||
---|---|---|
898 | 898 |
query.getQueryString()); |
899 | 899 |
queryResult = new QueryResult(queryResultId, queryResultName, this, query); |
900 | 900 |
|
901 |
if (save) { |
|
902 |
new SavedQuery(this).setParameters(query.toString(), new ArrayList<String>()); |
|
903 |
} |
|
901 |
// if (save) {
|
|
902 |
// new SavedQuery(this).setParameters(query.toString(), new ArrayList<String>());
|
|
903 |
// }
|
|
904 | 904 |
} catch (Exception e) { |
905 | 905 |
org.txm.utils.logger.Log.printStackTrace(e); |
906 | 906 |
throw new CqiClientException(e); |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/MainCorpus.java (revision 1115) | ||
---|---|---|
565 | 565 |
|
566 | 566 |
removeFromCWB(); |
567 | 567 |
|
568 |
File data = new File(getProjectDirectory(), "data");
|
|
569 |
File registry = new File(getProjectDirectory(), "registry");
|
|
570 |
File cqp = new File(getProjectDirectory(), "cqp"); |
|
568 |
File data = new File(getProjectDirectory(), "data/"+this.pID);
|
|
569 |
File registry = new File(getProjectDirectory(), "registry/"+this.pID.toLowerCase());
|
|
570 |
File cqp = new File(getProjectDirectory(), "cqp/"+this.pID+".cqp");
|
|
571 | 571 |
DeleteDir.deleteDirectory(data); |
572 |
DeleteDir.deleteDirectory(registry);
|
|
573 |
DeleteDir.deleteDirectory(cqp);
|
|
572 |
registry.delete();
|
|
573 |
cqp.delete();
|
|
574 | 574 |
} |
575 | 575 |
|
576 | 576 |
@Override |
tmp/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/CwbAlign.java (revision 1115) | ||
---|---|---|
451 | 451 |
// arg : the align file created by cwb-align |
452 | 452 |
{ |
453 | 453 |
ArrayList<String> args = new ArrayList<String>(); |
454 |
args.add(binpath + "cwb-align-encode"); //$NON-NLS-1$
|
|
454 |
args.add(new File(binpath, "cwb-align-encode").getAbsolutePath()); //$NON-NLS-1$
|
|
455 | 455 |
if (isd) { |
456 | 456 |
args.add("-d"); //$NON-NLS-1$ |
457 | 457 |
args.add("" + d); //$NON-NLS-1$ |
tmp/org.txm.groovy.core/src/java/org/txm/groovy/core/GroovyScriptedImportEngine.java (revision 1115) | ||
---|---|---|
127 | 127 |
// if (tempBinDirectory.exists()) tempBinDirectory.renameTo(basedir); |
128 | 128 |
return new Status(Status.ERROR, "org.txm.groovy.core", "Import not correclty ended. See console messages."); |
129 | 129 |
} |
130 |
} else { |
|
131 |
System.out.println("Error: import not correctly ended (no 'readyToLoad' binding found). See console messages."); |
|
132 |
return new Status(Status.ERROR, "org.txm.groovy.core", "Import not correclty ended. See console messages."); |
|
130 | 133 |
} |
131 | 134 |
} catch (ThreadDeath td) { |
132 | 135 |
return Status.CANCEL_STATUS; |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/compiler.groovy (revision 1115) | ||
---|---|---|
49 | 49 |
import org.txm.scripts.*; |
50 | 50 |
import org.txm.importer.scripts.xmltxm.*; |
51 | 51 |
import org.txm.utils.treetagger.TreeTagger; |
52 |
|
|
52 |
import org.txm.objects.* |
|
53 | 53 |
import javax.xml.stream.*; |
54 | 54 |
import java.net.URL; |
55 | 55 |
import java.io.File; |
56 | 56 |
import java.util.Comparator; |
57 | 57 |
import java.util.HashMap; |
58 | 58 |
import java.util.List; |
59 |
import org.txm.searchengine.cqp.corpus.* |
|
59 | 60 |
|
60 |
// TODO: Auto-generated Javadoc |
|
61 | 61 |
/** |
62 | 62 |
* Produce CQP files from the TEI-TXM files. <br/> |
63 | 63 |
* - Read texts metadata with XPath queries <br/> |
... | ... | |
108 | 108 |
/** The base. */ |
109 | 109 |
private String base=""; |
110 | 110 |
|
111 |
/** The project. */ |
|
112 |
private String project=""; |
|
113 |
|
|
114 | 111 |
/** The lang. */ |
115 | 112 |
private String lang ="fr"; |
116 | 113 |
|
... | ... | |
136 | 133 |
* @param base the base's name |
137 | 134 |
* @param project the Project's name |
138 | 135 |
*/ |
139 |
public compiler(URL url,String text,String base, String project, Properties metadataXPath) |
|
136 |
public compiler(URL url,String text,String base, String projectName, Properties metadataXPath)
|
|
140 | 137 |
{ |
141 | 138 |
this.metadataXPath = metadataXPath; |
142 | 139 |
this.text = text |
143 | 140 |
this.base = base; |
144 |
this.project = project; |
|
145 | 141 |
try { |
146 | 142 |
this.url = url; |
147 | 143 |
inputData = url.openStream(); |
... | ... | |
249 | 245 |
* @param cqpFile the CQP output file |
250 | 246 |
* @return true, if successful |
251 | 247 |
*/ |
252 |
private boolean transfomFileCqp(File cqpFile) |
|
248 |
private boolean transfomFileCqp(Project project, File cqpFile)
|
|
253 | 249 |
{ |
254 | 250 |
try { |
255 | 251 |
if (!createOutput(cqpFile)) return false; |
... | ... | |
349 | 345 |
} |
350 | 346 |
} |
351 | 347 |
|
352 |
output.write(" base=\""+base+"\" project=\""+project+"\">\n"); |
|
348 |
output.write(" base=\""+base+"\" project=\""+project.getName()+"\">\n");
|
|
353 | 349 |
captureword=true; |
354 | 350 |
break; |
355 | 351 |
|
... | ... | |
746 | 742 |
* @param corpusname the corpus name |
747 | 743 |
* @return true, if successful |
748 | 744 |
*/ |
749 |
public boolean run(File binDir, File txmDir, String corpusname, Properties metadataXPath) |
|
745 |
public boolean run(Project project, File binDir, File txmDir, String corpusname, Properties metadataXPath)
|
|
750 | 746 |
{ |
751 | 747 |
sattrsListener = null; // reset SAttribute Listener for each new import |
752 | 748 |
this.metadataXPath = metadataXPath; |
... | ... | |
754 | 750 |
if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) { |
755 | 751 |
println ("Error: CWB executables not well set.") |
756 | 752 |
return false; |
757 |
} |
|
758 |
if (!txmDir.exists()) { |
|
759 |
println ("binary directory does not exists: "+txmDir) |
|
760 |
return false; |
|
761 |
} |
|
753 |
} |
|
754 |
|
|
755 |
CorpusBuild corpus = project.getCorpusBuild(project.getName()); |
|
756 |
if (corpus != null) { |
|
757 |
//println "CLEAN PREVIOUS CORPUS" |
|
758 |
corpus.delete(); // remove old files |
|
759 |
} |
|
760 |
|
|
761 |
// make new one |
|
762 |
corpus = new MainCorpus(project); |
|
763 |
corpus.setID(project.getName()); |
|
764 |
corpus.setName(project.getName()); |
|
765 |
corpus.setDescription("Built with the TXT+CSV import module"); |
|
766 |
|
|
767 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
|
768 |
new File(binDir,"cqp").mkdirs() |
|
769 |
new File(binDir,"data").mkdirs() |
|
770 |
new File(binDir,"registry").mkdirs() |
|
762 | 771 |
|
763 |
File cqpFile = new File(binDir, "cqp/${corpusname}.cqp"); |
|
764 |
new File(binDir, "/cqp/").deleteDir(); |
|
765 |
new File(binDir, "/cqp/").mkdir(); |
|
766 |
new File(binDir, "/data/${corpusname}").deleteDir(); |
|
767 |
new File(binDir, "/data/${corpusname}").mkdir(); |
|
768 |
new File(binDir, "registry/").mkdir(); |
|
769 |
|
|
770 | 772 |
String textid = ""; |
771 | 773 |
int counttext = 0; |
772 | 774 |
List<File> files = txmDir.listFiles(); |
... | ... | |
847 | 849 |
String txtname = f.getName().substring(0, f.getName().length()-4); |
848 | 850 |
def builder = new compiler(f.toURI().toURL(), txtname, corpusname, "default", metadataXPath); |
849 | 851 |
builder.setLang(lang) |
850 |
if (!builder.transfomFileCqp(cqpFile)) { |
|
852 |
if (!builder.transfomFileCqp(project, cqpFile)) {
|
|
851 | 853 |
println "Failed to compile "+f |
852 | 854 |
} |
853 | 855 |
builder.setAnnotationDone(this.annotate_status); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/bfmLoader.groovy (revision 1115) | ||
---|---|---|
45 | 45 |
import org.txm.importer.* |
46 | 46 |
|
47 | 47 |
String userDir = System.getProperty("user.home"); |
48 |
boolean debug = org.txm.utils.logger.Log.isPrintingErrors(); |
|
49 |
def MONITOR; |
|
50 |
BaseParameters params; |
|
51 |
try {params = paramsBinding;MONITOR=monitor} catch (Exception) |
|
52 |
{ println "DEV MODE";//exception means we debug |
|
53 |
debug = true |
|
54 |
params = new BaseParameters(new File(userDir, "xml/bfm/import.xml")) |
|
55 |
params.load() |
|
56 |
if (!org.txm.Toolbox.isInitialized()) { |
|
57 |
//rootDir = userDir+"/xml/TESTS/alceste"; // directory which contains the source file |
|
48 |
Project project; |
|
58 | 49 |
|
59 |
Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM")); |
|
60 |
//Toolbox.setParam(Toolbox.INSTALL_DIR,new File("C:\\Program Files\\TXM"));//For Windows |
|
61 |
Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8"); |
|
62 |
Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ","); |
|
63 |
Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\""); |
|
64 |
Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File("C:\\Program Files\\treetagger\\models"));//for Windows |
|
65 |
Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM")); |
|
66 |
} |
|
67 |
} |
|
68 |
if (params == null) { println "no parameters. Aborting"; return; } |
|
50 |
try {project=projectBinding;MONITOR=monitor} catch (Exception) |
|
51 |
{ } |
|
52 |
if (project == null) { println "no project set. Aborting"; return; } |
|
69 | 53 |
|
70 |
String corpusname = params.getCorpusName(); |
|
71 |
Element corpusElem = params.corpora.get(corpusname); |
|
72 |
String basename = params.name; |
|
73 |
String rootDir = params.rootDir; |
|
74 |
String lang = corpusElem.getAttribute("lang"); |
|
54 |
String corpusname = project.getName(); |
|
55 |
String basename = corpusname |
|
56 |
String rootDir = project.getSrcdir(); |
|
57 |
String lang = project.getLang() |
|
75 | 58 |
String model = lang |
76 |
String encoding = corpusElem.getAttribute("encoding"); |
|
77 |
boolean annotate = "true" == corpusElem.getAttribute("annotate"); |
|
78 |
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl") |
|
79 |
def xslParams = params.getXsltParams(corpusElem); |
|
80 |
int wordsPerPage = params.getWordsPerPage("default") |
|
81 |
boolean build_edition = params.getDoEdition("default") |
|
59 |
String encoding = project.getEncoding() |
|
60 |
boolean annotate = project.getAnnotate() |
|
61 |
String xsl = project.getFrontXSL(); |
|
62 |
def xslParams = project.getXsltParameters(); |
|
63 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage() |
|
64 |
String page_element = project.getEditionDefinition("default").getPageElement() |
|
65 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition() |
|
82 | 66 |
|
83 | 67 |
File srcDir = new File(rootDir); |
84 |
File binDir = new File(Toolbox.getTxmHomePath(),"corpora/"+basename); |
|
85 |
binDir.deleteDir(); |
|
68 |
File binDir = project.getProjectDirectory(); |
|
86 | 69 |
binDir.mkdirs(); |
87 | 70 |
if (!binDir.exists()) { |
88 | 71 |
println "Could not create binDir "+binDir |
... | ... | |
166 | 149 |
//c.setCwbPath("~/TXM/cwb/bin"); |
167 | 150 |
c.setLang(lang); |
168 | 151 |
c.setAnnotationDone(annotate_status) |
169 |
if (!c.run(binDir, txmDir, corpusname, metadataXPath)) { |
|
152 |
if (!c.run(project, binDir, txmDir, corpusname, metadataXPath)) {
|
|
170 | 153 |
println "import process stopped"; |
171 | 154 |
return; |
172 | 155 |
} |
... | ... | |
192 | 175 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang); |
193 | 176 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);//["'","(","[","{","«"]; |
194 | 177 |
|
195 |
Element text = params.addText(corpusElem, txtname, txmFile); |
|
196 |
|
|
178 |
Text t = new Text(project); |
|
179 |
t.setName(txtname); |
|
180 |
t.setSourceFile(txmFile) |
|
181 |
t.setTXMFile(txmFile) |
|
182 |
|
|
197 | 183 |
def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, corpusname, metadataXPath); |
198 |
Element edition = params.addEdition(text, "default", outdir.getAbsolutePath(), "html"); |
|
199 |
|
|
184 |
Edition edition = new Edition(t); |
|
185 |
edition.setName("default"); |
|
186 |
edition.setIndex(outdir.getAbsolutePath()); |
|
200 | 187 |
for (i = 0 ; i < ed.getPageFiles().size();) { |
201 | 188 |
File f = ed.getPageFiles().get(i); |
202 | 189 |
String wordid = ed.getIdx().get(i); |
203 |
params.addPage(edition, ""+(++i), wordid);
|
|
190 |
edition.addPage(""+(++i), wordid);
|
|
204 | 191 |
} |
205 | 192 |
} |
206 | 193 |
} |
207 | 194 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
208 | 195 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING") |
209 |
File paramFile = new File(binDir, "import.xml"); |
|
210 |
DomUtils.save(params.root.getOwnerDocument(), paramFile);readyToLoad = true; |
|
196 |
|
|
197 |
readyToLoad = project.save(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/doc/pager.groovy (revision 1115) | ||
---|---|---|
404 | 404 |
createNextOutput(); |
405 | 405 |
} |
406 | 406 |
break; |
407 |
|
|
407 | 408 |
case "list": |
408 | 409 |
pagedWriter.writeEndElement(); // ul or ol |
409 | 410 |
pagedWriter.writeCharacters("\t") |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/compiler.groovy (revision 1115) | ||
---|---|---|
37 | 37 |
import org.txm.importer.scripts.xmltxm.*; |
38 | 38 |
import org.txm.utils.logger.Log; |
39 | 39 |
import org.txm.utils.treetagger.TreeTagger; |
40 |
|
|
40 |
import org.txm.objects.*; |
|
41 |
import org.txm.searchengine.cqp.corpus.*; |
|
41 | 42 |
import javax.xml.stream.*; |
42 |
|
|
43 | 43 |
import java.net.URL; |
44 | 44 |
import java.io.File; |
45 | 45 |
import java.io.IOException; |
... | ... | |
47 | 47 |
import java.util.HashMap; |
48 | 48 |
import java.util.List; |
49 | 49 |
|
50 |
// TODO: Auto-generated Javadoc |
|
51 | 50 |
/** |
52 |
* The Class compiler.
|
|
51 |
* The Class compiler builds the aligned CQP corpus indexes
|
|
53 | 52 |
*/ |
54 |
class compiler |
|
55 |
{ |
|
53 |
class compiler { |
|
56 | 54 |
/** The debug. */ |
57 | 55 |
private boolean debug= false; |
58 | 56 |
|
... | ... | |
81 | 79 |
String base=""; |
82 | 80 |
|
83 | 81 |
/** The project. */ |
84 |
String project=""; |
|
82 |
String projectName="";
|
|
85 | 83 |
|
86 | 84 |
/** The anahash. */ |
87 | 85 |
static boolean firstWord = true; |
... | ... | |
130 | 128 |
{ |
131 | 129 |
this.text = text |
132 | 130 |
this.base = base; |
133 |
this.project = project; |
|
131 |
this.projectName = project;
|
|
134 | 132 |
this.tuprops = tuprops; |
135 | 133 |
try { |
136 | 134 |
this.url = url; |
... | ... | |
222 | 220 |
break; |
223 | 221 |
case "text": |
224 | 222 |
output.write("<text id=\""+text+"\" base=\""+base+"\"" + |
225 |
" project=\""+project+"\""); |
|
223 |
" project=\""+projectName+"\"");
|
|
226 | 224 |
def textAttrs = []; |
227 | 225 |
for ( int i = 0 ; i < parser.getAttributeCount() ; i++) |
228 | 226 |
{ |
... | ... | |
424 | 422 |
* @param textAttributes the text attributes |
425 | 423 |
* @return true, if successful |
426 | 424 |
*/ |
427 |
public boolean run(File binDir, File txmDir, String basename) |
|
425 |
public boolean run(Project project, File binDir, File txmDir, String basename)
|
|
428 | 426 |
{ |
429 | 427 |
sattrsListener = null; // reset SAttribute Listener for each new import |
430 | 428 |
String rootDir = binDir.getAbsolutePath(); |
... | ... | |
434 | 432 |
println ("Error: CWB executables not well set.") |
435 | 433 |
return false; |
436 | 434 |
} |
437 |
if (!new File(rootDir).exists()) { |
|
438 |
println ("binary directory does not exists: "+rootDir)
|
|
439 |
return false;
|
|
440 |
}
|
|
435 |
|
|
436 |
new File(binDir,"cqp").mkdirs()
|
|
437 |
new File(binDir,"data").mkdirs()
|
|
438 |
new File(binDir,"registry").mkdirs()
|
|
441 | 439 |
|
442 |
new File(binDir,"/cqp/").deleteDir(); |
|
443 |
new File(binDir,"/cqp/").mkdir(); |
|
444 |
new File(binDir,"/data/").deleteDir(); |
|
445 |
new File(binDir,"/data/").mkdir(); |
|
446 |
new File(binDir,"registry/").mkdir(); |
|
447 |
|
|
448 | 440 |
String textid=""; |
449 | 441 |
int counttext =0; |
450 | 442 |
List<File> files = txmDir.listFiles(); |
... | ... | |
459 | 451 |
if (lang ==null) { |
460 | 452 |
println "ERROR: no lang defined for group $group . Aborting." |
461 | 453 |
} |
462 |
corpusIDS[group] = (lang+group).toLowerCase();
|
|
454 |
corpusIDS[group] = lang+group
|
|
463 | 455 |
} |
464 | 456 |
} |
465 | 457 |
println "Using corpus ID: $corpusIDS" |
... | ... | |
470 | 462 |
segs_id.put(group, 0); |
471 | 463 |
cqpName = basename+"_"+corpusIDS.get(group); |
472 | 464 |
createOutput(rootDir+"/cqp", "${cqpName}.cqp"); |
473 |
output.write("<txmcorpus id=\"${cqpName}\" lang=\"$lang\">\n")
|
|
465 |
output.write("<txmcorpus id=\"${cqpName}\" lang=\""+lang.toLowerCase()+"\">\n")
|
|
474 | 466 |
output.close(); |
475 | 467 |
|
476 | 468 |
//create txmDirs |
... | ... | |
484 | 476 |
for (int group : langGroups.keySet()) { |
485 | 477 |
//String lang = langs.get(langGroups.get(group)[0]); |
486 | 478 |
cqpName = basename+"_"+corpusIDS.get(group); |
479 |
|
|
480 |
CorpusBuild corpus = project.getCorpusBuild(cqpName); |
|
481 |
if (corpus != null) { |
|
482 |
//println "CLEAN PREVIOUS CORPUS" |
|
483 |
corpus.delete(); // remove old files |
|
484 |
} |
|
485 |
|
|
486 |
// make new one |
|
487 |
corpus = new MainCorpus(project); |
|
488 |
corpus.setID(cqpName); |
|
489 |
corpus.setName(cqpName); |
|
490 |
corpus.setDescription("Built with the TMX import module"); |
|
491 |
|
|
492 |
File cqpFile = new File(binDir,"cqp/"+cqpName+".cqp"); |
|
493 |
|
|
487 | 494 |
def filenames = langGroups.get(group); |
488 | 495 |
filenames.sort() |
489 | 496 |
//println("Process group no $group of files "+filenames) |
... | ... | |
496 | 503 |
} |
497 | 504 |
|
498 | 505 |
filename = filename.substring(0, filename.length()-4); |
499 |
String corpusname = (cqpName).toLowerCase(); |
|
500 | 506 |
counttext++; |
501 | 507 |
|
502 | 508 |
String txtname = f.getName(); |
503 |
txtname = txtname.substring(0, txtname.lastIndexOf("_"));
|
|
509 |
txtname = txtname.substring(0, txtname.lastIndexOf(".xml"));
|
|
504 | 510 |
seg_id = segs_id.get(group); |
505 | 511 |
builder = new compiler(f.toURI().toURL(), txtname, basename, "default", tuprops); |
506 | 512 |
builder.transfomFileCqp(rootDir+"/cqp",cqpName+".cqp"); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/Tmx2XmlFiles.groovy (revision 1115) | ||
---|---|---|
136 | 136 |
if (event == XMLStreamConstants.START_ELEMENT) { |
137 | 137 |
if (parser.getLocalName() == "tuv") { |
138 | 138 |
|
139 |
String lang = "fr";
|
|
139 |
String lang = "FR";
|
|
140 | 140 |
for(int i = 0 ; i < parser.getAttributeCount() ; i++) |
141 | 141 |
if (parser.getAttributeLocalName(i) == "lang") |
142 | 142 |
{ |
143 |
lang = (parser.getAttributeValue(i)).toLowerCase()
|
|
143 |
lang = (""+parser.getAttributeValue(i)).toUpperCase()
|
|
144 | 144 |
break; |
145 | 145 |
} |
146 | 146 |
|
... | ... | |
249 | 249 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) // get the lang attribute of the tuv |
250 | 250 |
if (parser.getAttributeLocalName(i) == "lang") |
251 | 251 |
{ |
252 |
lang = parser.getAttributeValue(i)
|
|
252 |
lang = (""+parser.getAttributeValue(i)).toUpperCase()
|
|
253 | 253 |
break; |
254 | 254 |
} |
255 | 255 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/tmxLoader.groovy (revision 1115) | ||
---|---|---|
43 | 43 |
import org.txm.utils.xml.DomUtils; |
44 | 44 |
|
45 | 45 |
String userDir = System.getProperty("user.home"); |
46 |
boolean debug = org.txm.utils.logger.Log.isPrintingErrors(); |
|
47 | 46 |
def MONITOR; |
48 |
BaseParameters params; |
|
49 |
try {params = paramsBinding;MONITOR=monitor} catch (Exception) |
|
50 |
{ println "DEV MODE";//exception means we debug |
|
51 |
debug = true |
|
52 |
params = new BaseParameters(new File(userDir, "xml/tmx/import.xml")) |
|
53 |
params.load() |
|
54 |
if (!org.txm.Toolbox.isInitialized()) { |
|
47 |
Project project; |
|
55 | 48 |
|
56 |
Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM")); |
|
57 |
Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File(userDir,"treetagger/models")); |
|
58 |
Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8"); |
|
59 |
Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ","); |
|
60 |
Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\""); |
|
61 |
Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM")); |
|
62 |
} |
|
63 |
} |
|
64 |
if (params == null) { println "no parameters. Aborting"; return; } |
|
49 |
try {project=projectBinding;MONITOR=monitor} catch (Exception) |
|
50 |
{ } |
|
51 |
if (project == null) { println "no project set. Aborting"; return; } |
|
65 | 52 |
|
66 |
String corpusname = params.getCorpusName(); |
|
67 |
Element corpusElem = params.corpora.get(corpusname); |
|
68 |
String basename = params.name; |
|
69 |
String rootDir = params.rootDir; |
|
70 |
String lang = corpusElem.getAttribute("lang"); |
|
71 |
String model = lang; |
|
72 |
String encoding = corpusElem.getAttribute("encoding"); |
|
73 |
boolean annotate = "true" == corpusElem.getAttribute("annotate"); |
|
74 |
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl") |
|
75 |
def xslParams = params.getXsltParams(corpusElem);; |
|
76 |
int wordsPerPage = params.getWordsPerPage("default") |
|
77 |
String page_element = params.getPageElement("default") |
|
78 |
boolean build_edition = params.getDoEdition("default") |
|
53 |
String corpusname = project.getName(); |
|
54 |
String basename = corpusname |
|
55 |
String rootDir = project.getSrcdir(); |
|
56 |
String lang = project.getLang() |
|
57 |
String model = lang |
|
58 |
String encoding = project.getEncoding() |
|
59 |
boolean annotate = project.getAnnotate() |
|
60 |
String xsl = project.getFrontXSL(); |
|
61 |
def xslParams = project.getXsltParameters(); |
|
62 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage() |
|
63 |
String page_element = project.getEditionDefinition("default").getPageElement() |
|
64 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition() |
|
79 | 65 |
|
80 | 66 |
File srcDir = new File(rootDir); |
81 |
File binDir = new File(Toolbox.getTxmHomePath(),"corpora/"+basename); |
|
82 |
binDir.deleteDir(); |
|
67 |
File binDir = project.getProjectDirectory(); |
|
83 | 68 |
binDir.mkdirs(); |
84 | 69 |
if (!binDir.exists()) { |
85 | 70 |
println "Could not create binDir "+binDir |
... | ... | |
118 | 103 |
c.setLangs(textLangs); |
119 | 104 |
c.setCorpusIDS(corpusIDS); |
120 | 105 |
c.setLangGroups(langGroups); |
121 |
if (!c.run(binDir, txmDir, basename)) { |
|
106 |
if (!c.run(project, binDir, txmDir, basename)) {
|
|
122 | 107 |
println "Compiler failed" |
123 | 108 |
return; |
124 | 109 |
} |
... | ... | |
127 | 112 |
new File(binDir,"HTML").deleteDir(); |
128 | 113 |
new File(binDir,"HTML").mkdirs(); |
129 | 114 |
if (build_edition) { |
130 |
|
|
115 |
|
|
131 | 116 |
println "-- EDITION - Building edition" |
132 | 117 |
if (MONITOR != null) MONITOR.worked(20, "EDITION - Building edition") |
133 |
|
|
118 |
|
|
134 | 119 |
List<File> filelist = new File(binDir,"txm").listFiles(); |
135 | 120 |
def second = 0 |
136 | 121 |
|
... | ... | |
145 | 130 |
outdir.mkdirs(); |
146 | 131 |
//println "processing pages of corpus "+txmCorpusDir.getName() |
147 | 132 |
|
148 |
Element paraCorpusElem = corpusElem.cloneNode(true); |
|
149 |
paraCorpusElem.setAttribute("name", txmCorpusDir.getName()); |
|
133 |
// Element paraCorpusElem = corpusElem.cloneNode(true);
|
|
134 |
// paraCorpusElem.setAttribute("name", txmCorpusDir.getName());
|
|
150 | 135 |
|
151 |
for (File srcfile : txmFiles) {
|
|
136 |
for (File txmFile : txmFiles) {
|
|
152 | 137 |
print "." |
153 |
String txtname = srcfile.getName();
|
|
154 |
int i = txtname.lastIndexOf("_");
|
|
138 |
String txtname = txmFile.getName();
|
|
139 |
int i = txtname.lastIndexOf(".xml");
|
|
155 | 140 |
if(i > 0) txtname = txtname.substring(0, i); |
156 | 141 |
|
157 | 142 |
//println ""+srcfile.getName()+" -> "+splitTUsPerText[txtname] |
... | ... | |
161 | 146 |
splitTUsPerText[txtname] = splitTUs; |
162 | 147 |
} |
163 | 148 |
|
164 |
String l = textLangs.get(srcfile.getName())
|
|
149 |
String l = textLangs.get(txmFile.getName()).toLowerCase()
|
|
165 | 150 |
if (l == null) l = "fr"; |
166 | 151 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(l); |
167 | 152 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(l); |
168 | 153 |
|
169 |
Element text = params.addText(paraCorpusElem, txtname, srcfile); |
|
154 |
Text t = new Text(project); |
|
155 |
t.setName(txtname); |
|
156 |
t.setSourceFile(txmFile) |
|
157 |
t.setTXMFile(txmFile) |
|
170 | 158 |
|
171 |
def ed = new pager(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element,splitTUs); |
|
172 |
Element edition = params.addEdition(text, "default", outdir.getAbsolutePath(), "html"); |
|
173 |
|
|
159 |
def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element,splitTUs); |
|
160 |
Edition edition = new Edition(t); |
|
161 |
edition.setName("default"); |
|
162 |
edition.setIndex(outdir.getAbsolutePath()); |
|
174 | 163 |
for (i = 0 ; i < ed.getPageFiles().size();) { |
175 | 164 |
File f = ed.getPageFiles().get(i); |
176 | 165 |
String wordid = ed.getIdx().get(i); |
177 |
params.addPage(edition, ""+(++i), wordid);
|
|
166 |
edition.addPage(""+(++i), wordid);
|
|
178 | 167 |
} |
179 | 168 |
} |
180 | 169 |
|
181 |
params.corporaElement.appendChild(paraCorpusElem); |
|
182 |
|
|
183 | 170 |
File cssfile = new File(Toolbox.getTxmHomePath(), "css/tmx.css") |
184 | 171 |
if (cssfile.exists()) { |
185 | 172 |
FileCopy.copy(cssfile, new File(outdir, "tmx.css")); |
... | ... | |
191 | 178 |
println "" |
192 | 179 |
} |
193 | 180 |
} |
194 |
params.corporaElement.removeChild(corpusElem); |
|
195 | 181 |
|
196 | 182 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
197 | 183 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING") |
198 |
File paramFile = new File(binDir, "import.xml"); |
|
199 |
DomUtils.save(params.root.getOwnerDocument(), paramFile);readyToLoad = true; |
|
184 |
|
|
185 |
readyToLoad = project.save(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivamailLoader.groovy (revision 1115) | ||
---|---|---|
42 | 42 |
String userDir = System.getProperty("user.home"); |
43 | 43 |
|
44 | 44 |
def MONITOR; |
45 |
boolean debug = org.txm.utils.logger.Log.isPrintingErrors(); |
|
46 |
BaseParameters params; |
|
47 |
try {params = paramsBinding;MONITOR=monitor} catch (Exception) { |
|
48 |
println "DEV MODE";//exception means we debug |
|
49 |
debug = true |
|
50 |
params = new BaseParameters(new File(userDir, "xml/factivatxt/import.xml")) |
|
51 |
params.load() |
|
52 |
if (!org.txm.Toolbox.isInitialized()) { |
|
45 |
Project project; |
|
53 | 46 |
|
54 |
Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM")); |
|
55 |
Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8"); |
|
56 |
Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ","); |
|
57 |
Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\""); |
|
58 |
Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM")); |
|
59 |
} |
|
60 |
} |
|
61 |
if (params == null) { println "no parameters. Aborting"; return; } |
|
47 |
try {project=projectBinding;MONITOR=monitor} catch (Exception) |
|
48 |
{ } |
|
49 |
if (project == null) { println "no project set. Aborting"; return; } |
|
62 | 50 |
|
63 |
String corpusname = params.getCorpusName(); |
|
64 |
|
|
65 |
Element corpusElem = params.corpora.get(corpusname); |
|
66 |
String basename = params.name; |
|
67 |
String rootDir = params.rootDir; |
|
68 |
String lang = corpusElem.getAttribute("lang"); |
|
51 |
String corpusname = project.getName(); |
|
52 |
String basename = corpusname |
|
53 |
String rootDir = project.getSrcdir(); |
|
54 |
String lang = project.getLang() |
|
69 | 55 |
String model = lang |
70 |
String encoding = corpusElem.getAttribute("encoding"); |
|
71 |
boolean annotate = "true" == corpusElem.getAttribute("annotate"); |
|
72 |
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl") |
|
73 |
def xslParams = params.getXsltParams(corpusElem); |
|
56 |
String encoding = project.getEncoding() |
|
57 |
boolean annotate = project.getAnnotate() |
|
58 |
String xsl = project.getFrontXSL(); |
|
59 |
def xslParams = project.getXsltParameters(); |
|
60 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage() |
|
61 |
String page_element = project.getEditionDefinition("default").getPageElement() |
|
62 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition() |
|
74 | 63 |
|
75 | 64 |
File srcDir = new File(rootDir); |
76 |
File binDir = new File(Toolbox.getTxmHomePath(),"corpora/"+basename); |
|
77 |
binDir.deleteDir(); |
|
65 |
File binDir = project.getProjectDirectory(); |
|
78 | 66 |
binDir.mkdirs(); |
79 | 67 |
if (!binDir.exists()) { |
80 | 68 |
println "Could not create binDir "+binDir |
... | ... | |
119 | 107 |
c.setAnnotationSuccess(annotationSuccess) |
120 | 108 |
if (debug) c.setDebug(); |
121 | 109 |
c.setLang(lang); |
122 |
if (!c.run(binDir, txmDir, corpusname)) {
|
|
110 |
if (!c.run(project)) {
|
|
123 | 111 |
println "import process stopped"; |
124 | 112 |
return; |
125 | 113 |
} |
... | ... | |
135 | 123 |
def second = 0 |
136 | 124 |
|
137 | 125 |
println "Paginating texts: " |
138 |
for (File srcfile : filelist) {
|
|
126 |
for (File txmFile : filelist) {
|
|
139 | 127 |
print "." |
140 |
String txtname = srcfile.getName();
|
|
128 |
String txtname = txmFile.getName();
|
|
141 | 129 |
int i = txtname.lastIndexOf("."); |
142 | 130 |
if(i > 0) txtname = txtname.substring(0, i); |
143 | 131 |
|
144 | 132 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang); |
145 | 133 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang); |
146 | 134 |
|
147 |
Element text = params.addText(params.corpora.get(corpusname), txtname, srcfile); |
|
148 |
|
|
135 |
Text t = new Text(project); |
|
136 |
t.setName(txtname); |
|
137 |
t.setSourceFile(txmFile) |
|
138 |
t.setTXMFile(txmFile) |
|
149 | 139 |
def ed = new pager_old(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, 500, basename, "pb"); |
150 |
Element edition = params.addEdition(text, "default", outdir.getAbsolutePath(), "html"); |
|
151 |
|
|
140 |
Edition edition = new Edition(t); |
|
141 |
edition.setName("default"); |
|
142 |
edition.setIndex(outdir.getAbsolutePath()); |
|
152 | 143 |
for (i = 0 ; i < ed.getPageFiles().size();) { |
153 | 144 |
File f = ed.getPageFiles().get(i); |
154 | 145 |
String wordid = ed.getIdx().get(i); |
155 |
params.addPage(edition, ""+(++i), wordid);
|
|
146 |
edition.addPage(""+(++i), wordid);
|
|
156 | 147 |
} |
157 | 148 |
} |
158 | 149 |
|
159 | 150 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING") |
160 |
File paramFile = new File(binDir, "import.xml"); |
|
161 |
DomUtils.save(params.root.getOwnerDocument(), paramFile);readyToLoad = true; |
|
151 |
|
|
152 |
readyToLoad = project.save(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivaLoader.groovy (revision 1115) | ||
---|---|---|
49 | 49 |
String userDir = System.getProperty("user.home"); |
50 | 50 |
|
51 | 51 |
def MONITOR; |
52 |
boolean debug = org.txm.utils.logger.Log.isPrintingErrors(); |
|
53 |
BaseParameters params; |
|
54 |
try {params = paramsBinding;MONITOR=monitor} catch (Exception) |
|
55 |
{ println "DEV MODE";//exception means we debug |
|
56 |
debug = true; |
|
57 |
params = new BaseParameters(new File(userDir, "xml/TESTS/factiva/import.xml")) |
|
58 |
params.load() |
|
59 |
if (!org.txm.Toolbox.isInitialized()) { |
|
52 |
Project project; |
|
60 | 53 |
|
61 |
Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM")); |
|
62 |
Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8"); |
|
63 |
Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ","); |
|
64 |
Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\""); |
|
65 |
Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM")); |
|
66 |
} |
|
67 |
} |
|
68 |
if (params == null) { println "no parameters. Aborting"; return; } |
|
54 |
try {project=projectBinding;MONITOR=monitor} catch (Exception) |
|
55 |
{ } |
|
56 |
if (project == null) { println "no project set. Aborting"; return; } |
|
69 | 57 |
|
70 |
String corpusname = params.getCorpusName(); |
|
71 |
Element corpusElem = params.corpora.get(corpusname); |
|
72 |
String basename = params.name; |
|
73 |
String rootDir = params.rootDir; |
|
74 |
String lang = corpusElem.getAttribute("lang"); |
|
58 |
String corpusname = project.getName(); |
|
59 |
String basename = corpusname |
|
60 |
String rootDir = project.getSrcdir(); |
|
61 |
String lang = project.getLang() |
|
75 | 62 |
String model = lang |
76 |
String encoding = corpusElem.getAttribute("encoding");
|
|
77 |
boolean annotate = "true" == corpusElem.getAttribute("annotate");
|
|
78 |
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl")
|
|
79 |
def xslParams = params.getXsltParams(corpusElem);
|
|
80 |
int wordsPerPage = params.getWordsPerPage("default")
|
|
81 |
String page_element = params.getPageElement("default")
|
|
82 |
boolean build_edition = params.getDoEdition("default")
|
|
63 |
String encoding = project.getEncoding()
|
|
64 |
boolean annotate = project.getAnnotate()
|
|
65 |
String xsl = project.getFrontXSL();
|
|
66 |
def xslParams = project.getXsltParameters();
|
|
67 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
|
|
68 |
String page_element = project.getEditionDefinition("default").getPageElement()
|
|
69 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition()
|
|
83 | 70 |
|
84 | 71 |
File srcDir = new File(rootDir); |
85 |
File binDir = new File(Toolbox.getTxmHomePath(),"corpora/"+basename); |
|
86 |
binDir.deleteDir(); |
|
72 |
File binDir = project.getProjectDirectory(); |
|
87 | 73 |
binDir.mkdirs(); |
88 | 74 |
if (!binDir.exists()) { |
89 | 75 |
println "Could not create binDir "+binDir |
... | ... | |
242 | 228 |
c.setOptions(textSortAttribute, normalizeMetadata); |
243 | 229 |
c.setAnnotationSuccess(annotationSuccess) |
244 | 230 |
c.setLang(lang); |
245 |
if (!c.run(binDir, txmDir, corpusname, null, srcfiles, metadatas)) { |
|
231 |
if (!c.run(project, binDir, txmDir, corpusname, null, srcfiles, metadatas)) {
|
|
246 | 232 |
println "import process stopped"; |
247 | 233 |
return; |
248 | 234 |
} |
... | ... | |
273 | 259 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang); |
274 | 260 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang); |
275 | 261 |
|
276 |
Element text = params.addText(corpusElem, txtname, txmFile); |
|
262 |
Text t = new Text(project); |
|
263 |
t.setName(txtname); |
|
264 |
t.setSourceFile(txmFile) |
|
265 |
t.setTXMFile(txmFile) |
|
277 | 266 |
|
278 | 267 |
def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element); |
279 |
Element edition = params.addEdition(text, "default", outdir.getAbsolutePath(), "html"); |
|
268 |
Edition edition = new Edition(t); |
|
269 |
edition.setName("default"); |
|
270 |
edition.setIndex(outdir.getAbsolutePath()); |
|
280 | 271 |
|
281 | 272 |
for (i = 0 ; i < ed.getPageFiles().size();) { |
282 | 273 |
File f = ed.getPageFiles().get(i); |
283 | 274 |
String wordid = ed.getIdx().get(i); |
284 |
params.addPage(edition, ""+(++i), wordid);
|
|
275 |
edition.addPage(""+(++i), wordid);
|
|
285 | 276 |
} |
286 | 277 |
} |
287 | 278 |
} |
288 | 279 |
|
289 | 280 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING") |
290 |
File paramFile = new File(binDir, "import.xml"); |
|
291 |
DomUtils.save(params.root.getOwnerDocument(), paramFile);readyToLoad = true; |
|
281 |
|
|
282 |
readyToLoad = project.save(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 1115) | ||
---|---|---|
67 | 67 |
String userDir = System.getProperty("user.home"); |
68 | 68 |
|
69 | 69 |
def MONITOR; |
70 |
boolean debug = org.txm.utils.logger.Log.isPrintingErrors(); |
|
71 |
BaseParameters params; |
|
72 |
try {params = paramsBinding;MONITOR=monitor} catch (Exception) |
|
73 |
{ println "DEV MODE";//exception means we debug |
|
74 |
debug = true |
|
75 |
params = new BaseParameters(new File(userDir, "xml/anapovoas/import.xml")) |
|
76 |
params.load() |
|
77 |
if (!org.txm.Toolbox.isInitialized()) { |
|
78 |
|
|
79 |
Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM")); |
|
80 |
Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File(userDir,"treetagger/models")); |
|
81 |
Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8"); |
|
82 |
Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, "\t"); |
|
83 |
Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, ""); |
|
84 |
Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM")); |
|
85 |
} |
|
70 |
Project project; |
|
71 |
|
|
72 |
try {project=projectBinding;MONITOR=monitor} catch (Exception) |
|
73 |
{ } |
|
74 |
if (project == null) { println "no project set. Aborting"; return; } |
|
75 |
|
|
76 |
String corpusname = project.getName(); |
|
77 |
String basename = corpusname |
|
78 |
String rootDir = project.getSrcdir(); |
|
79 |
String lang = project.getLang() |
|
80 |
String model = lang |
|
81 |
String encoding = project.getEncoding() |
|
82 |
boolean annotate = project.getAnnotate() |
|
83 |
String xsl = project.getFrontXSL(); |
|
84 |
def xslParams = project.getXsltParameters(); |
|
85 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage() |
|
86 |
String page_element = project.getEditionDefinition("default").getPageElement() |
|
87 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition() |
|
88 |
|
|
89 |
File srcDir = new File(rootDir); |
|
90 |
File binDir = project.getProjectDirectory(); |
|
91 |
binDir.mkdirs(); |
|
92 |
if (!binDir.exists()) { |
|
93 |
println "Could not create binDir "+binDir |
|
94 |
return; |
|
86 | 95 |
} |
87 |
if (params == null) { println "no parameters. Aborting"; return; } |
|
88 | 96 |
|
89 |
String corpusname = params.getCorpusName(); |
|
90 |
Element corpusElem = params.corpora.get(corpusname); |
|
91 |
String basename = params.name; |
|
92 |
String rootDir = params.rootDir; |
|
93 |
String lang = corpusElem.getAttribute("lang"); |
|
94 |
String model = lang |
|
95 |
String encoding = corpusElem.getAttribute("encoding"); |
|
96 |
boolean annotate = "true" == corpusElem.getAttribute("annotate"); |
|
97 |
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl") |
|
98 |
def xslParams = params.getXsltParams(corpusElem); |
|
99 |
int wordsPerPage = params.getWordsPerPage("default") |
|
100 |
String page_element = params.getPageElement("default") |
|
101 |
boolean build_edition = params.getDoEdition("default") |
|
102 |
|
|
103 |
File srcDir = new File(rootDir); |
|
104 |
File binDir = new File(Toolbox.getTxmHomePath(),"corpora/"+basename); |
|
105 |
binDir.deleteDir(); |
|
106 |
binDir.mkdirs(); |
|
107 |
if (!binDir.exists()) { |
|
108 |
println "Could not create binDir "+binDir |
|
109 |
return; |
|
110 |
} |
|
111 |
|
|
112 | 97 |
File txmDir = new File(binDir,"txm/$corpusname"); |
113 | 98 |
txmDir.deleteDir(); |
114 | 99 |
txmDir.mkdirs(); |
... | ... | |
123 | 108 |
println "Error: could not create a copy of metadata file "+allMetadataFile.getAbsoluteFile(); |
124 | 109 |
return; |
125 | 110 |
} |
126 |
metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(),
|
|
127 |
Toolbox.getMetadataColumnSeparator(),
|
|
128 |
Toolbox.getMetadataTextSeparator(), 1) |
|
111 |
metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(), |
|
112 |
Toolbox.getMetadataColumnSeparator(),
|
|
113 |
Toolbox.getMetadataTextSeparator(), 1)
|
|
129 | 114 |
} |
130 | 115 |
else |
131 | 116 |
println "no metadata file: "+allMetadataFile |
... | ... | |
156 | 141 |
println " ignored csvHeaderSize: "+csvHeaderNumber |
157 | 142 |
//println " includeComments: "+includeComments |
158 | 143 |
} |
144 |
|
|
145 |
|
|
159 | 146 |
|
160 | 147 |
// Apply XSL |
161 | 148 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
... | ... | |
235 | 222 |
if (MONITOR != null) MONITOR.worked(20, "ANNOTATE") |
236 | 223 |
println "-- ANNOTATE - Running NLP tools" |
237 | 224 |
boolean annotationSuccess = false; |
238 |
if (annotate) { |
|
239 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
240 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
|
241 |
annotationSuccess = true; |
|
242 |
} |
|
243 |
} |
|
225 |
if (annotate) {
|
|
226 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
|
227 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
|
228 |
annotationSuccess = true;
|
|
229 |
}
|
|
230 |
}
|
|
244 | 231 |
|
245 | 232 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
246 | 233 |
if (MONITOR != null) MONITOR.worked(25, "COMPILING") |
... | ... | |
252 | 239 |
comp.removeInterviewers(removeInterviewer); |
253 | 240 |
comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata); |
254 | 241 |
comp.setAnnotationSucces(annotationSuccess) |
255 |
if (!comp.run(trsfiles, corpusname, "default", binDir)) { |
|
242 |
if (!comp.run(project, trsfiles, corpusname, "default", binDir)) {
|
|
256 | 243 |
println "Failed to compile files"; |
257 | 244 |
return; |
258 | 245 |
} |
259 |
|
|
246 |
|
|
260 | 247 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
261 |
|
|
248 |
|
|
262 | 249 |
File htmlDir = new File(binDir,"HTML/$corpusname"); |
263 | 250 |
htmlDir.deleteDir() |
264 | 251 |
htmlDir.mkdirs(); |
265 | 252 |
if (build_edition) { |
266 |
|
|
253 |
|
|
267 | 254 |
if (MONITOR != null) MONITOR.worked(20, "EDITION") |
268 | 255 |
println "-- EDITION - Building editions" |
269 |
|
|
256 |
|
|
270 | 257 |
List<File> filelist = txmDir.listFiles(); |
271 | 258 |
Collections.sort(filelist); |
272 | 259 |
def second = 0 |
... | ... | |
281 | 268 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang); |
282 | 269 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang); |
283 | 270 |
|
284 |
Element text = params.addText(corpusElem, txtname, txmFile); |
|
271 |
Text t = new Text(project); |
|
272 |
t.setName(txtname); |
|
273 |
t.setSourceFile(txmFile) |
|
274 |
t.setTXMFile(txmFile) |
|
285 | 275 |
|
286 | 276 |
def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas); |
287 |
Element edition = params.addEdition(text, "default", htmlDir.getAbsolutePath(), "html"); |
|
288 |
|
|
277 |
Edition edition = new Edition(t); |
|
278 |
edition.setName("default"); |
|
279 |
edition.setIndex(htmlDir.getAbsolutePath()); |
|
289 | 280 |
for (i = 0 ; i < ed.getPageFiles().size();) { |
290 | 281 |
File f = ed.getPageFiles().get(i); |
291 | 282 |
String wordid = ed.getIdx().get(i); |
292 |
params.addPage(edition, ""+(++i), wordid);
|
|
283 |
edition.addPage(""+(++i), wordid);
|
|
293 | 284 |
} |
294 |
|
|
295 |
if (ed.getPageFiles().size() > 0) { |
|
296 |
Element editionBD = params.addEdition(text, "onepage", htmlDir.getAbsolutePath(), "html"); |
|
297 |
params.addPage(editionBD, "1", ed.getIndexes().get(0)); |
|
298 |
} |
|
299 | 285 |
} |
300 | 286 |
|
301 | 287 |
//copy transcriber.css |
... | ... | |
311 | 297 |
|
312 | 298 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
313 | 299 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING") |
314 |
File paramFile = new File(binDir, "import.xml"); |
|
315 |
DomUtils.save(params.root.getOwnerDocument(), paramFile);readyToLoad = true; |
|
300 |
readyToLoad = project.save(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 1115) | ||
---|---|---|
33 | 33 |
|
34 | 34 |
import java.io.OutputStreamWriter; |
35 | 35 |
import java.util.LinkedHashMap; |
36 |
|
|
36 |
import org.txm.objects.* |
|
37 |
import org.txm.searchengine.cqp.corpus.* |
|
37 | 38 |
import org.txm.importer.cwb.CwbEncode |
38 | 39 |
import org.txm.importer.cwb.CwbMakeAll |
39 | 40 |
import org.txm.importer.cwb.CwbProcess; |
40 | 41 |
import org.txm.importer.cwb.PatchCwbRegistry; |
41 | 42 |
import org.txm.utils.Pair; |
42 | 43 |
|
43 |
// TODO: Auto-generated Javadoc |
|
44 | 44 |
/** |
45 | 45 |
* The Class compiler. |
46 | 46 |
*/ |
... | ... | |
109 | 109 |
this.removeinterviewers = value; |
110 | 110 |
} |
111 | 111 |
|
112 |
File cqpFile |
|
112 | 113 |
/** |
113 | 114 |
* Run. |
114 | 115 |
* |
... | ... | |
118 | 119 |
* @param outdir the outdir |
119 | 120 |
* @return true, if successful |
120 | 121 |
*/ |
121 |
public boolean run(List<File> xmlfiles, String corpusname, String projectname, File binDir) |
|
122 |
{ |
|
122 |
public boolean run(Project project, List<File> xmlfiles, String corpusname, String projectname, File binDir) { |
|
123 | 123 |
Collections.sort(xmlfiles); |
124 | 124 |
//println "run compiler with $xmlfiles, $basename and $outdir" |
125 | 125 |
this.outdir = binDir; |
... | ... | |
131 | 131 |
|
132 | 132 |
sectionAttrs = new HashSet<String>() // reset section attributs set |
133 | 133 |
|
134 |
File cqpDir = new File(binDir, "cqp") |
|
135 |
new File(binDir, "cqp").delete(); |
|
136 |
cqpDir.mkdir(); |
|
137 |
if (!new File(binDir, "cqp").exists()) { |
|
138 |
println "Can't create cqp directory" |
|
139 |
return false; |
|
134 |
CorpusBuild corpus = project.getCorpusBuild(project.getName()); |
|
135 |
if (corpus != null) { |
|
136 |
//println "CLEAN PREVIOUS CORPUS" |
|
137 |
corpus.delete(); // remove old files |
|
140 | 138 |
} |
139 |
|
|
140 |
// make new one |
|
141 |
corpus = new MainCorpus(project); |
|
142 |
corpus.setID(project.getName()); |
|
143 |
corpus.setName(project.getName()); |
|
144 |
corpus.setDescription("Built with the TXT+CSV import module"); |
|
145 |
|
|
146 |
cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
|
147 |
new File(binDir,"cqp").mkdirs() |
|
148 |
new File(binDir,"data").mkdirs() |
|
149 |
new File(binDir,"registry").mkdirs() |
|
141 | 150 |
|
142 |
File cqpFile = new File(cqpDir, corpusname.toLowerCase()+".cqp") |
|
143 |
|
|
144 | 151 |
// get all anatypes |
145 | 152 |
for (File f : xmlfiles) { |
146 | 153 |
getAnaTypes(f) |
... | ... | |
223 | 230 |
return false; |
224 | 231 |
} |
225 | 232 |
cwbMa.run(corpusname, registryFile.getParent()); |
226 |
|
|
227 | 233 |
|
228 | 234 |
} catch (Exception ex) {System.out.println(ex); return false;} |
229 | 235 |
|
230 |
|
|
231 | 236 |
return true; |
232 | 237 |
} |
233 | 238 |
|
... | ... | |
311 | 316 |
String filename = xmlfile.getName() |
312 | 317 |
String textid = filename.substring(0, filename.length() - 4); |
313 | 318 |
|
314 |
createOutput(new File(outdir, "cqp/"+corpusname.toLowerCase()+".cqp"));
|
|
319 |
createOutput(cqpFile);
|
|
315 | 320 |
String localname; |
316 | 321 |
|
317 | 322 |
//get all metadatas declared before Episode tag |
Formats disponibles : Unified diff