Révision 945
| tmp/org.txm.statsengine.r.core/src/org/txm/statsengine/r/core/StartRserve.java (revision 945) | ||
|---|---|---|
| 162 | 162 |
Thread.sleep(200); |
| 163 | 163 |
} catch (InterruptedException ix) { }
|
| 164 | 164 |
|
| 165 |
int attempts = 20;
|
|
| 165 |
int attempts = 10;
|
|
| 166 | 166 |
while (attempts > 0) {
|
| 167 | 167 |
try {
|
| 168 | 168 |
System.out.print("."); //$NON-NLS-1$
|
| ... | ... | |
| 171 | 171 |
return true; |
| 172 | 172 |
} catch (Exception e2) {
|
| 173 | 173 |
try {
|
| 174 |
Thread.sleep(2000);
|
|
| 174 |
Thread.sleep(1500);
|
|
| 175 | 175 |
} catch (InterruptedException ix) { }
|
| 176 | 176 |
} |
| 177 | 177 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/discours/importer.groovy (revision 945) | ||
|---|---|---|
| 98 | 98 |
println "Error: could not create a copy of metadata file "+csvfile.getAbsoluteFile(); |
| 99 | 99 |
return; |
| 100 | 100 |
} |
| 101 |
metadatas = new Metadatas(copy, Toolbox.getPreference(TBXPreferences.METADATA_ENCODING), Toolbox.getPreference(Toolbox.METADATA_COLSEPARATOR), Toolbox.getPreference(Toolbox.METADATA_TXTSEPARATOR), 1) |
|
| 101 |
metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(), |
|
| 102 |
Toolbox.getMetadataColumnSeparator(), |
|
| 103 |
Toolbox.getMetadataTextSeparator(), 1) |
|
| 102 | 104 |
} else {
|
| 103 | 105 |
println "No metadata file: "+csvfile |
| 104 | 106 |
println "Aborting" |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/transcriber/transcriberLoader.groovy (revision 945) | ||
|---|---|---|
| 122 | 122 |
println "Error: could not create a copy of metadata file "+allmetadatasfile.getAbsoluteFile(); |
| 123 | 123 |
return; |
| 124 | 124 |
} |
| 125 |
metadatas = new Metadatas(copy, Toolbox.getPreference(Toolbox.METADATA_ENCODING), Toolbox.getPreference(Toolbox.METADATA_COLSEPARATOR), Toolbox.getPreference(Toolbox.METADATA_TXTSEPARATOR), 1) |
|
| 125 |
metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(), |
|
| 126 |
Toolbox.getMetadataColumnSeparator(), |
|
| 127 |
Toolbox.getMetadataTextSeparator(), 1) |
|
| 126 | 128 |
} |
| 127 | 129 |
else |
| 128 | 130 |
println "no metadata file: "+allmetadatasfile |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/txt/txtLoader.groovy (revision 945) | ||
|---|---|---|
| 97 | 97 |
println "Error: could not create a copy of metadata file "+allmetadatasfile.getAbsoluteFile(); |
| 98 | 98 |
return; |
| 99 | 99 |
} |
| 100 |
metadatas = new Metadatas(copy, Toolbox.getPreference(Toolbox.METADATA_ENCODING), Toolbox.getPreference(Toolbox.METADATA_COLSEPARATOR), Toolbox.getPreference(Toolbox.METADATA_TXTSEPARATOR), 1) |
|
| 100 |
metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(), |
|
| 101 |
Toolbox.getMetadataColumnSeparator(), |
|
| 102 |
Toolbox.getMetadataTextSeparator(), 1) |
|
| 101 | 103 |
} else {
|
| 102 | 104 |
println "No metadata file: "+allmetadatasfile |
| 103 | 105 |
} |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/Annotater.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
|
|
| 5 |
|
|
| 6 |
/** |
|
| 7 |
* |
|
| 8 |
* Takes the XML-TXM files and wrap a TAL Tool to update the XML-TXM files |
|
| 9 |
* |
|
| 10 |
* @author mdecorde |
|
| 11 |
* |
|
| 12 |
*/ |
|
| 13 |
public abstract class Annotater extends ImportStep {
|
|
| 14 |
|
|
| 15 |
public Annotater(ImportModule module) {
|
|
| 16 |
super(module); |
|
| 17 |
|
|
| 18 |
inputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.corpusName); |
|
| 19 |
outputDirectory = new File(module.getBinaryDirectory(), "txm"); |
|
| 20 |
} |
|
| 21 |
} |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/Pager.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.util.ArrayList; |
|
| 5 |
|
|
| 6 |
import org.txm.utils.DeleteDir; |
|
| 7 |
|
|
| 8 |
/** |
|
| 9 |
* Takes the XML-TXM files and build an edition |
|
| 10 |
* |
|
| 11 |
* @author mdecorde |
|
| 12 |
* |
|
| 13 |
*/ |
|
| 14 |
public class Pager extends ImportStep {
|
|
| 15 |
|
|
| 16 |
protected File htmlDirectory; |
|
| 17 |
protected String corpusname; |
|
| 18 |
protected ArrayList<File> files; |
|
| 19 |
|
|
| 20 |
public Pager(ImportModule module, String editionName) {
|
|
| 21 |
super(module); |
|
| 22 |
|
|
| 23 |
corpusname = module.getCorpusName(); |
|
| 24 |
|
|
| 25 |
inputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.getCorpusName()); |
|
| 26 |
htmlDirectory = new File(module.getBinaryDirectory(), "HTML/"+corpusname); |
|
| 27 |
outputDirectory = new File(htmlDirectory, editionName); |
|
| 28 |
|
|
| 29 |
if (!module.isUpdatingCorpus()) {
|
|
| 30 |
DeleteDir.deleteDirectory(outputDirectory); |
|
| 31 |
outputDirectory.mkdirs(); |
|
| 32 |
} |
|
| 33 |
} |
|
| 34 |
|
|
| 35 |
@Override |
|
| 36 |
public void cancel() {
|
|
| 37 |
// TODO Auto-generated method stub |
|
| 38 |
|
|
| 39 |
} |
|
| 40 |
|
|
| 41 |
@Override |
|
| 42 |
public void process() {
|
|
| 43 |
process(null); // no default files order set |
|
| 44 |
} |
|
| 45 |
|
|
| 46 |
public void process(ArrayList<File> files) {
|
|
| 47 |
this.files = files; |
|
| 48 |
} |
|
| 49 |
} |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/Importer.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
|
|
| 5 |
/** |
|
| 6 |
* Takes any form of source files |
|
| 7 |
* |
|
| 8 |
* After this step, the XML-TXM files are created. |
|
| 9 |
* |
|
| 10 |
* they are validated before continuing |
|
| 11 |
* @author mdecorde |
|
| 12 |
* |
|
| 13 |
*/ |
|
| 14 |
public abstract class Importer extends ImportStep {
|
|
| 15 |
|
|
| 16 |
public Importer(ImportModule module) {
|
|
| 17 |
super(module); |
|
| 18 |
inputDirectory = module.getSourceDirectory(); |
|
| 19 |
outputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.getCorpusName()); |
|
| 20 |
outputDirectory.mkdirs(); |
|
| 21 |
} |
|
| 22 |
|
|
| 23 |
public abstract void checkFiles(); |
|
| 24 |
} |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/ImportKeys.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
/**
 * Names of the key/value parameters used by the import modules.
 */
public class ImportKeys {

	// -- execution switches --
	public static final String MULTITHREAD = "multithread";
	public static final String DEBUG = "debug";
	public static final String UPDATECORPUS = "corpus.update";
	public static final String CLEAN = "clean.directories";

	// -- annotation and language --
	public static final String TTMODEL = "annotate.model";
	public static final String TTANNOTATE = "annotate.run";
	public static final String LANG = "lang";

	// -- value normalisation --
	public static final String NORMALISEANAVALUES = "normalize.ana.values";
	public static final String NORMALISEATTRIBUTEVALUES = "normalize.attribute.values";
}
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/ImportStep.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.util.HashMap; |
|
| 5 |
|
|
| 6 |
/** |
|
| 7 |
* One of the step of an import module |
|
| 8 |
* |
|
| 9 |
* @author mdecorde |
|
| 10 |
* |
|
| 11 |
*/ |
|
| 12 |
public abstract class ImportStep {
|
|
| 13 |
|
|
| 14 |
protected File inputDirectory, outputDirectory; |
|
| 15 |
protected ImportModule module; |
|
| 16 |
|
|
| 17 |
protected HashMap<String, Object> stepProperties = new HashMap<String, Object>(); |
|
| 18 |
protected boolean isSuccessFul = false; |
|
| 19 |
protected String reason = "not set."; |
|
| 20 |
protected boolean stopAtFirstError = true; |
|
| 21 |
protected boolean debug = true; |
|
| 22 |
|
|
| 23 |
public ImportStep(ImportModule module) {
|
|
| 24 |
this.module = module; |
|
| 25 |
debug = module.debug; |
|
| 26 |
} |
|
| 27 |
|
|
| 28 |
public File getInputDirectory() {
|
|
| 29 |
return inputDirectory; |
|
| 30 |
} |
|
| 31 |
|
|
| 32 |
public File getOutputDirectory() {
|
|
| 33 |
return outputDirectory; |
|
| 34 |
} |
|
| 35 |
|
|
| 36 |
public ImportModule getImportModule() {
|
|
| 37 |
return module; |
|
| 38 |
} |
|
| 39 |
|
|
| 40 |
public boolean isSuccessFul() {
|
|
| 41 |
return isSuccessFul; |
|
| 42 |
} |
|
| 43 |
|
|
| 44 |
public String getReason() {
|
|
| 45 |
return reason; |
|
| 46 |
} |
|
| 47 |
|
|
| 48 |
/** |
|
| 49 |
* Called when a step is interrupted to clean streams and stuff |
|
| 50 |
*/ |
|
| 51 |
public abstract void cancel(); |
|
| 52 |
|
|
| 53 |
public abstract void process(); |
|
| 54 |
} |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/Compiler.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.util.ArrayList; |
|
| 5 |
|
|
| 6 |
import org.txm.utils.DeleteDir; |
|
| 7 |
|
|
| 8 |
/** |
|
| 9 |
* Takes XML-TXM files, build the CQP files and call cwb utils |
|
| 10 |
* |
|
| 11 |
* @author mdecorde |
|
| 12 |
* |
|
| 13 |
*/ |
|
| 14 |
public class Compiler extends ImportStep {
|
|
| 15 |
|
|
| 16 |
protected File cqpDirectory, registryDirectory, dataDirectory; |
|
| 17 |
protected ArrayList<File> files; |
|
| 18 |
|
|
| 19 |
/** |
|
| 20 |
* Creates the output directories |
|
| 21 |
* |
|
| 22 |
* @param module |
|
| 23 |
*/ |
|
| 24 |
public Compiler(ImportModule module) {
|
|
| 25 |
super(module); |
|
| 26 |
|
|
| 27 |
inputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.getCorpusName()); |
|
| 28 |
cqpDirectory = new File(module.getBinaryDirectory(), "cqp"); |
|
| 29 |
outputDirectory = new File(module.getBinaryDirectory(), "data"); |
|
| 30 |
registryDirectory = new File(module.getBinaryDirectory(), "registry"); |
|
| 31 |
dataDirectory = new File(outputDirectory, module.getCorpusName()); |
|
| 32 |
|
|
| 33 |
DeleteDir.deleteDirectory(outputDirectory); |
|
| 34 |
outputDirectory.mkdirs(); |
|
| 35 |
|
|
| 36 |
DeleteDir.deleteDirectory(dataDirectory); |
|
| 37 |
dataDirectory.mkdirs(); |
|
| 38 |
|
|
| 39 |
DeleteDir.deleteDirectory(registryDirectory); |
|
| 40 |
registryDirectory.mkdirs(); |
|
| 41 |
|
|
| 42 |
if (!module.isUpdatingCorpus()) {
|
|
| 43 |
DeleteDir.deleteDirectory(cqpDirectory); |
|
| 44 |
cqpDirectory.mkdir(); |
|
| 45 |
} |
|
| 46 |
} |
|
| 47 |
|
|
| 48 |
@Override |
|
| 49 |
public void cancel() {
|
|
| 50 |
// TODO Auto-generated method stub |
|
| 51 |
} |
|
| 52 |
|
|
| 53 |
@Override |
|
| 54 |
public void process() {
|
|
| 55 |
process(null); // no default files order set |
|
| 56 |
} |
|
| 57 |
|
|
| 58 |
public void process(ArrayList<File> files) {
|
|
| 59 |
this.files = files; |
|
| 60 |
} |
|
| 61 |
} |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/Step.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
/**
 * Minimal step whose {@link #process()} always reports success.
 */
public class Step {

	/** Creates a step; there is no state to initialise. */
	public Step() {
		// TODO Auto-generated constructor stub
	}

	/**
	 * @return always {@code true}
	 */
	public boolean process() {
		final boolean done = true;
		return done;
	}
}
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xtz/ImportModule.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.io.FileFilter; |
|
| 5 |
import java.util.ArrayList; |
|
| 6 |
import java.util.Arrays; |
|
| 7 |
import java.util.Collections; |
|
| 8 |
import java.util.logging.Level; |
|
| 9 |
|
|
| 10 |
import org.txm.Toolbox; |
|
| 11 |
import org.txm.core.preferences.TBXPreferences; |
|
| 12 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
| 13 |
import org.txm.utils.xml.DomUtils; |
|
| 14 |
import org.txm.objects.BaseParameters; |
|
| 15 |
import org.txm.utils.DeleteDir; |
|
| 16 |
import org.txm.utils.logger.Log; |
|
| 17 |
|
|
| 18 |
public class ImportModule {
|
|
| 19 |
|
|
| 20 |
public BaseParameters importParameters; |
|
| 21 |
|
|
| 22 |
public String corpusVersionProduced; |
|
| 23 |
|
|
| 24 |
public File sourceDirectory; |
|
| 25 |
public File binaryDirectory; |
|
| 26 |
|
|
| 27 |
public Importer importer; |
|
| 28 |
public Annotater annotater; |
|
| 29 |
public Compiler compiler; |
|
| 30 |
public Pager pager; |
|
| 31 |
|
|
| 32 |
/** |
|
| 33 |
* set the variable to false to stop the import process at next step |
|
| 34 |
*/ |
|
| 35 |
public boolean isSuccessful = true; |
|
| 36 |
public String reason = "none"; |
|
| 37 |
public boolean debug = false; |
|
| 38 |
public boolean multithread = false; |
|
| 39 |
public boolean updateCorpus = false; |
|
| 40 |
public String corpusName; |
|
| 41 |
|
|
| 42 |
IProgressMonitor monitor; |
|
| 43 |
|
|
| 44 |
public void setMonitor(IProgressMonitor monitor) {
|
|
| 45 |
this.monitor = monitor; |
|
| 46 |
} |
|
| 47 |
|
|
| 48 |
|
|
| 49 |
public boolean isMultiThread() {
|
|
| 50 |
return multithread; |
|
| 51 |
} |
|
| 52 |
|
|
| 53 |
public boolean isDebugging() {
|
|
| 54 |
return debug; |
|
| 55 |
} |
|
| 56 |
|
|
| 57 |
public ImportModule(File importParametersFile) {
|
|
| 58 |
try {
|
|
| 59 |
BaseParameters b = new BaseParameters(importParametersFile); |
|
| 60 |
init(b); |
|
| 61 |
} catch (Exception e) {
|
|
| 62 |
e.printStackTrace(); |
|
| 63 |
} |
|
| 64 |
} |
|
| 65 |
|
|
| 66 |
public ImportModule(BaseParameters p) {
|
|
| 67 |
init(p); |
|
| 68 |
} |
|
| 69 |
|
|
| 70 |
public boolean isUpdatingCorpus() {
|
|
| 71 |
return updateCorpus; |
|
| 72 |
} |
|
| 73 |
|
|
| 74 |
protected void init(BaseParameters p) {
|
|
| 75 |
this.importParameters = p; |
|
| 76 |
this.importParameters.load(); |
|
| 77 |
corpusName = importParameters.name; |
|
| 78 |
//this.debug = "true".equals(importParameters.getKeyValueParameters().get(ImportKeys.DEBUG)); |
|
| 79 |
|
|
| 80 |
if (Log.getLevel().intValue() < Level.WARNING.intValue()) {
|
|
| 81 |
debug = true; |
|
| 82 |
} |
|
| 83 |
this.multithread = "true".equals(importParameters.getKeyValueParameters().get(ImportKeys.MULTITHREAD)); |
|
| 84 |
this.updateCorpus = "true".equals(importParameters.getKeyValueParameters().get(ImportKeys.UPDATECORPUS)); |
|
| 85 |
|
|
| 86 |
|
|
| 87 |
this.sourceDirectory = importParameters.paramFile.getParentFile(); |
|
| 88 |
this.binaryDirectory = new File(Toolbox.getTxmHomePath(), "corpora/"+corpusName.toUpperCase()); |
|
| 89 |
|
|
| 90 |
if (!updateCorpus) { // clean directories only if it's a new import
|
|
| 91 |
DeleteDir.deleteDirectory(binaryDirectory); |
|
| 92 |
binaryDirectory.mkdir(); |
|
| 93 |
|
|
| 94 |
File txmDir = new File(binaryDirectory, "txm"); |
|
| 95 |
txmDir.mkdir(); |
|
| 96 |
} |
|
| 97 |
} |
|
| 98 |
|
|
| 99 |
public void start() throws InterruptedException {
|
|
| 100 |
|
|
| 101 |
binaryDirectory.mkdirs(); // ensure output exists |
|
| 102 |
//System.out.println("ImportModule.start");
|
|
| 103 |
if (!updateCorpus) { // create XML-TXM files and annotate
|
|
| 104 |
//System.out.println("ImportModule.start: not updating");
|
|
| 105 |
if (importer != null) {
|
|
| 106 |
//System.out.println("ImportModule.start: importer: "+importer);
|
|
| 107 |
if (monitor != null) System.out.println("-- IMPORTER - Reading source files");
|
|
| 108 |
importer.process(); |
|
| 109 |
//importer.checkFiles(); |
|
| 110 |
isSuccessful = isSuccessful & importer.isSuccessFul(); |
|
| 111 |
if (!isSuccessful) {
|
|
| 112 |
System.out.println("Error while importing corpus during 'importer' step, reason="+importer.getReason());
|
|
| 113 |
return; |
|
| 114 |
} |
|
| 115 |
} else {
|
|
| 116 |
System.out.println("XML-TXM files already produced in "+new File(binaryDirectory, "txm/"+corpusName));
|
|
| 117 |
} |
|
| 118 |
|
|
| 119 |
boolean annotate = "true".equals(importParameters.getCorpusElement().getAttribute("annotate"));
|
|
| 120 |
if (annotate && annotater != null) {
|
|
| 121 |
if (monitor != null) System.out.println("-- ANNOTATE - Running NLP tools");
|
|
| 122 |
annotater.process(); |
|
| 123 |
isSuccessful = isSuccessful & annotater.isSuccessFul(); |
|
| 124 |
if (!isSuccessful) {
|
|
| 125 |
System.out.println("Error while importing corpus during 'annotate' step, reason="+annotater.getReason());
|
|
| 126 |
return; |
|
| 127 |
} |
|
| 128 |
} else {
|
|
| 129 |
//System.out.println("XML-TXM files already annotated.");
|
|
| 130 |
} |
|
| 131 |
} else {
|
|
| 132 |
System.out.println("Updating corpus...");
|
|
| 133 |
} |
|
| 134 |
|
|
| 135 |
//System.out.println("GET FILES ORDER");
|
|
| 136 |
final ArrayList<File> files = getTXMFilesOrder(); |
|
| 137 |
|
|
| 138 |
Thread Tcompiler = new Thread() {
|
|
| 139 |
public void run() {
|
|
| 140 |
if (compiler != null) {
|
|
| 141 |
if (monitor != null) System.out.println("-- COMPILING - Building Search Engine indexes");
|
|
| 142 |
compiler.process(files); |
|
| 143 |
isSuccessful = isSuccessful & compiler.isSuccessFul(); |
|
| 144 |
if (!isSuccessful) {
|
|
| 145 |
System.out.println("Error while importing corpus during 'compiler' step, reason="+compiler.getReason());
|
|
| 146 |
return; |
|
| 147 |
} |
|
| 148 |
} else {
|
|
| 149 |
System.out.println("No CQP index created.");
|
|
| 150 |
} |
|
| 151 |
} |
|
| 152 |
}; |
|
| 153 |
|
|
| 154 |
Thread Tpager = new Thread() {
|
|
| 155 |
public void run() {
|
|
| 156 |
|
|
| 157 |
if (pager != null) {
|
|
| 158 |
if (monitor != null) System.out.println("-- EDITION - Building edition");
|
|
| 159 |
pager.process(files); |
|
| 160 |
isSuccessful = isSuccessful & pager.isSuccessFul(); |
|
| 161 |
if (!isSuccessful) {
|
|
| 162 |
System.out.println("Error while importing corpus during 'pager' step, reason="+pager.getReason());
|
|
| 163 |
return; |
|
| 164 |
} |
|
| 165 |
} else {
|
|
| 166 |
System.out.println("No edition produced.");
|
|
| 167 |
} |
|
| 168 |
} |
|
| 169 |
}; |
|
| 170 |
|
|
| 171 |
Tcompiler.start(); |
|
| 172 |
if (!multithread) {
|
|
| 173 |
Tcompiler.join(); // wait for the end if not multithreaded |
|
| 174 |
if (!isSuccessful) { // don't call pager is compiler step failed
|
|
| 175 |
return; |
|
| 176 |
} |
|
| 177 |
} |
|
| 178 |
|
|
| 179 |
Tpager.start(); |
|
| 180 |
if (multithread) Tcompiler.join(); // wait for both thread to end |
|
| 181 |
Tpager.join(); |
|
| 182 |
} |
|
| 183 |
|
|
| 184 |
protected ArrayList<File> getTXMFilesOrder() {
|
|
| 185 |
//System.out.println("DEFAULT FILES ORDER");
|
|
| 186 |
File txmDirectory = new File(binaryDirectory, "txm/"+corpusName); |
|
| 187 |
ArrayList<File> files = new ArrayList<File>(Arrays.asList(txmDirectory.listFiles(new FileFilter() {
|
|
| 188 |
@Override |
|
| 189 |
public boolean accept(File file) {
|
|
| 190 |
return file.isFile() && file.getName().endsWith(".xml");
|
|
| 191 |
} |
|
| 192 |
}))); |
|
| 193 |
|
|
| 194 |
Collections.sort(files); |
|
| 195 |
|
|
| 196 |
return files; |
|
| 197 |
} |
|
| 198 |
|
|
| 199 |
|
|
| 200 |
public void end() {
|
|
| 201 |
File paramFile = new File(binaryDirectory, "import.xml"); |
|
| 202 |
try {
|
|
| 203 |
DomUtils.save(importParameters.root.getOwnerDocument(), paramFile); |
|
| 204 |
isSuccessful = true; |
|
| 205 |
} catch (Exception e) {
|
|
| 206 |
// TODO Auto-generated catch block |
|
| 207 |
e.printStackTrace(); |
|
| 208 |
isSuccessful = false; |
|
| 209 |
} |
|
| 210 |
} |
|
| 211 |
|
|
| 212 |
public String getCorpusName() {
|
|
| 213 |
return corpusName; |
|
| 214 |
} |
|
| 215 |
|
|
| 216 |
public String getReason() {
|
|
| 217 |
return reason; |
|
| 218 |
} |
|
| 219 |
|
|
| 220 |
public boolean isSuccessFul() {
|
|
| 221 |
return isSuccessful; |
|
| 222 |
} |
|
| 223 |
|
|
| 224 |
public BaseParameters getParameters() {
|
|
| 225 |
return importParameters; |
|
| 226 |
} |
|
| 227 |
|
|
| 228 |
public File getSourceDirectory() {
|
|
| 229 |
return sourceDirectory; |
|
| 230 |
} |
|
| 231 |
|
|
| 232 |
public File getBinaryDirectory() {
|
|
| 233 |
return binaryDirectory; |
|
| 234 |
} |
|
| 235 |
|
|
| 236 |
public void process() throws InterruptedException {
|
|
| 237 |
start(); |
|
| 238 |
if (isSuccessful) |
|
| 239 |
end(); |
|
| 240 |
} |
|
| 241 |
|
|
| 242 |
public static void main(String[] args) {
|
|
| 243 |
File importParametersFile = new File("/home/mdecorde/xml/brown/import.xml");
|
|
| 244 |
|
|
| 245 |
ImportModule module = new ImportModule(importParametersFile); |
|
| 246 |
System.out.println("Parameters: "+module.getParameters());
|
|
| 247 |
try {
|
|
| 248 |
module.start(); |
|
| 249 |
|
|
| 250 |
if (module.isSuccessful) {
|
|
| 251 |
System.out.println("Import sucessful. reloading corpora...");
|
|
| 252 |
} else {
|
|
| 253 |
System.out.println("Import failed, reason = "+module.getReason());
|
|
| 254 |
} |
|
| 255 |
} catch (Exception e) {
|
|
| 256 |
e.printStackTrace(); |
|
| 257 |
} |
|
| 258 |
} |
|
| 259 |
} |
|
| tmp/org.txm.core/src/java/org/txm/metadatas/Metadatas.java (revision 945) | ||
|---|---|---|
| 235 | 235 |
*/ |
| 236 | 236 |
public static boolean convertCsvToXml(File csvfile, File xmlFile, String encoding, String separator, String txtseparator, int nbheaderline) throws Exception |
| 237 | 237 |
{
|
| 238 |
if (separator == null || separator.length() == 0) {
|
|
| 239 |
separator = "\t"; |
|
| 240 |
} |
|
| 241 |
if (encoding == null || encoding.length() == 0) {
|
|
| 242 |
encoding = "UTF-8"; |
|
| 243 |
} |
|
| 238 | 244 |
xmlFile.createNewFile(); |
| 239 | 245 |
|
| 240 | 246 |
if(!csvfile.exists()) |
| tmp/org.txm.core/src/java/org/txm/Toolbox.java (revision 945) | ||
|---|---|---|
| 84 | 84 |
private static boolean state = false; |
| 85 | 85 |
|
| 86 | 86 |
public static Workspace workspace; |
| 87 |
|
|
| 88 | 87 |
|
| 89 | 88 |
/** |
| 90 | 89 |
* |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/ImportKeys.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
/**
 * Names of the key/value parameters used by the import modules.
 */
public class ImportKeys {

	// -- execution switches --
	public static final String MULTITHREAD = "multithread";
	public static final String DEBUG = "debug";
	public static final String UPDATECORPUS = "corpus.update";
	public static final String CLEAN = "clean.directories";

	// -- annotation and language --
	public static final String TTMODEL = "annotate.model";
	public static final String TTANNOTATE = "annotate.run";
	public static final String LANG = "lang";

	// -- value normalisation --
	public static final String NORMALISEANAVALUES = "normalize.ana.values";
	public static final String NORMALISEATTRIBUTEVALUES = "normalize.attribute.values";
}
|
| 0 | 17 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/ImportStep.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.util.HashMap; |
|
| 5 |
|
|
| 6 |
/** |
|
| 7 |
* One of the step of an import module |
|
| 8 |
* |
|
| 9 |
* @author mdecorde |
|
| 10 |
* |
|
| 11 |
*/ |
|
| 12 |
public abstract class ImportStep {
|
|
| 13 |
|
|
| 14 |
protected File inputDirectory, outputDirectory; |
|
| 15 |
protected ImportModule module; |
|
| 16 |
|
|
| 17 |
protected HashMap<String, Object> stepProperties = new HashMap<String, Object>(); |
|
| 18 |
protected boolean isSuccessFul = false; |
|
| 19 |
protected String reason = "not set."; |
|
| 20 |
protected boolean stopAtFirstError = true; |
|
| 21 |
protected boolean debug = true; |
|
| 22 |
|
|
| 23 |
public ImportStep(ImportModule module) {
|
|
| 24 |
this.module = module; |
|
| 25 |
debug = module.debug; |
|
| 26 |
} |
|
| 27 |
|
|
| 28 |
public File getInputDirectory() {
|
|
| 29 |
return inputDirectory; |
|
| 30 |
} |
|
| 31 |
|
|
| 32 |
public File getOutputDirectory() {
|
|
| 33 |
return outputDirectory; |
|
| 34 |
} |
|
| 35 |
|
|
| 36 |
public ImportModule getImportModule() {
|
|
| 37 |
return module; |
|
| 38 |
} |
|
| 39 |
|
|
| 40 |
public boolean isSuccessFul() {
|
|
| 41 |
return isSuccessFul; |
|
| 42 |
} |
|
| 43 |
|
|
| 44 |
public String getReason() {
|
|
| 45 |
return reason; |
|
| 46 |
} |
|
| 47 |
|
|
| 48 |
/** |
|
| 49 |
* Called when a step is interrupted to clean streams and stuff |
|
| 50 |
*/ |
|
| 51 |
public abstract void cancel(); |
|
| 52 |
|
|
| 53 |
public abstract void process(); |
|
| 54 |
} |
|
| 0 | 55 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/Compiler.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.util.ArrayList; |
|
| 5 |
|
|
| 6 |
import org.txm.utils.DeleteDir; |
|
| 7 |
|
|
| 8 |
/** |
|
| 9 |
* Takes XML-TXM files, build the CQP files and call cwb utils |
|
| 10 |
* |
|
| 11 |
* @author mdecorde |
|
| 12 |
* |
|
| 13 |
*/ |
|
| 14 |
public class Compiler extends ImportStep {
|
|
| 15 |
|
|
| 16 |
protected File cqpDirectory, registryDirectory, dataDirectory; |
|
| 17 |
protected ArrayList<File> files; |
|
| 18 |
|
|
| 19 |
/** |
|
| 20 |
* Creates the output directories |
|
| 21 |
* |
|
| 22 |
* @param module |
|
| 23 |
*/ |
|
| 24 |
public Compiler(ImportModule module) {
|
|
| 25 |
super(module); |
|
| 26 |
|
|
| 27 |
inputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.getCorpusName()); |
|
| 28 |
cqpDirectory = new File(module.getBinaryDirectory(), "cqp"); |
|
| 29 |
outputDirectory = new File(module.getBinaryDirectory(), "data"); |
|
| 30 |
registryDirectory = new File(module.getBinaryDirectory(), "registry"); |
|
| 31 |
dataDirectory = new File(outputDirectory, module.getCorpusName()); |
|
| 32 |
|
|
| 33 |
DeleteDir.deleteDirectory(outputDirectory); |
|
| 34 |
outputDirectory.mkdirs(); |
|
| 35 |
|
|
| 36 |
DeleteDir.deleteDirectory(dataDirectory); |
|
| 37 |
dataDirectory.mkdirs(); |
|
| 38 |
|
|
| 39 |
DeleteDir.deleteDirectory(registryDirectory); |
|
| 40 |
registryDirectory.mkdirs(); |
|
| 41 |
|
|
| 42 |
if (!module.isUpdatingCorpus()) {
|
|
| 43 |
DeleteDir.deleteDirectory(cqpDirectory); |
|
| 44 |
cqpDirectory.mkdir(); |
|
| 45 |
} |
|
| 46 |
} |
|
| 47 |
|
|
| 48 |
@Override |
|
| 49 |
public void cancel() {
|
|
| 50 |
// TODO Auto-generated method stub |
|
| 51 |
} |
|
| 52 |
|
|
| 53 |
@Override |
|
| 54 |
public void process() {
|
|
| 55 |
process(null); // no default files order set |
|
| 56 |
} |
|
| 57 |
|
|
| 58 |
public void process(ArrayList<File> files) {
|
|
| 59 |
this.files = files; |
|
| 60 |
} |
|
| 61 |
} |
|
| 0 | 62 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/Annotater.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
|
|
| 5 |
|
|
| 6 |
/** |
|
| 7 |
* |
|
| 8 |
* Takes the XML-TXM files and wrap a TAL Tool to update the XML-TXM files |
|
| 9 |
* |
|
| 10 |
* @author mdecorde |
|
| 11 |
* |
|
| 12 |
*/ |
|
| 13 |
public abstract class Annotater extends ImportStep {
|
|
| 14 |
|
|
| 15 |
public Annotater(ImportModule module) {
|
|
| 16 |
super(module); |
|
| 17 |
|
|
| 18 |
inputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.corpusName); |
|
| 19 |
outputDirectory = new File(module.getBinaryDirectory(), "txm"); |
|
| 20 |
} |
|
| 21 |
} |
|
| 0 | 22 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/Step.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
/**
 * Minimal step whose {@link #process()} always reports success.
 */
public class Step {

	/** Creates a step; there is no state to initialise. */
	public Step() {
		// TODO Auto-generated constructor stub
	}

	/**
	 * @return always {@code true}
	 */
	public boolean process() {
		final boolean done = true;
		return done;
	}
}
|
| 0 | 13 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/Pager.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.util.ArrayList; |
|
| 5 |
|
|
| 6 |
import org.txm.utils.DeleteDir; |
|
| 7 |
|
|
| 8 |
/** |
|
| 9 |
* Takes the XML-TXM files and build an edition |
|
| 10 |
* |
|
| 11 |
* @author mdecorde |
|
| 12 |
* |
|
| 13 |
*/ |
|
| 14 |
public class Pager extends ImportStep {
|
|
| 15 |
|
|
| 16 |
protected File htmlDirectory; |
|
| 17 |
protected String corpusname; |
|
| 18 |
protected ArrayList<File> files; |
|
| 19 |
|
|
| 20 |
public Pager(ImportModule module, String editionName) {
|
|
| 21 |
super(module); |
|
| 22 |
|
|
| 23 |
corpusname = module.getCorpusName(); |
|
| 24 |
|
|
| 25 |
inputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.getCorpusName()); |
|
| 26 |
htmlDirectory = new File(module.getBinaryDirectory(), "HTML/"+corpusname); |
|
| 27 |
outputDirectory = new File(htmlDirectory, editionName); |
|
| 28 |
|
|
| 29 |
if (!module.isUpdatingCorpus()) {
|
|
| 30 |
DeleteDir.deleteDirectory(outputDirectory); |
|
| 31 |
outputDirectory.mkdirs(); |
|
| 32 |
} |
|
| 33 |
} |
|
| 34 |
|
|
| 35 |
@Override |
|
| 36 |
public void cancel() {
|
|
| 37 |
// TODO Auto-generated method stub |
|
| 38 |
|
|
| 39 |
} |
|
| 40 |
|
|
| 41 |
@Override |
|
| 42 |
public void process() {
|
|
| 43 |
process(null); // no default files order set |
|
| 44 |
} |
|
| 45 |
|
|
| 46 |
public void process(ArrayList<File> files) {
|
|
| 47 |
this.files = files; |
|
| 48 |
} |
|
| 49 |
} |
|
| 0 | 50 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/ImportModule.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.io.FileFilter; |
|
| 5 |
import java.util.ArrayList; |
|
| 6 |
import java.util.Arrays; |
|
| 7 |
import java.util.Collections; |
|
| 8 |
import java.util.logging.Level; |
|
| 9 |
|
|
| 10 |
import org.txm.Toolbox; |
|
| 11 |
import org.txm.core.preferences.TBXPreferences; |
|
| 12 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
| 13 |
import org.txm.utils.xml.DomUtils; |
|
| 14 |
import org.txm.objects.BaseParameters; |
|
| 15 |
import org.txm.utils.DeleteDir; |
|
| 16 |
import org.txm.utils.logger.Log; |
|
| 17 |
|
|
| 18 |
public class ImportModule {
|
|
| 19 |
|
|
| 20 |
public BaseParameters importParameters; |
|
| 21 |
|
|
| 22 |
public String corpusVersionProduced; |
|
| 23 |
|
|
| 24 |
public File sourceDirectory; |
|
| 25 |
public File binaryDirectory; |
|
| 26 |
|
|
| 27 |
public Importer importer; |
|
| 28 |
public Annotater annotater; |
|
| 29 |
public Compiler compiler; |
|
| 30 |
public Pager pager; |
|
| 31 |
|
|
| 32 |
/** |
|
| 33 |
* set the variable to false to stop the import process at next step |
|
| 34 |
*/ |
|
| 35 |
public boolean isSuccessful = true; |
|
| 36 |
public String reason = "none"; |
|
| 37 |
public boolean debug = false; |
|
| 38 |
public boolean multithread = false; |
|
| 39 |
public boolean updateCorpus = false; |
|
| 40 |
public String corpusName; |
|
| 41 |
|
|
| 42 |
IProgressMonitor monitor; |
|
| 43 |
|
|
| 44 |
public void setMonitor(IProgressMonitor monitor) {
|
|
| 45 |
this.monitor = monitor; |
|
| 46 |
} |
|
| 47 |
|
|
| 48 |
|
|
| 49 |
public boolean isMultiThread() {
|
|
| 50 |
return multithread; |
|
| 51 |
} |
|
| 52 |
|
|
| 53 |
public boolean isDebugging() {
|
|
| 54 |
return debug; |
|
| 55 |
} |
|
| 56 |
|
|
| 57 |
public ImportModule(File importParametersFile) {
|
|
| 58 |
try {
|
|
| 59 |
BaseParameters b = new BaseParameters(importParametersFile); |
|
| 60 |
init(b); |
|
| 61 |
} catch (Exception e) {
|
|
| 62 |
e.printStackTrace(); |
|
| 63 |
} |
|
| 64 |
} |
|
| 65 |
|
|
| 66 |
public ImportModule(BaseParameters p) {
|
|
| 67 |
init(p); |
|
| 68 |
} |
|
| 69 |
|
|
| 70 |
public boolean isUpdatingCorpus() {
|
|
| 71 |
return updateCorpus; |
|
| 72 |
} |
|
| 73 |
|
|
| 74 |
protected void init(BaseParameters p) {
|
|
| 75 |
this.importParameters = p; |
|
| 76 |
this.importParameters.load(); |
|
| 77 |
corpusName = importParameters.name; |
|
| 78 |
//this.debug = "true".equals(importParameters.getKeyValueParameters().get(ImportKeys.DEBUG)); |
|
| 79 |
|
|
| 80 |
if (Log.getLevel().intValue() < Level.WARNING.intValue()) {
|
|
| 81 |
debug = true; |
|
| 82 |
} |
|
| 83 |
this.multithread = "true".equals(importParameters.getKeyValueParameters().get(ImportKeys.MULTITHREAD)); |
|
| 84 |
this.updateCorpus = "true".equals(importParameters.getKeyValueParameters().get(ImportKeys.UPDATECORPUS)); |
|
| 85 |
|
|
| 86 |
|
|
| 87 |
this.sourceDirectory = importParameters.paramFile.getParentFile(); |
|
| 88 |
this.binaryDirectory = new File(Toolbox.getTxmHomePath(), "corpora/"+corpusName.toUpperCase()); |
|
| 89 |
|
|
| 90 |
if (!updateCorpus) { // clean directories only if it's a new import
|
|
| 91 |
DeleteDir.deleteDirectory(binaryDirectory); |
|
| 92 |
binaryDirectory.mkdir(); |
|
| 93 |
|
|
| 94 |
File txmDir = new File(binaryDirectory, "txm"); |
|
| 95 |
txmDir.mkdir(); |
|
| 96 |
} |
|
| 97 |
} |
|
| 98 |
|
|
| 99 |
public void start() throws InterruptedException {
|
|
| 100 |
|
|
| 101 |
binaryDirectory.mkdirs(); // ensure output exists |
|
| 102 |
//System.out.println("ImportModule.start");
|
|
| 103 |
if (!updateCorpus) { // create XML-TXM files and annotate
|
|
| 104 |
//System.out.println("ImportModule.start: not updating");
|
|
| 105 |
if (importer != null) {
|
|
| 106 |
//System.out.println("ImportModule.start: importer: "+importer);
|
|
| 107 |
if (monitor != null) System.out.println("-- IMPORTER - Reading source files");
|
|
| 108 |
importer.process(); |
|
| 109 |
//importer.checkFiles(); |
|
| 110 |
isSuccessful = isSuccessful & importer.isSuccessFul(); |
|
| 111 |
if (!isSuccessful) {
|
|
| 112 |
System.out.println("Error while importing corpus during 'importer' step, reason="+importer.getReason());
|
|
| 113 |
return; |
|
| 114 |
} |
|
| 115 |
} else {
|
|
| 116 |
System.out.println("XML-TXM files already produced in "+new File(binaryDirectory, "txm/"+corpusName));
|
|
| 117 |
} |
|
| 118 |
|
|
| 119 |
boolean annotate = "true".equals(importParameters.getCorpusElement().getAttribute("annotate"));
|
|
| 120 |
if (annotate && annotater != null) {
|
|
| 121 |
if (monitor != null) System.out.println("-- ANNOTATE - Running NLP tools");
|
|
| 122 |
annotater.process(); |
|
| 123 |
isSuccessful = isSuccessful & annotater.isSuccessFul(); |
|
| 124 |
if (!isSuccessful) {
|
|
| 125 |
System.out.println("Error while importing corpus during 'annotate' step, reason="+annotater.getReason());
|
|
| 126 |
return; |
|
| 127 |
} |
|
| 128 |
} else {
|
|
| 129 |
//System.out.println("XML-TXM files already annotated.");
|
|
| 130 |
} |
|
| 131 |
} else {
|
|
| 132 |
System.out.println("Updating corpus...");
|
|
| 133 |
} |
|
| 134 |
|
|
| 135 |
//System.out.println("GET FILES ORDER");
|
|
| 136 |
final ArrayList<File> files = getTXMFilesOrder(); |
|
| 137 |
|
|
| 138 |
Thread Tcompiler = new Thread() {
|
|
| 139 |
public void run() {
|
|
| 140 |
if (compiler != null) {
|
|
| 141 |
if (monitor != null) System.out.println("-- COMPILING - Building Search Engine indexes");
|
|
| 142 |
compiler.process(files); |
|
| 143 |
isSuccessful = isSuccessful & compiler.isSuccessFul(); |
|
| 144 |
if (!isSuccessful) {
|
|
| 145 |
System.out.println("Error while importing corpus during 'compiler' step, reason="+compiler.getReason());
|
|
| 146 |
return; |
|
| 147 |
} |
|
| 148 |
} else {
|
|
| 149 |
System.out.println("No CQP index created.");
|
|
| 150 |
} |
|
| 151 |
} |
|
| 152 |
}; |
|
| 153 |
|
|
| 154 |
Thread Tpager = new Thread() {
|
|
| 155 |
public void run() {
|
|
| 156 |
|
|
| 157 |
if (pager != null) {
|
|
| 158 |
if (monitor != null) System.out.println("-- EDITION - Building edition");
|
|
| 159 |
pager.process(files); |
|
| 160 |
isSuccessful = isSuccessful & pager.isSuccessFul(); |
|
| 161 |
if (!isSuccessful) {
|
|
| 162 |
System.out.println("Error while importing corpus during 'pager' step, reason="+pager.getReason());
|
|
| 163 |
return; |
|
| 164 |
} |
|
| 165 |
} else {
|
|
| 166 |
System.out.println("No edition produced.");
|
|
| 167 |
} |
|
| 168 |
} |
|
| 169 |
}; |
|
| 170 |
|
|
| 171 |
Tcompiler.start(); |
|
| 172 |
if (!multithread) {
|
|
| 173 |
Tcompiler.join(); // wait for the end if not multithreaded |
|
| 174 |
if (!isSuccessful) { // don't call pager is compiler step failed
|
|
| 175 |
return; |
|
| 176 |
} |
|
| 177 |
} |
|
| 178 |
|
|
| 179 |
Tpager.start(); |
|
| 180 |
if (multithread) Tcompiler.join(); // wait for both thread to end |
|
| 181 |
Tpager.join(); |
|
| 182 |
} |
|
| 183 |
|
|
| 184 |
protected ArrayList<File> getTXMFilesOrder() {
|
|
| 185 |
//System.out.println("DEFAULT FILES ORDER");
|
|
| 186 |
File txmDirectory = new File(binaryDirectory, "txm/"+corpusName); |
|
| 187 |
ArrayList<File> files = new ArrayList<File>(Arrays.asList(txmDirectory.listFiles(new FileFilter() {
|
|
| 188 |
@Override |
|
| 189 |
public boolean accept(File file) {
|
|
| 190 |
return file.isFile() && file.getName().endsWith(".xml");
|
|
| 191 |
} |
|
| 192 |
}))); |
|
| 193 |
|
|
| 194 |
Collections.sort(files); |
|
| 195 |
|
|
| 196 |
return files; |
|
| 197 |
} |
|
| 198 |
|
|
| 199 |
|
|
| 200 |
public void end() {
|
|
| 201 |
File paramFile = new File(binaryDirectory, "import.xml"); |
|
| 202 |
try {
|
|
| 203 |
DomUtils.save(importParameters.root.getOwnerDocument(), paramFile); |
|
| 204 |
isSuccessful = true; |
|
| 205 |
} catch (Exception e) {
|
|
| 206 |
// TODO Auto-generated catch block |
|
| 207 |
e.printStackTrace(); |
|
| 208 |
isSuccessful = false; |
|
| 209 |
} |
|
| 210 |
} |
|
| 211 |
|
|
| 212 |
public String getCorpusName() {
|
|
| 213 |
return corpusName; |
|
| 214 |
} |
|
| 215 |
|
|
| 216 |
public String getReason() {
|
|
| 217 |
return reason; |
|
| 218 |
} |
|
| 219 |
|
|
| 220 |
public boolean isSuccessFul() {
|
|
| 221 |
return isSuccessful; |
|
| 222 |
} |
|
| 223 |
|
|
| 224 |
public BaseParameters getParameters() {
|
|
| 225 |
return importParameters; |
|
| 226 |
} |
|
| 227 |
|
|
| 228 |
public File getSourceDirectory() {
|
|
| 229 |
return sourceDirectory; |
|
| 230 |
} |
|
| 231 |
|
|
| 232 |
public File getBinaryDirectory() {
|
|
| 233 |
return binaryDirectory; |
|
| 234 |
} |
|
| 235 |
|
|
| 236 |
public void process() throws InterruptedException {
|
|
| 237 |
start(); |
|
| 238 |
if (isSuccessful) |
|
| 239 |
end(); |
|
| 240 |
} |
|
| 241 |
|
|
| 242 |
public static void main(String[] args) {
|
|
| 243 |
File importParametersFile = new File("/home/mdecorde/xml/brown/import.xml");
|
|
| 244 |
|
|
| 245 |
ImportModule module = new ImportModule(importParametersFile); |
|
| 246 |
System.out.println("Parameters: "+module.getParameters());
|
|
| 247 |
try {
|
|
| 248 |
module.start(); |
|
| 249 |
|
|
| 250 |
if (module.isSuccessful) {
|
|
| 251 |
System.out.println("Import sucessful. reloading corpora...");
|
|
| 252 |
} else {
|
|
| 253 |
System.out.println("Import failed, reason = "+module.getReason());
|
|
| 254 |
} |
|
| 255 |
} catch (Exception e) {
|
|
| 256 |
e.printStackTrace(); |
|
| 257 |
} |
|
| 258 |
} |
|
| 259 |
} |
|
| 0 | 260 | |
| tmp/org.txm.core/src/java/org/txm/importer/xtz/Importer.java (revision 945) | ||
|---|---|---|
| 1 |
package org.txm.importer.xtz; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
|
|
| 5 |
/** |
|
| 6 |
* Takes any form of source files |
|
| 7 |
* |
|
| 8 |
* After this step, the XML-TXM files are created. |
|
| 9 |
* |
|
| 10 |
* they are validated before continuing |
|
| 11 |
* @author mdecorde |
|
| 12 |
* |
|
| 13 |
*/ |
|
| 14 |
public abstract class Importer extends ImportStep {
|
|
| 15 |
|
|
| 16 |
public Importer(ImportModule module) {
|
|
| 17 |
super(module); |
|
| 18 |
inputDirectory = module.getSourceDirectory(); |
|
| 19 |
outputDirectory = new File(module.getBinaryDirectory(), "txm/"+module.getCorpusName()); |
|
| 20 |
outputDirectory.mkdirs(); |
|
| 21 |
} |
|
| 22 |
|
|
| 23 |
public abstract void checkFiles(); |
|
| 24 |
} |
|
| 0 | 25 | |
| tmp/org.txm.core/META-INF/MANIFEST.MF (revision 945) | ||
|---|---|---|
| 390 | 390 |
org.txm.importer.filters, |
| 391 | 391 |
org.txm.importer.scripting, |
| 392 | 392 |
org.txm.importer.xmltxm, |
| 393 |
org.txm.importer.xtz, |
|
| 393 | 394 |
org.txm.js, |
| 394 | 395 |
org.txm.js.viewer, |
| 395 | 396 |
org.txm.metadatas, |
| tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/Corpus.java (revision 945) | ||
|---|---|---|
| 1107 | 1107 |
this.getQualifiedCqpId(), queryResultId, |
| 1108 | 1108 |
query.getQueryString()); |
| 1109 | 1109 |
queryResult = new QueryResult(queryResultId, queryResultName, this, query); |
| 1110 |
|
|
| 1110 | 1111 |
if (save) super.addQueryLog(query.toString(), new ArrayList<String>()); |
| 1111 | 1112 |
} catch (Exception e) {
|
| 1112 | 1113 |
org.txm.utils.logger.Log.printStackTrace(e); |
Formats disponibles : Unified diff