Revision 2921
tmp/org.txm.rcp/src/main/java/org/txm/rcp/commands/OpenWelcomePage.java (revision 2921)
@@ -66,10 +66,10 @@
 // version += "beta";
 
 if ("fr".equals(locale)) {
-return OpenBrowser.openfile(WELCOME + version) != null;
+return OpenBrowser.openfile(WELCOME + version + "/welcome") != null;
 }
 else {
-return OpenBrowser.openfile(OpenLocalizedWebPage.getValidURL(WELCOME + version + "/" + locale, (WELCOME + version))) != null;
+return OpenBrowser.openfile(OpenLocalizedWebPage.getValidURL(WELCOME + version + "/" + locale + "/welcome", (WELCOME + version + "/welcome"))) != null;
 }
 }
 }
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/Property.java (revision 2921)
@@ -164,7 +164,7 @@
 }
 }
 catch (CqiClientException e) {
-Log.warning(TXMCoreMessages.bind("** No property {0} found in the {1} corpus: {2}", s, corpus, e));
+Log.warning(TXMCoreMessages.bind("** Could not find the ''{0}'' word property found in the ''{1}'' corpus: {2}", s, corpus, e));
 Log.printStackTrace(e);
 }
 }
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2921)
@@ -61,7 +61,7 @@
 boolean removeInterviewer = false;//if true the transcription of speakers (en1 and enq2) defined in metadatas file will be ignored
 boolean includeComments = false;
 boolean ignoreTranscriberMetadata = false;
-int csvHeaderNumber = 1;
+//int csvHeaderNumber = 1;
 int maxlines = 200;
 
 String userDir = System.getProperty("user.home");
@@ -131,7 +131,7 @@
 }
 File propertyFile = new File(srcDir, "import.properties")//default
 Properties props = new Properties();
-String[] metadatasToKeep;
+//String[] metadatasToKeep;
 if (propertyFile.exists() && propertyFile.canRead()) {
 FileInputStream input = new FileInputStream(propertyFile);
 props.load(input);
@@ -141,18 +141,18 @@
 removeInterviewer = Boolean.parseBoolean(props.get("removeInterviewer").toString());
 if (props.getProperty("ignoreTranscriberMetadata") != null)
 ignoreTranscriberMetadata = Boolean.parseBoolean(props.get("ignoreTranscriberMetadata").toString());
-if (props.getProperty("metadataList") != null)
-metadatasToKeep = props.get("metadataList").toString().split("|");
-if (props.getProperty("csvHeaderNumber") != null)
-csvHeaderNumber = props.get("csvHeaderNumber").toString().split("|");
+// if (props.getProperty("metadataList") != null)
+// metadatasToKeep = props.get("metadataList").toString().split("|");
+// if (props.getProperty("csvHeaderNumber") != null)
+// csvHeaderNumber = props.get("csvHeaderNumber").toString().split("|");
 //if (props.getProperty("includeComments") != null)
 // includeComments = props.get("includeComments").toString();
 
 println "import properties: "
 println " removeInterviewer: "+removeInterviewer
 println " ignoreTranscriberMetadata: "+ignoreTranscriberMetadata
-println " metadataToKeep: "+metadatasToKeep
-println " ignored csvHeaderSize: "+csvHeaderNumber
+// println " metadataToKeep: "+metadatasToKeep
+// println " ignored csvHeaderSize: "+csvHeaderNumber
 //println " includeComments: "+includeComments
 }
 
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/importer.groovy (revision 2921)
@@ -42,7 +42,6 @@
 import org.txm.scripts.filters.TagSentences.*
 import org.txm.scripts.filters.Tokeniser.*
 
-// TODO: Auto-generated Javadoc
 /**
 * The Class importer.
 */
@@ -57,8 +56,7 @@
 * @param basename the basename
 * @return true, if successful
 */
-public static boolean run(File srcDir, File binDir, File txmDir, String encoding, List<String> suffixes, String basename, String lang)
-{
+public static boolean run(File srcDir, File binDir, File txmDir, String encoding, List<String> suffixes, String basename, String lang) {
 
 File stokenizedDir = new File(binDir,"stokenized");
 stokenizedDir.deleteDir();
@@ -113,12 +111,13 @@
 }
 
 println("Create simple XML files ("+srcfiles.size()+" files)")
+ConsoleProgressBar cpb = new ConsoleProgressBar(srcfiles.size())
 for (File srcfile : srcfiles) {
 if (srcfile.length() == 0) {
 println "Skipping empty file: "+srcfile
 continue;
 }
-print "."
+cpb.tick()
 File xmlfile;
 try {
 int point = srcfile.getName().indexOf(".");
@@ -217,11 +216,12 @@
 xmlfile.delete()
 }
 }
-println ""
+cpb.done()
 
 println("Tokenizing "+txmDir.listFiles().length+" files")
+cpb = new ConsoleProgressBar(txmDir.listFiles().length)
 for (File pfile : txmDir.listFiles()) {
-print "."
+cpb.tick()
 File tfile = new File(tokenizedDir, pfile.getName());
 
 SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(pfile, tfile, lang);
@@ -231,15 +231,16 @@
 tfile.delete()
 }
 }
-println ""
+cpb.done()
 
 // Tag sentences
 List<File> stokenfiles = tokenizedDir.listFiles()
 File stokenizeDir = new File(tokenizedDir.getParentFile(), "stokenized")
 stokenizeDir.mkdir();
+cpb = new ConsoleProgressBar(stokenfiles.size())
 println("Tagging sentences of "+stokenfiles.size()+" files")
 for (File f : stokenfiles) {
-print "."
+cpb.tick()
 Sequence S = new Sequence();
 Filter F1 = new CutHeader();
 Filter F7 = new TagSentences();
@@ -265,12 +266,13 @@
 S = F1 = F7 = F11 = null;
 headerfile.delete();//remove the prepared file to clean
 }
-println ""
+cpb.done()
 
 //TRANSFORM INTO XML-TEI-TXM
 println("Building xml-tei-txm ("+stokenizeDir.listFiles().length+" files)")
+cpb = new ConsoleProgressBar(stokenizeDir.listFiles().length)
 for (File tfile : stokenizeDir.listFiles()) {
-print "."
+cpb.tick()
 File xmlfile = new File(txmDir, tfile.getName());
 
 def correspType = new HashMap<String,String>()
@@ -290,7 +292,7 @@
 new File(xmlfile.getParent(),xmlfile.getName()).delete()
 }
 }
-println ""
+cpb.done()
 
 files = txmDir.listFiles()
 if (files == null || files.size() == 0) return false
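Note on the recurring change: across this loader and the ones below, the ad-hoc print "." / println "" progress dots are replaced by org.txm.utils.ConsoleProgressBar. A minimal sketch of the pattern as used in these scripts, assuming (as the calls above imply) that the constructor takes the total number of steps, tick() advances the bar by one step and done() closes the line; the source directory is a hypothetical placeholder:

    import org.txm.utils.ConsoleProgressBar

    File srcDir = new File(System.getProperty("user.home"), "corpus-src") // hypothetical source directory
    def files = srcDir.listFiles()
    ConsoleProgressBar cpb = new ConsoleProgressBar(files.size()) // one step per file
    for (File f : files) {
        cpb.tick()   // advance the bar instead of printing "."
        // ... process f ...
    }
    cpb.done()       // close the progress line once the loop is over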
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/txtLoader.groovy (revision 2921)
@@ -35,6 +35,7 @@
 import org.txm.*;
 import org.txm.objects.*;
 import org.txm.core.engines.*;
+import org.txm.utils.ConsoleProgressBar
 import org.txm.utils.i18n.*;
 import org.txm.metadatas.*;
 import org.txm.utils.io.FileCopy;
@@ -107,8 +108,10 @@
 if (metadatas != null) {
 println "-- INJECTING METADATA - from csv file: "+allMetadataFile
 println("Injecting metadata: "+metadatas.getHeadersList()+" in texts of directory "+txmDir)
-for (File infile : txmDir.listFiles()) {
-print "."
+def filesToInject = txmDir.listFiles()
+ConsoleProgressBar cpb = new ConsoleProgressBar(filesToInject.size())
+for (File infile : filesToInject) {
+cpb.tick()
 File outfile = File.createTempFile("temp", ".xml", infile.getParentFile());
 
 if (!metadatas.injectMetadatasInXml(infile, outfile, "text", null)) {
@@ -121,7 +124,7 @@
 }
 }
 }
-println ""
+cpb.done()
 }
 
 
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xmltxm/xmltxmLoader.groovy (revision 2921)
@@ -197,6 +197,7 @@
 def second = 0
 
 println "Paginating text: "
+ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
 for (File txmFile : files) {
 
 String txtname = txmFile.getName();
@@ -213,7 +214,7 @@
 
 if (second) { print(", ") }
 if (second > 0 && (second++ % 5) == 0) println ""
-print(".");
+cpb.tick()
 
 def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element);
 Edition edition = new Edition(t);
@@ -226,7 +227,7 @@
 edition.addPage(""+(++i), wordid);
 }
 }
-println ""
+cpb.done()
 }
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 if (MONITOR != null) MONITOR.worked(20, "FINALIZING")
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/importer.groovy (revision 2921)
@@ -52,13 +52,13 @@
 class importer {
 
 /** The validation. */
-boolean validation;
+boolean validation
 
 /** The tokenize. */
-boolean tokenize;
+boolean tokenize
 
 /** The sentence. */
-boolean sentence = false;
+boolean sentence = false
 
 /**
 * Do tokenize.
@@ -83,23 +83,23 @@
 */
 public boolean run(File srcDir, File binDir, File txmDir, String basename, String ignoredElements, String lang)
 {
-new File(binDir,"tokenized").deleteDir();
-new File(binDir,"tokenized").mkdir();
+new File(binDir,"tokenized").deleteDir()
+new File(binDir,"tokenized").mkdir()
 
-ArrayList<String> milestones = new ArrayList<String>();//the tags who you want them to stay milestones
+ArrayList<String> milestones = new ArrayList<String>()//the tags who you want them to stay milestones
 
 //TEST EACH XML FILE IF VALID
-List<File> srcfiles = txmDir.listFiles();
+List<File> srcfiles = txmDir.listFiles()
 List<File> okfiles = []
 
-println "Sources cleaning & validation";
-def files = txmDir.listFiles()
+println "Sources cleaning & validation"
+def files = txmDir.listFiles()
 for (File f : txmDir.listFiles()) { // clean directory
 String name = f.getName()
 if (f.isHidden() || !name.toLowerCase().endsWith(".xml")) {
 if (!f.delete()) {
 println "WARNING: could not clean $txmDir directory: TXM could not delete $f"
-return false;
+return false
 }
 }
 }
@@ -107,40 +107,45 @@
 files = txmDir.listFiles()
 if (files == null || files.size() == 0) {
 println "No XML file (*.xml) to process. Aborting"
-return false;
+return false
 }
 files = files.sort()
-for (File f : txmDir.listFiles()) {
-if (!f.getName().toLowerCase().endsWith(".xml")) continue;
-
+ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
+for (File f : files) {
+if (!f.getName().toLowerCase().endsWith(".xml")) {
+continue
+}
 if (!f.isHidden() && f.canRead() && !f.getName().endsWith(".properties") && !f.getName().startsWith("metadata") && !f.isDirectory()) {
-print "."
+cpb.tick()
 if (!ValidateXml.test(f)) {
-if (stopIfMalformed)
-return;
-else
-continue;
+if (stopIfMalformed) {
+return
+} else {
+continue
+}
 } else {
-if (!f.getName().equals("import.xml"))
-okfiles.add(f);
+if (!f.getName().equals("import.xml")) {
+okfiles.add(f)
+}
 }
 }
 }
-println ""
+cpb.done()
 
 okfiles.sort()
 println "Files to process: "+okfiles
 if (okfiles.size() == 0) {
 println "No file. Check if the file extensions are '.xml'"
-return false;
+return false
 }
 
 // Fix surrogates
 File srcDirectory = new File(binDir, "src")
+srcDirectory.deleteDir()
 srcDirectory.mkdir()
 for (File f : okfiles) {
 File outputFile = new File (srcDirectory, f.getName())
-CleanFile.removeSurrogateFromXmlFile(f, outputFile);
+CleanFile.removeSurrogateFromXmlFile(f, outputFile)
 }
 okfiles = srcDirectory.listFiles()
 okfiles.sort()
@@ -150,22 +155,24 @@
 List<File> tokenfiles;
 if (tokenize || sentence) {
 println "Tokenizing (words=$tokenize sentences=$sentence) "+okfiles.size()+" files"
+ConsoleProgressBar cpb2 = new ConsoleProgressBar(files.size())
 for (File f : okfiles) {
-print "."
-File infile = f;
-File outfile = new File(binDir, "tokenized/"+f.getName());
+cpb2.tick()
+File infile = f
+File outfile = new File(binDir, "tokenized/"+f.getName())
 SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(infile, outfile, lang)
-if (ignoredElements != null && ignoredElements.trim().length() > 0)
+if (ignoredElements != null && ignoredElements.trim().length() > 0) {
 tokenizer.setOutSideTextTagsAndKeepContent(ignoredElements)
+}
 if (!tokenizer.process()) {
 println("Failed to process "+f)
 outfile.delete()
 }
 }
 tokenfiles = new File(binDir, "tokenized").listFiles()
-println ""
+cpb2.done()
 } else {
-tokenfiles = okfiles;
+tokenfiles = okfiles
 }
 
 // Change XML tag "text" to "textunit"
@@ -180,38 +187,39 @@
 }
 //TRANSFORM INTO XML-TEI-TXM
 println("Building XML-TXM ("+tokenfiles.size()+" files)")
+cpb = new ConsoleProgressBar(tokenfiles.size())
 for (File f : tokenfiles) {
 //ArrayList<String> milestones = new ArrayList<String>();
-print "."
-File file = f;
-String txmfile = f.getName();
+cpb.tick()
+File file = f
+String txmfile = f.getName()
 
 def correspType = new HashMap<String,String>()
 def correspRef = new HashMap<String,String>()
 //il faut lister les id de tous les respStmt
-def respId = [];
+def respId = []
 //fait la correspondance entre le respId et le rapport d'execution de l'outil
-def applications = new HashMap<String,HashMap<String,String>>();
+def applications = new HashMap<String,HashMap<String,String>>()
 //fait la correspondance entre le respId et les attributs type de la propriété ana du w txm
 //pour construire les ref vers les taxonomies
-def taxonomiesUtilisees = new HashMap<String,String[]>();
+def taxonomiesUtilisees = new HashMap<String,String[]>()
 //associe un id d'item avec sa description et son URI
-def itemsURI = new HashMap<String,HashMap<String,String>>();
+def itemsURI = new HashMap<String,HashMap<String,String>>()
 //informations de respStmt
 //resps (respId <voir ci-dessus>, [description, person, date])
-def resps = new HashMap<String,String[]>();
+def resps = new HashMap<String,String[]>()
 //lance le traitement
-def builder = new Xml2Ana(file);
-builder.setConvertAllAtrtibutes true;
-builder.setCorrespondances(correspRef, correspType);
+def builder = new Xml2Ana(file)
+builder.setConvertAllAtrtibutes true
+builder.setCorrespondances(correspRef, correspType)
 builder.setHeaderInfos(respId,resps, applications, taxonomiesUtilisees, itemsURI)
 //builder.setAddTEIHeader();
 if (!builder.process(new File(txmDir,txmfile))) {
-println("Failed to process "+f);
-new File(txmDir,txmfile).delete();
+println("Failed to process "+f)
+new File(txmDir,txmfile).delete()
 }
 }
-println ""
+cpb.done()
 okfiles = txmDir.listFiles()
 
 return okfiles != null && okfiles.size() > 0;
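The tokenizing step above keeps the same SimpleTokenizerXml wiring before and after this revision; only braces and progress reporting change. A minimal sketch of one iteration, assuming SimpleTokenizerXml is reachable through the script's existing imports, that process() returns false on failure (as the error handling above implies), and using placeholder values for binDir, lang and ignoredElements:

    File binDir = new File(System.getProperty("user.home"), "TXM/corpora/demo") // hypothetical bin directory
    String lang = "fr"                          // hypothetical language code
    String ignoredElements = "teiHeader note"   // hypothetical; may be null

    File infile = new File(binDir, "txm/demo/text1.xml") // hypothetical input file
    File outfile = new File(binDir, "tokenized/" + infile.getName())
    SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(infile, outfile, lang)
    if (ignoredElements != null && ignoredElements.trim().length() > 0) {
        tokenizer.setOutSideTextTagsAndKeepContent(ignoredElements) // treat these tags as outside-text but keep their content
    }
    if (!tokenizer.process()) {
        println("Failed to process " + infile)
        outfile.delete() // discard the partial output
    }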
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/xmlLoader.groovy (revision 2921)
@@ -25,128 +25,131 @@
 // $LastChangedRevision: 3349 $
 // $LastChangedBy: mdecorde $
 //
-package org.txm.scripts.importer.xml;
+package org.txm.scripts.importer.xml
 
-import javax.xml.stream.XMLStreamReader;
-
-import org.txm.scripts.importer.RemoveTag;
-import org.txm.importer.ApplyXsl2;
-import org.txm.importer.ValidateXml;
-import org.txm.scripts.importer.xml.importer;
-import org.txm.scripts.importer.xml.compiler;
-import org.txm.scripts.importer.xml.pager_old;
-import org.txm.objects.*;
-import org.txm.tokenizer.TokenizerClasses;
+import org.txm.importer.ApplyXsl2
+import org.txm.importer.ValidateXml
+import org.txm.scripts.importer.RemoveTag
+import org.txm.scripts.importer.xml.importer
+import org.txm.scripts.importer.xml.compiler
+import org.txm.scripts.importer.xml.pager_old
+import org.txm.objects.*
+import org.txm.tokenizer.TokenizerClasses
 import org.txm.utils.*
-import org.txm.utils.io.*;
-import org.txm.*;
-import org.txm.core.engines.*;
-import org.txm.importer.scripts.xmltxm.*;
-import org.txm.utils.i18n.*;
-import org.txm.metadatas.*;
-import javax.xml.stream.*;
+import org.txm.utils.io.*
+import org.txm.utils.xml.DomUtils
+import org.txm.*
+import org.txm.core.engines.*
+import org.txm.importer.scripts.xmltxm.*
+import org.txm.utils.i18n.*
+import org.txm.metadatas.*
+import javax.xml.stream.*
 import org.w3c.dom.Element
-import org.txm.utils.xml.DomUtils;
 
-String userDir = System.getProperty("user.home");
 
-def MONITOR;
-Project project;
+String userDir = System.getProperty("user.home")
 
+def MONITOR
+Project project
+
 try {project=projectBinding;MONITOR=monitor} catch (Exception)
 { }
 if (project == null) { println "no project set. Aborting"; return; }
 
-String corpusname = project.getName();
+String corpusname = project.getName()
 String basename = corpusname
-String rootDir = project.getSrcdir();
+String rootDir = project.getSrcdir()
 String lang = project.getLang()
 String model = lang
 String encoding = project.getEncoding()
 boolean annotate = project.getAnnotate()
-String xsl = project.getFrontXSL();
-def xslParams = project.getXsltParameters();
+String xsl = project.getFrontXSL()
+def xslParams = project.getXsltParameters()
 int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
 String page_element = project.getEditionDefinition("default").getPageElement()
 boolean build_edition = project.getEditionDefinition("default").getBuildEdition()
 boolean doTokenizeStep = project.getDoTokenizerStep()
 
-File srcDir = new File(rootDir);
+File srcDir = new File(rootDir)
 File binDir = project.getProjectDirectory()
-binDir.mkdirs();
+binDir.mkdirs()
 if (!binDir.exists()) {
-println "Could not create binDir "+binDir
+println "Could not create the result directory: "+binDir
 return;
 }
 
-File txmDir = new File(binDir, "txm/$corpusname");
-txmDir.deleteDir();
-txmDir.mkdirs();
+File txmDir = new File(binDir, "txm/$corpusname")
+txmDir.deleteDir()
+txmDir.mkdirs()
+new File(binDir, "src").deleteDir()
 
 File propertyFile = new File(rootDir, "import.properties")//default
-Properties props = new Properties();
-String[] metadatasToKeep;
+Properties props = new Properties()
+String[] metadatasToKeep
 
-String textSortAttribute = null;
-boolean normalizeMetadata = false;
-String ignoredElements = null;
-boolean stopIfMalformed = false;
+String textSortAttribute = null
+boolean normalizeMetadata = false
+String ignoredElements = null
+boolean stopIfMalformed = false
 
 println "Trying to read import properties file: "+propertyFile
 if (propertyFile.exists() && propertyFile.canRead()) {
-InputStreamReader input = new InputStreamReader(new FileInputStream(propertyFile) , "UTF-8");
-props.load(input);
-input.close();
-if(props.getProperty("sortmetadata") != null)
-textSortAttribute = props.get("sortmetadata").toString();
-if (props.getProperty("normalizemetadata") != null)
-normalizeMetadata = Boolean.parseBoolean(props.get("normalizemetadata").toString());
-if (props.getProperty("ignoredelements") != null)
-ignoredElements = props.get("ignoredelements").toString();
-if (props.getProperty("stopifmalformed") != null)
-stopIfMalformed = Boolean.parseBoolean(props.get("stopifmalformed").toString());
-
-println "import properties: "
+InputStreamReader input = new InputStreamReader(new FileInputStream(propertyFile) , "UTF-8")
+props.load(input)
+input.close()
+if (props.getProperty("sortmetadata") != null) {
+textSortAttribute = props.get("sortmetadata").toString()
+}
+if (props.getProperty("normalizemetadata") != null) {
+normalizeMetadata = Boolean.parseBoolean(props.get("normalizemetadata").toString())
+}
+if (props.getProperty("ignoredelements") != null) {
+ignoredElements = props.get("ignoredelements").toString()
+}
+if (props.getProperty("stopifmalformed") != null) {
+stopIfMalformed = Boolean.parseBoolean(props.get("stopifmalformed").toString())
+}
+println "Import properties: "
 println " sort metadata: "+textSortAttribute
 println " normalize attributes: "+normalizeMetadata
 println " ignored elements: "+ignoredElements
 println " stop if a XML source is malformed: "+stopIfMalformed
 }
 
-File allMetadataFile = Metadatas.findMetadataFile(srcDir);
+File allMetadataFile = Metadatas.findMetadataFile(srcDir)
 
 // Apply XSL
-if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
-if (MONITOR != null) MONITOR.worked(1, "APPLYING XSL")
+if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done() }
+if (MONITOR != null) { MONITOR.worked(1, "APPLYING XSL") }
 if (xsl != null && xslParams != null && xsl.trim().length() > 0) {
 new File(binDir, "src").deleteDir() // remove old outputed files if any
 if (ApplyXsl2.processImportSources(new File(xsl), srcDir, new File(binDir, "src"), xslParams))
 // return; // error during process
-srcDir = new File(binDir, "src");
+srcDir = new File(binDir, "src")
 println ""
 }
 
 // copy xml+dtd files
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
-List<File> srcfiles = srcDir.listFiles();
-def ignoredFiles = [];
+List<File> srcfiles = srcDir.listFiles()
+def ignoredFiles = []
 if (srcfiles != null) {
 for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into the "txm" directory
 File f = srcfiles.get(i)
 if (f.isDirectory() || f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
-srcfiles.remove(i);
-i--;
-continue; // don't raise warnings for those files
+srcfiles.remove(i)
+i--
+continue // don't raise warnings for those files
 }
 if (f.getName().toLowerCase().endsWith(".xml") && ValidateXml.test(f)) {
-FileCopy.copy(f, new File(txmDir, f.getName()));
+FileCopy.copy(f, new File(txmDir, f.getName()))
 } else {
-ignoredFiles << f;
+ignoredFiles << f
 }
 }
 } else {
 println "The $srcDir source directory is empty. Aborting."
-return;
+return
 }
 
 if (ignoredFiles.size() > 0) {
@@ -154,7 +157,7 @@
 }
 if (txmDir.listFiles() == null) {
 println "No txm file to process"
-return;
+return
 }
 
 // filtering
@@ -172,16 +175,16 @@
 }*/
 
 //get metadata values from CSV
-Metadatas metadatas; // text metadata
+Metadatas metadatas // text metadata
 
 println "Trying to read metadata from: "+allMetadataFile
-if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
+if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done() }
 if (allMetadataFile.exists()) {
 println " Metadata file found: "+allMetadataFile
 File copy = new File(binDir, allMetadataFile.getName())
 if (!FileCopy.copy(allMetadataFile, copy)) {
-println "Error: could not create a copy of metadata file "+allMetadataFile.getAbsoluteFile();
-return;
+println "Error: could not create a copy of metadata file "+allMetadataFile.getAbsoluteFile()
+return
 }
 metadatas = new Metadatas(copy, Toolbox.getMetadataEncoding(), Toolbox.getMetadataColumnSeparator(), Toolbox.getMetadataTextSeparator(), 1)
 } else {
@@ -189,30 +192,29 @@
 }
 
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
-if (MONITOR != null) MONITOR.worked(5, "IMPORTER")
+if (MONITOR != null) { MONITOR.worked(5, "IMPORTER") }
 println "-- IMPORTER - Reading source files"
-def imp = new importer();
+def imp = new importer()
 imp.doValidation(true) // change this to not validate xml
 
 imp.doTokenize(doTokenizeStep) // change this, to not tokenize xml
-imp.setStopIfMalformed(stopIfMalformed);
+imp.setStopIfMalformed(stopIfMalformed)
 if (!imp.run( srcDir, binDir, txmDir, basename, ignoredElements, lang)) {
-println "import process stopped";
-return;
+println "import process stopped"
+return
 }
 
-if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
-if (MONITOR != null) MONITOR.worked(20, "INJECTING METADATA")
+if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done() }
+if (MONITOR != null) { MONITOR.worked(20, "INJECTING METADATA") }
 if (metadatas != null) {
 
-
 println("-- INJECTING METADATA - "+metadatas.getHeadersList()+" in texts of directory "+new File(binDir,"txm"))
 
 def files = txmDir.listFiles()
 ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
 for (File infile : files) {
 cpb.tick()
-File outfile = File.createTempFile("temp", ".xml", infile.getParentFile());
+File outfile = File.createTempFile("temp", ".xml", infile.getParentFile())
 if (!metadatas.injectMetadatasInXml(infile, outfile, "text", null)) {
 outfile.delete();
 } else {
@@ -230,15 +232,15 @@
 return;
 }
 
-if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
-if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
+if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done() }
+if (MONITOR != null) { MONITOR.worked(20, "ANNOTATE") }
 
 boolean annotationSuccess = false;
 if (annotate) {
 println "-- ANNOTATE - Running NLP tools"
 def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
 if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
-annotationSuccess = true;
+annotationSuccess = true
 }
 }
 
@@ -248,61 +250,61 @@
 def c = new compiler();
 if(debug) c.setDebug();
 //c.setCwbPath("~/TXM/cwb/bin");
-c.setOptions(textSortAttribute, normalizeMetadata);
+c.setOptions(textSortAttribute, normalizeMetadata)
 c.setAnnotationSuccess(annotationSuccess)
-c.setLang(lang);
+c.setLang(lang)
 if (!c.run(project, binDir, txmDir, corpusname, null, srcfiles, metadatas)) {
-println "import process stopped";
+println "import process stopped"
 return;
 }
 
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 
-new File(binDir,"HTML/$corpusname").deleteDir();
-new File(binDir,"HTML/$corpusname").mkdirs();
+new File(binDir,"HTML/$corpusname").deleteDir()
+new File(binDir,"HTML/$corpusname").mkdirs()
 if (build_edition) {
 
 println "-- EDITION - Building edition"
-if (MONITOR != null) MONITOR.worked(25, "EDITION")
+if (MONITOR != null) { MONITOR.worked(25, "EDITION") }
 
-File outdir = new File(binDir,"/HTML/$corpusname/default/");
-outdir.mkdirs();
-List<File> filelist = txmDir.listFiles();
-Collections.sort(filelist);
+File outdir = new File(binDir,"/HTML/$corpusname/default/")
+outdir.mkdirs()
+List<File> filelist = txmDir.listFiles()
+Collections.sort(filelist)
 def second = 0
 
 println "Paginating "+filelist.size()+" texts"
 ConsoleProgressBar cpb = new ConsoleProgressBar(filelist.size());
 for (File txmFile : filelist) {
 cpb.tick()
-String txtname = txmFile.getName();
-int i = txtname.lastIndexOf(".");
-if (i > 0) txtname = txtname.substring(0, i);
+String txtname = txmFile.getName()
+int i = txtname.lastIndexOf(".")
+if (i > 0) { txtname = txtname.substring(0, i) }
 
-List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
-List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
+List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang)
+List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang)
 
-Text t = new Text(project);
-t.setName(txtname);
+Text t = new Text(project)
+t.setName(txtname)
 t.setSourceFile(txmFile)
 t.setTXMFile(txmFile)
 
 def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element);
-Edition edition = new Edition(t);
-edition.setName("default");
-edition.setIndex(outdir.getAbsolutePath());
+Edition edition = new Edition(t)
+edition.setName("default")
+edition.setIndex(outdir.getAbsolutePath())
 
 for (i = 0 ; i < ed.getPageFiles().size();) {
-File f = ed.getPageFiles().get(i);
-String wordid = "w_0";
-if (i < ed.getIdx().size()) wordid = ed.getIdx().get(i);
-edition.addPage(""+(++i), wordid);
+File f = ed.getPageFiles().get(i)
+String wordid = "w_0"
+if (i < ed.getIdx().size()) { wordid = ed.getIdx().get(i) }
+edition.addPage(""+(++i), wordid)
 }
 }
 cpb.done()
 }
 
-if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
-if (MONITOR != null) MONITOR.worked(20, "FINALIZING")
+if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done() }
+if (MONITOR != null) { MONITOR.worked(20, "FINALIZING") }
 
-readyToLoad = project.save();
+readyToLoad = project.save()
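For reference, xmlLoader.groovy reads an optional import.properties file from the project source directory (rootDir); this revision only reformats that block, the four recognized keys are unchanged. A minimal example file, with illustrative values only:

    sortmetadata=date
    normalizemetadata=true
    ignoredelements=teiHeader
    stopifmalformed=false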
tmp/org.txm.concordance.rcp/src/org/txm/concordance/rcp/editors/ConcordanceEditor.java (revision 2921)
@@ -2173,6 +2173,9 @@
 float W = 1f + queryLabel.getSize().x / (float) queryLabel.getText().length();
 // System.out.println("W= "+W+" Maxs "+refMax+ " " +leftMax+" " +keywordMax+" "+rightMax);
 
+if (sash == null) return; // there was a problem during initialization
+if (sash.getLayoutData() == null) return; // there was a problem during initialization
+
 ((FormData) sash.getLayoutData()).left = new FormAttachment(0, 25 + (int) (refMax * W));
 referenceColumn.setWidth(2 * (int) (refMax * W));
 getResultArea().layout(true, true);
Available formats: Unified diff