Révision 2288
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZPager.groovy (revision 2288) | ||
---|---|---|
74 | 74 |
} |
75 | 75 |
|
76 | 76 |
def second = 0 |
77 |
def texts = module.getProject().getTexts() |
|
78 |
println "-- Building 'default' edition of ${texts.size()} texts..." |
|
77 |
def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) } |
|
78 |
def textsToProcess = texts.findAll() { text -> |
|
79 |
File txmFile = text.getXMLTXMFile() |
|
80 |
File firstHTMLPageFile = new File(outputDirectory, text.getName()+"_1.html"); |
|
81 |
boolean mustBuild = false; |
|
82 |
if (!firstHTMLPageFile.exists() || txmFile.lastModified() >= firstHTMLPageFile.lastModified()) { |
|
83 |
return true |
|
84 |
} |
|
85 |
|
|
86 |
if (!text.isDirty() && !mustBuild) { |
|
87 |
Log.finer("skipping 'default html' step of $text"); |
|
88 |
return false |
|
89 |
} |
|
90 |
|
|
91 |
return true |
|
92 |
} |
|
93 |
println "-- Building 'default' edition of ${textsToProcess.size()}/${texts.size()} texts..." |
|
79 | 94 |
|
80 | 95 |
def css = ["css/txm.css", "css/${corpusname}.css"] // default CSS inclusion |
81 | 96 |
|
... | ... | |
90 | 105 |
} |
91 | 106 |
} |
92 | 107 |
|
93 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size()) |
|
94 |
for (Text text : texts) { |
|
108 |
ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size())
|
|
109 |
for (Text text : textsToProcess) {
|
|
95 | 110 |
|
96 | 111 |
File txmFile = text.getXMLTXMFile() |
97 | 112 |
try { |
... | ... | |
99 | 114 |
|
100 | 115 |
String textname = text.getName() |
101 | 116 |
|
102 |
File firstHTMLPageFile = new File(outputDirectory, textname+"_1.html"); |
|
103 |
boolean mustBuild = false; |
|
104 |
if (!firstHTMLPageFile.exists() || txmFile.lastModified() >= firstHTMLPageFile.lastModified()) { |
|
105 |
mustBuild = true |
|
106 |
} |
|
107 |
|
|
108 |
if (!text.isDirty() && !mustBuild) { |
|
109 |
Log.finer("skipping 'default html' step of $text"); |
|
110 |
continue |
|
111 |
} |
|
112 |
|
|
113 | 117 |
Edition edition = text.getEdition("default") |
114 | 118 |
if (edition != null) { |
115 | 119 |
edition.delete() |
... | ... | |
161 | 165 |
} |
162 | 166 |
|
163 | 167 |
// save changes |
164 |
println "" |
|
165 | 168 |
return true; |
166 | 169 |
} |
167 | 170 |
|
... | ... | |
183 | 186 |
|
184 | 187 |
def second = 0 |
185 | 188 |
|
186 |
def texts = module.getProject().getTexts()
|
|
189 |
def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
|
|
187 | 190 |
println "-- Building 'facs' edition of ${texts.size()} texts..." |
188 | 191 |
File newEditionDirectory = new File(htmlDirectory, "facs"); |
189 | 192 |
newEditionDirectory.mkdir(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompilerStep.groovy (revision 2288) | ||
---|---|---|
65 | 65 |
inputData = xmlFile.toURI().toURL().openStream(); |
66 | 66 |
factory = XMLInputFactory.newInstance(); |
67 | 67 |
parser = factory.createXMLStreamReader(inputData); |
68 |
|
|
69 | 68 |
} catch (Exception ex) { |
70 | 69 |
System.err.println("Exception while parsing $xmlFile : "+ex); |
71 | 70 |
} |
... | ... | |
97 | 96 |
*/ |
98 | 97 |
public boolean process() |
99 | 98 |
{ |
100 |
if (!createOutput(cqpFile)) |
|
99 |
if (!createOutput(cqpFile)) {
|
|
101 | 100 |
return false; |
102 |
|
|
101 |
} |
|
102 |
|
|
103 | 103 |
String headvalue="" |
104 | 104 |
String vAna = ""; |
105 | 105 |
String vForm = ""; |
106 | 106 |
String wordid= ""; |
107 | 107 |
String vHead = ""; |
108 | 108 |
|
109 |
|
|
110 | 109 |
int p_id = 0; |
111 | 110 |
int s_id = 0; |
112 | 111 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy (revision 2288) | ||
---|---|---|
80 | 80 |
// get all anatypes |
81 | 81 |
sattrsListener = SAttributesListener.scanFiles(inputDirectory, wtag) |
82 | 82 |
def texts = module.getProject().getTexts() |
83 |
println "-- Listing structures&properties to create for "+texts.size()+" texts..."
|
|
83 |
println "-- Scanning structures&properties to create for "+texts.size()+" texts..."
|
|
84 | 84 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size()) |
85 | 85 |
for (Text t : texts) { |
86 | 86 |
try { |
... | ... | |
130 | 130 |
// anatypes << type |
131 | 131 |
} |
132 | 132 |
|
133 |
def cqpFiles = [] |
|
133 |
def cqpFiles = [] // ordered cqp files to concat before calling cwb-encode
|
|
134 | 134 |
int cqpFilesUpdated = 0; |
135 | 135 |
public boolean doCQPStep() { |
136 |
println "-- Building CQP files $inputDirectory..." |
|
136 |
|
|
137 | 137 |
cqpDirectory.mkdir(); // if not created |
138 | 138 |
|
139 |
def texts = module.getProject().getTexts() |
|
140 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size()) |
|
141 |
cqpFilesUpdated = 0; |
|
142 |
for (Text text : texts) { |
|
143 |
cpb.tick(); |
|
144 |
|
|
139 |
def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) } |
|
140 |
def textsToProcess = texts.findAll() { text -> |
|
145 | 141 |
File xmlFile = text.getXMLTXMFile() |
146 | 142 |
String textname = text.getName() |
147 | 143 |
|
148 | 144 |
File cqpFile = new File(cqpDirectory, textname + ".cqp") |
149 |
cqpFiles << cqpFile |
|
150 |
|
|
145 |
cqpFiles << cqpFile // insert cqp files to concat later |
|
151 | 146 |
// skip step if cqpFile exists AND is more recent than the XML-TXM File |
152 | 147 |
boolean mustBuild = false; |
153 | 148 |
if (!cqpFile.exists() || xmlFile.lastModified() >= cqpFile.lastModified()) { |
154 |
mustBuild = true
|
|
149 |
return true
|
|
155 | 150 |
} |
156 | 151 |
|
157 | 152 |
if (!text.isDirty() && !mustBuild) { |
158 | 153 |
Log.finer("skipping .cqp step of $text"); |
159 |
continue
|
|
154 |
return false
|
|
160 | 155 |
} |
161 | 156 |
|
157 |
return true |
|
158 |
} |
|
159 |
println "-- Building CQP files ${textsToProcess.size()}/${texts.size()}..." |
|
160 |
|
|
161 |
ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size()) |
|
162 |
cqpFilesUpdated = 0; |
|
163 |
for (Text text : textsToProcess) { |
|
164 |
cpb.tick(); |
|
165 |
|
|
166 |
File xmlFile = text.getXMLTXMFile() |
|
167 |
String textname = text.getName() |
|
168 |
|
|
169 |
File cqpFile = new File(cqpDirectory, textname + ".cqp") |
|
170 |
|
|
162 | 171 |
cqpFilesUpdated++ |
163 | 172 |
|
164 | 173 |
XTZCompilerStep step = new XTZCompilerStep(xmlFile, cqpFile, textname, corpusname, "default", anatypes, wtag) |
... | ... | |
180 | 189 |
|
181 | 190 |
List<String> pargs = [] |
182 | 191 |
pargs.add("id") |
183 |
for (String ana : anatypes) |
|
192 |
for (String ana : anatypes) {
|
|
184 | 193 |
pargs.add(ana) |
185 |
|
|
194 |
} |
|
195 |
|
|
186 | 196 |
String[] pAttrs = pargs |
187 | 197 |
|
188 | 198 |
def structs = sattrsListener.getStructs() |
... | ... | |
203 | 213 |
} |
204 | 214 |
|
205 | 215 |
String concat = name+":"+structsProf.get(name); // append the depth |
206 |
for (String attributeName : structs.get(name)) // append the attributes |
|
216 |
for (String attributeName : structs.get(name)) { // append the attributes
|
|
207 | 217 |
concat += "+"+attributeName.toLowerCase(); |
208 |
|
|
218 |
} |
|
219 |
|
|
209 | 220 |
if (structs.get(name).size() == 0) { |
210 | 221 |
concat += "+n"; |
211 | 222 |
} else { |
212 |
if (!structs.get(name).contains("n")) |
|
223 |
if (!structs.get(name).contains("n")) {
|
|
213 | 224 |
concat += "+n" |
225 |
} |
|
214 | 226 |
} |
215 | 227 |
|
216 | 228 |
if ((name == "p" || name == "body" || name == "back" || name == "front") |
217 |
&& !concat.contains("+n+") && !concat.endsWith("+n"))
|
|
229 |
&& !concat.contains("+n+") && !concat.endsWith("+n")) {
|
|
218 | 230 |
concat += "+n" |
219 |
|
|
231 |
} |
|
220 | 232 |
sargs.add(concat) |
221 | 233 |
} |
222 | 234 |
|
223 | 235 |
String textSAttributes = "text:0+id+base+project"; |
224 | 236 |
for (String name : tmpTextAttrs) { |
225 |
if (!("id".equals(name) || "base".equals(name) || "project".equals(name))) |
|
237 |
if (!("id".equals(name) || "base".equals(name) || "project".equals(name))) {
|
|
226 | 238 |
textSAttributes += "+"+name.toLowerCase() |
239 |
} |
|
227 | 240 |
} |
228 | 241 |
|
229 | 242 |
sargs.add(textSAttributes) |
... | ... | |
233 | 246 |
|
234 | 247 |
String[] sAttributes = sargs |
235 | 248 |
String[] pAttributes = pAttrs |
236 |
println " Word properties: "+pAttributes |
|
237 |
println " Structures: "+sargs |
|
249 |
println " Word properties: "+pAttributes.join(', ')
|
|
250 |
println " Structures: "+sargs.join(', ')
|
|
238 | 251 |
File allcqpFile = new File(cqpDirectory, "all.cqp"); |
239 | 252 |
allcqpFile.delete() |
240 | 253 |
try { |
... | ... | |
244 | 257 |
} |
245 | 258 |
|
246 | 259 |
if (!cwbEn.run(outputDirectory.getAbsolutePath() + "/$corpusname", |
247 |
allcqpFile.getAbsolutePath(), |
|
248 |
regPath, pAttributes, sAttributes, false)) { |
|
260 |
allcqpFile.getAbsolutePath(), regPath, pAttributes, sAttributes, false)) { |
|
249 | 261 |
println "** cwb-encode did not ends well. Activate finer logs to see details." |
250 | 262 |
return false; |
251 | 263 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImport.groovy (revision 2288) | ||
---|---|---|
8 | 8 |
|
9 | 9 |
import org.eclipse.core.runtime.IProgressMonitor; |
10 | 10 |
import org.txm.utils.xml.DomUtils; |
11 |
import org.txm.core.preferences.TBXPreferences; |
|
11 | 12 |
import org.txm.metadatas.Metadatas |
12 | 13 |
import org.txm.utils.io.FileCopy; |
13 | 14 |
import org.txm.* |
... | ... | |
43 | 44 |
return super.getTXMFilesOrder(); |
44 | 45 |
} |
45 | 46 |
Metadatas metadata = importer.getMetadata(); |
47 |
if (metadata == null) { // if metadata was not built, try building it |
|
48 |
File allMetadataFile = Metadatas.findMetadataFile(binaryDirectory); |
|
49 |
if (allMetadataFile.exists()) { |
|
50 |
metadata = new Metadatas(allMetadataFile, |
|
51 |
Toolbox.getPreference(TBXPreferences.METADATA_ENCODING), |
|
52 |
Toolbox.getPreference(TBXPreferences.METADATA_COLSEPARATOR), |
|
53 |
Toolbox.getPreference(TBXPreferences.METADATA_TXTSEPARATOR), 1) |
|
54 |
} |
|
55 |
} |
|
56 |
|
|
46 | 57 |
if (metadata == null) { |
47 | 58 |
println "no metadata, using default text order" |
48 | 59 |
return super.getTXMFilesOrder(); |
49 | 60 |
} |
61 |
|
|
50 | 62 |
File txmDirectory = new File(binaryDirectory, "txm/"+corpusName); |
51 | 63 |
ArrayList<File> files = new ArrayList<File>(Arrays.asList(txmDirectory.listFiles(new FileFilter() { |
52 | 64 |
@Override |
... | ... | |
61 | 73 |
def ti = metadata.get(t) |
62 | 74 |
for (org.txm.metadatas.Entry e : ti) { |
63 | 75 |
if ("textorder".equals(e.getId())) { |
64 |
textorder[t] = ti.get("textorder")
|
|
76 |
textorder[t] = ti.value()
|
|
65 | 77 |
} |
66 | 78 |
} |
67 | 79 |
} |
... | ... | |
86 | 98 |
} |
87 | 99 |
}); |
88 | 100 |
//println files |
89 |
return files;
|
|
101 |
return texts;
|
|
90 | 102 |
} |
91 | 103 |
|
92 | 104 |
public void start() throws InterruptedException { |
Formats disponibles : Unified diff