Révision 2288
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZPager.groovy (revision 2288) | ||
|---|---|---|
| 74 | 74 |
} |
| 75 | 75 |
|
| 76 | 76 |
def second = 0 |
| 77 |
def texts = module.getProject().getTexts() |
|
| 78 |
println "-- Building 'default' edition of ${texts.size()} texts..."
|
|
| 77 |
def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
|
|
| 78 |
def textsToProcess = texts.findAll() { text ->
|
|
| 79 |
File txmFile = text.getXMLTXMFile() |
|
| 80 |
File firstHTMLPageFile = new File(outputDirectory, text.getName()+"_1.html"); |
|
| 81 |
boolean mustBuild = false; |
|
| 82 |
if (!firstHTMLPageFile.exists() || txmFile.lastModified() >= firstHTMLPageFile.lastModified()) {
|
|
| 83 |
return true |
|
| 84 |
} |
|
| 85 |
|
|
| 86 |
if (!text.isDirty() && !mustBuild) {
|
|
| 87 |
Log.finer("skipping 'default html' step of $text");
|
|
| 88 |
return false |
|
| 89 |
} |
|
| 90 |
|
|
| 91 |
return true |
|
| 92 |
} |
|
| 93 |
println "-- Building 'default' edition of ${textsToProcess.size()}/${texts.size()} texts..."
|
|
| 79 | 94 |
|
| 80 | 95 |
def css = ["css/txm.css", "css/${corpusname}.css"] // default CSS inclusion
|
| 81 | 96 |
|
| ... | ... | |
| 90 | 105 |
} |
| 91 | 106 |
} |
| 92 | 107 |
|
| 93 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size()) |
|
| 94 |
for (Text text : texts) {
|
|
| 108 |
ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size())
|
|
| 109 |
for (Text text : textsToProcess) {
|
|
| 95 | 110 |
|
| 96 | 111 |
File txmFile = text.getXMLTXMFile() |
| 97 | 112 |
try {
|
| ... | ... | |
| 99 | 114 |
|
| 100 | 115 |
String textname = text.getName() |
| 101 | 116 |
|
| 102 |
File firstHTMLPageFile = new File(outputDirectory, textname+"_1.html"); |
|
| 103 |
boolean mustBuild = false; |
|
| 104 |
if (!firstHTMLPageFile.exists() || txmFile.lastModified() >= firstHTMLPageFile.lastModified()) {
|
|
| 105 |
mustBuild = true |
|
| 106 |
} |
|
| 107 |
|
|
| 108 |
if (!text.isDirty() && !mustBuild) {
|
|
| 109 |
Log.finer("skipping 'default html' step of $text");
|
|
| 110 |
continue |
|
| 111 |
} |
|
| 112 |
|
|
| 113 | 117 |
Edition edition = text.getEdition("default")
|
| 114 | 118 |
if (edition != null) {
|
| 115 | 119 |
edition.delete() |
| ... | ... | |
| 161 | 165 |
} |
| 162 | 166 |
|
| 163 | 167 |
// save changes |
| 164 |
println "" |
|
| 165 | 168 |
return true; |
| 166 | 169 |
} |
| 167 | 170 |
|
| ... | ... | |
| 183 | 186 |
|
| 184 | 187 |
def second = 0 |
| 185 | 188 |
|
| 186 |
def texts = module.getProject().getTexts()
|
|
| 189 |
def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
|
|
| 187 | 190 |
println "-- Building 'facs' edition of ${texts.size()} texts..."
|
| 188 | 191 |
File newEditionDirectory = new File(htmlDirectory, "facs"); |
| 189 | 192 |
newEditionDirectory.mkdir(); |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompilerStep.groovy (revision 2288) | ||
|---|---|---|
| 65 | 65 |
inputData = xmlFile.toURI().toURL().openStream(); |
| 66 | 66 |
factory = XMLInputFactory.newInstance(); |
| 67 | 67 |
parser = factory.createXMLStreamReader(inputData); |
| 68 |
|
|
| 69 | 68 |
} catch (Exception ex) {
|
| 70 | 69 |
System.err.println("Exception while parsing $xmlFile : "+ex);
|
| 71 | 70 |
} |
| ... | ... | |
| 97 | 96 |
*/ |
| 98 | 97 |
public boolean process() |
| 99 | 98 |
{
|
| 100 |
if (!createOutput(cqpFile)) |
|
| 99 |
if (!createOutput(cqpFile)) {
|
|
| 101 | 100 |
return false; |
| 102 |
|
|
| 101 |
} |
|
| 102 |
|
|
| 103 | 103 |
String headvalue="" |
| 104 | 104 |
String vAna = ""; |
| 105 | 105 |
String vForm = ""; |
| 106 | 106 |
String wordid= ""; |
| 107 | 107 |
String vHead = ""; |
| 108 | 108 |
|
| 109 |
|
|
| 110 | 109 |
int p_id = 0; |
| 111 | 110 |
int s_id = 0; |
| 112 | 111 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy (revision 2288) | ||
|---|---|---|
| 80 | 80 |
// get all anatypes |
| 81 | 81 |
sattrsListener = SAttributesListener.scanFiles(inputDirectory, wtag) |
| 82 | 82 |
def texts = module.getProject().getTexts() |
| 83 |
println "-- Listing structures&properties to create for "+texts.size()+" texts..."
|
|
| 83 |
println "-- Scanning structures&properties to create for "+texts.size()+" texts..."
|
|
| 84 | 84 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size()) |
| 85 | 85 |
for (Text t : texts) {
|
| 86 | 86 |
try {
|
| ... | ... | |
| 130 | 130 |
// anatypes << type |
| 131 | 131 |
} |
| 132 | 132 |
|
| 133 |
def cqpFiles = [] |
|
| 133 |
def cqpFiles = [] // ordered cqp files to concat before calling cwb-encode
|
|
| 134 | 134 |
int cqpFilesUpdated = 0; |
| 135 | 135 |
public boolean doCQPStep() {
|
| 136 |
println "-- Building CQP files $inputDirectory..." |
|
| 136 |
|
|
| 137 | 137 |
cqpDirectory.mkdir(); // if not created |
| 138 | 138 |
|
| 139 |
def texts = module.getProject().getTexts() |
|
| 140 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.size()) |
|
| 141 |
cqpFilesUpdated = 0; |
|
| 142 |
for (Text text : texts) {
|
|
| 143 |
cpb.tick(); |
|
| 144 |
|
|
| 139 |
def texts = orderedTextIDs.collect() { id -> module.getProject().getText(id) }
|
|
| 140 |
def textsToProcess = texts.findAll() { text ->
|
|
| 145 | 141 |
File xmlFile = text.getXMLTXMFile() |
| 146 | 142 |
String textname = text.getName() |
| 147 | 143 |
|
| 148 | 144 |
File cqpFile = new File(cqpDirectory, textname + ".cqp") |
| 149 |
cqpFiles << cqpFile |
|
| 150 |
|
|
| 145 |
cqpFiles << cqpFile // insert cqp files to concat later |
|
| 151 | 146 |
// skip step if cqpFile exists AND is more recent than the XML-TXM File |
| 152 | 147 |
boolean mustBuild = false; |
| 153 | 148 |
if (!cqpFile.exists() || xmlFile.lastModified() >= cqpFile.lastModified()) {
|
| 154 |
mustBuild = true
|
|
| 149 |
return true
|
|
| 155 | 150 |
} |
| 156 | 151 |
|
| 157 | 152 |
if (!text.isDirty() && !mustBuild) {
|
| 158 | 153 |
Log.finer("skipping .cqp step of $text");
|
| 159 |
continue
|
|
| 154 |
return false
|
|
| 160 | 155 |
} |
| 161 | 156 |
|
| 157 |
return true |
|
| 158 |
} |
|
| 159 |
println "-- Building CQP files ${textsToProcess.size()}/${texts.size()}..."
|
|
| 160 |
|
|
| 161 |
ConsoleProgressBar cpb = new ConsoleProgressBar(textsToProcess.size()) |
|
| 162 |
cqpFilesUpdated = 0; |
|
| 163 |
for (Text text : textsToProcess) {
|
|
| 164 |
cpb.tick(); |
|
| 165 |
|
|
| 166 |
File xmlFile = text.getXMLTXMFile() |
|
| 167 |
String textname = text.getName() |
|
| 168 |
|
|
| 169 |
File cqpFile = new File(cqpDirectory, textname + ".cqp") |
|
| 170 |
|
|
| 162 | 171 |
cqpFilesUpdated++ |
| 163 | 172 |
|
| 164 | 173 |
XTZCompilerStep step = new XTZCompilerStep(xmlFile, cqpFile, textname, corpusname, "default", anatypes, wtag) |
| ... | ... | |
| 180 | 189 |
|
| 181 | 190 |
List<String> pargs = [] |
| 182 | 191 |
pargs.add("id")
|
| 183 |
for (String ana : anatypes) |
|
| 192 |
for (String ana : anatypes) {
|
|
| 184 | 193 |
pargs.add(ana) |
| 185 |
|
|
| 194 |
} |
|
| 195 |
|
|
| 186 | 196 |
String[] pAttrs = pargs |
| 187 | 197 |
|
| 188 | 198 |
def structs = sattrsListener.getStructs() |
| ... | ... | |
| 203 | 213 |
} |
| 204 | 214 |
|
| 205 | 215 |
String concat = name+":"+structsProf.get(name); // append the depth |
| 206 |
for (String attributeName : structs.get(name)) // append the attributes |
|
| 216 |
for (String attributeName : structs.get(name)) { // append the attributes
|
|
| 207 | 217 |
concat += "+"+attributeName.toLowerCase(); |
| 208 |
|
|
| 218 |
} |
|
| 219 |
|
|
| 209 | 220 |
if (structs.get(name).size() == 0) {
|
| 210 | 221 |
concat += "+n"; |
| 211 | 222 |
} else {
|
| 212 |
if (!structs.get(name).contains("n"))
|
|
| 223 |
if (!structs.get(name).contains("n")) {
|
|
| 213 | 224 |
concat += "+n" |
| 225 |
} |
|
| 214 | 226 |
} |
| 215 | 227 |
|
| 216 | 228 |
if ((name == "p" || name == "body" || name == "back" || name == "front") |
| 217 |
&& !concat.contains("+n+") && !concat.endsWith("+n"))
|
|
| 229 |
&& !concat.contains("+n+") && !concat.endsWith("+n")) {
|
|
| 218 | 230 |
concat += "+n" |
| 219 |
|
|
| 231 |
} |
|
| 220 | 232 |
sargs.add(concat) |
| 221 | 233 |
} |
| 222 | 234 |
|
| 223 | 235 |
String textSAttributes = "text:0+id+base+project"; |
| 224 | 236 |
for (String name : tmpTextAttrs) {
|
| 225 |
if (!("id".equals(name) || "base".equals(name) || "project".equals(name)))
|
|
| 237 |
if (!("id".equals(name) || "base".equals(name) || "project".equals(name))) {
|
|
| 226 | 238 |
textSAttributes += "+"+name.toLowerCase() |
| 239 |
} |
|
| 227 | 240 |
} |
| 228 | 241 |
|
| 229 | 242 |
sargs.add(textSAttributes) |
| ... | ... | |
| 233 | 246 |
|
| 234 | 247 |
String[] sAttributes = sargs |
| 235 | 248 |
String[] pAttributes = pAttrs |
| 236 |
println " Word properties: "+pAttributes |
|
| 237 |
println " Structures: "+sargs |
|
| 249 |
println " Word properties: "+pAttributes.join(', ')
|
|
| 250 |
println " Structures: "+sargs.join(', ')
|
|
| 238 | 251 |
File allcqpFile = new File(cqpDirectory, "all.cqp"); |
| 239 | 252 |
allcqpFile.delete() |
| 240 | 253 |
try {
|
| ... | ... | |
| 244 | 257 |
} |
| 245 | 258 |
|
| 246 | 259 |
if (!cwbEn.run(outputDirectory.getAbsolutePath() + "/$corpusname", |
| 247 |
allcqpFile.getAbsolutePath(), |
|
| 248 |
regPath, pAttributes, sAttributes, false)) {
|
|
| 260 |
allcqpFile.getAbsolutePath(), regPath, pAttributes, sAttributes, false)) {
|
|
| 249 | 261 |
println "** cwb-encode did not ends well. Activate finer logs to see details." |
| 250 | 262 |
return false; |
| 251 | 263 |
} |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImport.groovy (revision 2288) | ||
|---|---|---|
| 8 | 8 |
|
| 9 | 9 |
import org.eclipse.core.runtime.IProgressMonitor; |
| 10 | 10 |
import org.txm.utils.xml.DomUtils; |
| 11 |
import org.txm.core.preferences.TBXPreferences; |
|
| 11 | 12 |
import org.txm.metadatas.Metadatas |
| 12 | 13 |
import org.txm.utils.io.FileCopy; |
| 13 | 14 |
import org.txm.* |
| ... | ... | |
| 43 | 44 |
return super.getTXMFilesOrder(); |
| 44 | 45 |
} |
| 45 | 46 |
Metadatas metadata = importer.getMetadata(); |
| 47 |
if (metadata == null) { // if metadata was not built, try building it
|
|
| 48 |
File allMetadataFile = Metadatas.findMetadataFile(binaryDirectory); |
|
| 49 |
if (allMetadataFile.exists()) {
|
|
| 50 |
metadata = new Metadatas(allMetadataFile, |
|
| 51 |
Toolbox.getPreference(TBXPreferences.METADATA_ENCODING), |
|
| 52 |
Toolbox.getPreference(TBXPreferences.METADATA_COLSEPARATOR), |
|
| 53 |
Toolbox.getPreference(TBXPreferences.METADATA_TXTSEPARATOR), 1) |
|
| 54 |
} |
|
| 55 |
} |
|
| 56 |
|
|
| 46 | 57 |
if (metadata == null) {
|
| 47 | 58 |
println "no metadata, using default text order" |
| 48 | 59 |
return super.getTXMFilesOrder(); |
| 49 | 60 |
} |
| 61 |
|
|
| 50 | 62 |
File txmDirectory = new File(binaryDirectory, "txm/"+corpusName); |
| 51 | 63 |
ArrayList<File> files = new ArrayList<File>(Arrays.asList(txmDirectory.listFiles(new FileFilter() {
|
| 52 | 64 |
@Override |
| ... | ... | |
| 61 | 73 |
def ti = metadata.get(t) |
| 62 | 74 |
for (org.txm.metadatas.Entry e : ti) {
|
| 63 | 75 |
if ("textorder".equals(e.getId())) {
|
| 64 |
textorder[t] = ti.get("textorder")
|
|
| 76 |
textorder[t] = ti.value()
|
|
| 65 | 77 |
} |
| 66 | 78 |
} |
| 67 | 79 |
} |
| ... | ... | |
| 86 | 98 |
} |
| 87 | 99 |
}); |
| 88 | 100 |
//println files |
| 89 |
return files;
|
|
| 101 |
return texts;
|
|
| 90 | 102 |
} |
| 91 | 103 |
|
| 92 | 104 |
public void start() throws InterruptedException {
|
Formats disponibles : Unified diff