Révision 2031
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/BuildDemocratCorpusVersion1Macro.groovy (revision 2031) | ||
|---|---|---|
| 88 | 88 |
return; |
| 89 | 89 |
} |
| 90 | 90 |
|
| 91 |
if (!header.contains("txm_file")) {
|
|
| 92 |
println "** 'txm_file' column not found in $metadataFile header=$header" |
|
| 93 |
return; |
|
| 94 |
} |
|
| 95 |
|
|
| 91 | 96 |
def corpora = [:] |
| 92 | 97 |
for (Project project : Workspace.getInstance().getProjects()) {
|
| 93 | 98 |
for (MainCorpus corpus : project.getChildren(MainCorpus.class)) {
|
| ... | ... | |
| 98 | 103 |
while (csvReader.readRecord()) {
|
| 99 | 104 |
String text_id = csvReader.get("text_id")
|
| 100 | 105 |
if (text_id == null || text_id.length() == 0) {
|
| 101 |
println "** @id not found for record="+csvReader.getRawRecord() |
|
| 106 |
println "** @text_id not found for record="+csvReader.getRawRecord()
|
|
| 102 | 107 |
continue; |
| 103 | 108 |
} |
| 104 | 109 |
println "text: "+text_id |
| 105 | 110 |
|
| 106 | 111 |
String corpus_id = csvReader.get("corpus_id")
|
| 107 | 112 |
if (corpus_id == null || corpus_id.length() == 0) {
|
| 108 |
println " ** @corpus_id <- @text_id=$text_id"
|
|
| 109 |
corpus_id = text_id; |
|
| 113 |
println " ** @corpus_id <- @text_id="+text_id.toUpperCase()
|
|
| 114 |
corpus_id = text_id.toUpperCase();
|
|
| 110 | 115 |
} |
| 111 | 116 |
|
| 117 |
String txm_file = csvReader.get("txm_file")
|
|
| 118 |
if (txm_file == null || txm_file.length() == 0) {
|
|
| 119 |
println " ** @txm_file <- @corpus_id=${corpus_id}.txm"
|
|
| 120 |
txm_file = corpus_id.toUpperCase()+".txm"; |
|
| 121 |
} |
|
| 122 |
|
|
| 112 | 123 |
MainCorpus corpus = corpora[corpus_id] |
| 113 | 124 |
Project project = null |
| 114 | 125 |
if (corpus == null) {
|
| 115 |
File binCorpusFile = new File(inputDirectory, corpus_id+".txm")
|
|
| 126 |
File binCorpusFile = new File(inputDirectory, txm_file)
|
|
| 116 | 127 |
if (!binCorpusFile.exists()) {
|
| 117 | 128 |
println " ** no corpus binary file found for ID=$corpus_id : $binCorpusFile" |
| 118 | 129 |
continue; |
Formats disponibles : Unified diff