Révision 2031
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/BuildDemocratCorpusVersion1Macro.groovy (revision 2031) | ||
---|---|---|
88 | 88 |
return; |
89 | 89 |
} |
90 | 90 |
|
91 |
if (!header.contains("txm_file")) { |
|
92 |
println "** 'txm_file' column not found in $metadataFile header=$header" |
|
93 |
return; |
|
94 |
} |
|
95 |
|
|
91 | 96 |
def corpora = [:] |
92 | 97 |
for (Project project : Workspace.getInstance().getProjects()) { |
93 | 98 |
for (MainCorpus corpus : project.getChildren(MainCorpus.class)) { |
... | ... | |
98 | 103 |
while (csvReader.readRecord()) { |
99 | 104 |
String text_id = csvReader.get("text_id") |
100 | 105 |
if (text_id == null || text_id.length() == 0) { |
101 |
println "** @id not found for record="+csvReader.getRawRecord() |
|
106 |
println "** @text_id not found for record="+csvReader.getRawRecord()
|
|
102 | 107 |
continue; |
103 | 108 |
} |
104 | 109 |
println "text: "+text_id |
105 | 110 |
|
106 | 111 |
String corpus_id = csvReader.get("corpus_id") |
107 | 112 |
if (corpus_id == null || corpus_id.length() == 0) { |
108 |
println " ** @corpus_id <- @text_id=$text_id"
|
|
109 |
corpus_id = text_id; |
|
113 |
println " ** @corpus_id <- @text_id="+text_id.toUpperCase()
|
|
114 |
corpus_id = text_id.toUpperCase();
|
|
110 | 115 |
} |
111 | 116 |
|
117 |
String txm_file = csvReader.get("txm_file") |
|
118 |
if (txm_file == null || txm_file.length() == 0) { |
|
119 |
println " ** @txm_file <- @corpus_id=${corpus_id}.txm" |
|
120 |
txm_file = corpus_id.toUpperCase()+".txm"; |
|
121 |
} |
|
122 |
|
|
112 | 123 |
MainCorpus corpus = corpora[corpus_id] |
113 | 124 |
Project project = null |
114 | 125 |
if (corpus == null) { |
115 |
File binCorpusFile = new File(inputDirectory, corpus_id+".txm")
|
|
126 |
File binCorpusFile = new File(inputDirectory, txm_file)
|
|
116 | 127 |
if (!binCorpusFile.exists()) { |
117 | 128 |
println " ** no corpus binary file found for ID=$corpus_id : $binCorpusFile" |
118 | 129 |
continue; |
Formats disponibles : Unified diff