Révision 2942
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/AddSectionsFromTableMacro.groovy (revision 2942) | ||
---|---|---|
1 |
package org.txm.macro.transcription |
|
2 |
|
|
3 |
import java.nio.charset.Charset |
|
4 |
|
|
5 |
import java.time.LocalTime |
|
6 |
import java.time.format.DateTimeFormatter |
|
7 |
import org.txm.utils.* |
|
8 |
|
|
9 |
// ---------------------------------------------------------------------------
// Macro parameters. Each @Field/@Option pair declares one entry of the
// parameters dialog opened by ParametersDialog.open(this) below.
// ---------------------------------------------------------------------------

// Table file holding the sections metadata (one record per section).
@Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
File metadataFile;

// Directory containing the <id>.trs files; results are written to its "out" sub-directory.
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
File trsDirectory;

// Column whose value (minus an optional ".mp4" suffix) gives the name of the TRS file of a record.
@Field @Option(name="joinTRSColumn", usage="Colonne de jointure de transcription", widget="String", required=true, def="Lien notice principale")
def joinTRSColumn

// Column holding the start timecode of a section.
@Field @Option(name="startTimeColumn", usage="Colonne de timing de début de section", widget="String", required=true, def="antract_debut")
def startTimeColumn = "antract_debut"

// Column holding the end timecode of a section.
@Field @Option(name="endTimeColumn", usage="Colonne de timing de fin de section", widget="String", required=true, def="antract_fin")
def endTimeColumn = "antract_fin"

// ";"-separated columns concatenated into the "type" attribute of a section.
@Field @Option(name="typeColumns", usage="Colonnes des métadonnées de type de section", widget="String", required=true, def="Titre propre")
def typeColumns

// ";"-separated columns concatenated into the "topic" attribute of a section.
@Field @Option(name="topicColumns", usage="Colonnes des métadonnées de topic de section", widget="String", required=true, def="Date de diffusion")
def topicColumns

// ";"-separated columns copied as attributes of a section.
@Field @Option(name="metadataColumns", usage="Colonnes de metadonnées de section", widget="String", required=true, def="Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)")
def metadataColumns

// ";"-separated group names, one per entry of metadataColumns (same order, same length).
@Field @Option(name="metadataColumnsGroups", usage="Colonnes des groupes de metadonnées de section", widget="String", required=true, def="metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text")
def metadataColumnsGroups

if (!ParametersDialog.open(this)) return;

// The list-valued parameters are entered as ";"-separated strings.
typeColumns = typeColumns.split(";")
topicColumns = topicColumns.split(";")
metadataColumns = metadataColumns.split(";")
metadataColumnsGroups = metadataColumnsGroups.split(";")

// Each metadata column needs exactly one group.
if (metadataColumns.size() != metadataColumnsGroups.size()) {
	println "ERROR in metadata declarations&groups:"
	println "COLUMNS: "+metadataColumns
	println "GROUPS : "+metadataColumnsGroups
	return
}

// Fail early with a readable message instead of an error when opening the table.
if (metadataFile == null || !metadataFile.exists()) {
	println "$metadataFile not found"
	return
}

if (!trsDirectory.exists()) {
	println "$trsDirectory not found"
	return
}
|
53 |
|
|
54 |
println "Loading data from $metadataFile..."
TableReader reader = new TableReader(metadataFile)//, "\t".charAt(0), Charset.forName("UTF-8")
reader.readHeaders()
def header = reader.getHeaders()

// Every column named in the parameters must be present in the table header.
// Each entry is [column name, message printed when the column is missing];
// the checks are done in this order and the first missing column aborts the macro.
def columnChecks = []
columnChecks << [joinTRSColumn, "No TRS ID $joinTRSColumn column found"]
columnChecks << [startTimeColumn, "No start time $startTimeColumn column found"]
columnChecks << [endTimeColumn, "No end time $endTimeColumn column found"]
for (def col : metadataColumns) {
	columnChecks << [col, "No $col column found"]
}
for (def col : typeColumns) {
	columnChecks << [col, "No type $col column found"]
}
for (def col : topicColumns) {
	columnChecks << [col, "No topic $col column found"]
}

// Test the column itself (not endTimeColumn) so a missing metadata/type/topic column is reported.
def missingColumn = columnChecks.find { check -> !header.contains(check[0]) }
if (missingColumn != null) {
	println missingColumn[1]
	reader.close() // do not leave the table file open when aborting
	return
}

File outputDirectory = new File(trsDirectory, "out")
println "Writing result to $outputDirectory..."
|
91 |
|
|
92 |
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME |
|
93 |
/**
 * Converts a timecode string into a number of seconds.
 *
 * The text before the last ":" is parsed as an ISO local time (HH:mm:ss) and
 * the integer after the last ":" is a frame count, added as frames/25 seconds
 * (presumably 25 frames per second - TODO confirm with the data provider).
 * A frame count greater than 25 is ignored (counted as 0).
 *
 * @param str the timecode, e.g. "00:01:00:05"
 * @return the time in seconds, or null if str is null or contains no ":"
 * @throws NumberFormatException if the frame part is not an integer
 * @throws java.time.format.DateTimeParseException if the time part is not a valid ISO local time
 */
def strTotime(def str) {
	if (str == null) {
		return null
	}
	int lastColon = str.lastIndexOf(":")
	if (lastColon == -1) {
		return null
	}

	// Locals are declared explicitly so nothing leaks into the script binding.
	int bonusFrame = Integer.parseInt(str.substring(lastColon + 1))
	if (bonusFrame > 25) {
		bonusFrame = 0
	}

	LocalTime time = LocalTime.parse(str.substring(0, lastColon), DateTimeFormatter.ISO_LOCAL_TIME)
	int totalSeconds = (time.getHour()*60*60) + (time.getMinute()*60) + time.getSecond()

	// Groovy integer division yields a BigDecimal: the result keeps the frame fraction.
	return totalSeconds + (bonusFrame/25)
}
|
111 |
|
|
112 |
// Transcription id -> list of [startTime, endTime, attributes] triples, one per timed record.
def sectionGroupsToInsert = [:]
println "Reading data..."
while (reader.readRecord()) {
	// The join column gives the transcription id, with an optional ".mp4" suffix to drop.
	String id = reader.get(joinTRSColumn).trim()
	if (id.endsWith(".mp4")) {
		id = id.substring(0, id.length() - 4)
	}
	if (id.length() == 0) {
		continue
	}

	// Fetch the list of this transcription, registering an empty one on first sight.
	def section = sectionGroupsToInsert.get(id, [])

	// Records without a start time are not timed sections: nothing more to do.
	String startValue = reader.get(startTimeColumn)
	if (startValue == null || startValue.length() == 0) {
		continue
	}

	def attributes = [:]

	// "topic" and "type": non-empty trimmed cell values, line breaks removed, tab-joined.
	["topic": topicColumns, "type": typeColumns].each { attributeName, columns ->
		def values = []
		columns.each { column ->
			String cell = reader.get(column).trim()
			if (cell.length() > 0) {
				values << cell.replace("\n", "")
			}
		}
		attributes[attributeName] = values.join("\t")
	}

	// One attribute per metadata column, named by AsciiUtils.buildAttributeId(column).
	def attributeIds = []
	def attributeGroups = []
	metadataColumns.eachWithIndex { column, index ->
		String attributeId = AsciiUtils.buildAttributeId(column)
		attributes[attributeId] = reader.get(column)
		attributeIds << attributeId
		attributeGroups << metadataColumnsGroups[index]
	}
	attributes["metadata"] = attributeIds.join("|")
	attributes["metadata_groups"] = attributeGroups.join("|")

	attributes["startTime"] = strTotime(reader.get(startTimeColumn))
	attributes["endTime"] = strTotime(reader.get(endTimeColumn))
	attributes["synchronized"] = "true"

	section << [attributes["startTime"], attributes["endTime"], attributes]
}
|
156 |
|
|
157 |
println "Inserting sections... "+sectionGroupsToInsert.size()

// For each transcription named in the table: redistribute the Turn nodes of its single
// Section into new Section nodes built from the table records, then write the result
// in the output directory under the same file name.
ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
for (String id : sectionGroupsToInsert.keySet()) {
	cpb.tick()
	File trsFile = new File(trsDirectory, id+".trs")
	if (!trsFile.exists()) {
		continue
	}
	//println "Processing $id..."

	// Sections ordered by start time, then end time.
	def sections = sectionGroupsToInsert[id]
	sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> b[1] }

	// Open input file
	def slurper = new groovy.util.XmlParser(false, true, true);
	slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
	slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
	def trs = slurper.parse(trsFile.toURI().toString())

	// Exactly one Episode holding exactly one Section is expected.
	def trsEpisodes = trs.Episode
	if (trsEpisodes.size() == 0) {
		println "no Episode node in $trsFile"
		continue
	}
	if (trsEpisodes.size() > 1) {
		println "multiple Episode node in $trsFile"
		continue
	}
	def trsEpisode = trsEpisodes[0]
	def trsSections = trs.Episode.Section
	if (trsSections.size() == 0) {
		println "no Section node in $trsFile"
		continue
	}
	if (trsSections.size() > 1) {
		println "multiple Section node in $trsFile"
		continue
	}
	def trsSection = trsSections[0]

	def turns = trsSection.Turn
	def iSection = 0;          // index where the search of the next turn starts
	def currentSection = null  // table section of the current node, null for an unsynchronized node
	def currentNode = null     // Section node currently receiving the turns

	for (def turn : turns) {
		def start = Float.parseFloat(turn.@startTime)
		def end = Float.parseFloat(turn.@endTime)

		// Find the first section, from iSection on, overlapping the turn.
		def found = null;
		for (int i = iSection ; i < sections.size() ; i++) {
			boolean turnIsBeforeSection = end < sections[i][0]
			boolean turnIsAfterSection = sections[i][1] < start
			if (!turnIsBeforeSection && !turnIsAfterSection) {
				found = sections[i]
				iSection = i
				break; // stop searching and set iSection to accelerate next search
			}
		}

		if (found == null) {
			// Turn outside of any section: open an unsynchronized section unless one is already open.
			if (currentSection != null || currentNode == null) {
				currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
				currentSection = null;
			}
		} else {
			if (found != currentSection) {
				// Leaving an unsynchronized section: its end is the end of its last turn.
				if (currentNode != null && currentNode.@synchronized == "false") {
					def tmp = currentNode.Turn
					currentNode.@endTime = tmp[-1].@endTime
				}

				currentSection = found
				currentNode = new Node(trsEpisode, "Section", currentSection[2])
			}
		}
		// Move the turn from the initial section to the current one.
		trsSection.remove(turn)
		currentNode.append(turn)
	}

	// An unsynchronized section still open after the last turn must also get its end time,
	// otherwise it is written with endTime="".
	if (currentNode != null && currentNode.@synchronized == "false") {
		def tmp = currentNode.Turn
		if (tmp.size() > 0) {
			currentNode.@endTime = tmp[-1].@endTime
		}
	}

	//remove the initial section
	trsEpisode.remove(trsSection)

	outputDirectory.mkdir()
	File outfile = new File(outputDirectory, trsFile.getName())
	outfile.withWriter("UTF-8") { writer ->
		writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
		def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
		printer.setPreserveWhitespace(true)
		printer.print(trs)
	}
}
cpb.done()
reader.close()
println "Done."
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/PrepareAFVOIXOFFCorpusMacro.groovy (revision 2942) | ||
---|---|---|
1 |
package org.txm.macro.projects.antract |
|
2 |
|
|
3 |
import groovy.transform.Field |
|
4 |
import org.txm.rcp.swt.widget.parameters.* |
|
5 |
|
|
6 |
import org.txm.macro.transcription.* |
|
7 |
import org.txm.utils.io.FileCopy |
|
8 |
import org.txm.macro.projects.antract.BuildAFMetadataMacro |
|
9 |
import org.txm.macro.projects.antract.BuildAFMetadataMacro |
|
10 |
|
|
11 |
// Main table file; the "emissions.xlsx" and "sujets.xlsx" files are looked up next to it.
@Field @Option(name="tableFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="all.xlsx") // /home/mdecorde/TEMP/ANTRACT/AF/all.xlsx
File tableFile;

// Directory of the TRS files; the final files are expected in its "out" sub-directory.
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS à corriger", widget="Folder", required=true, def="AF")
def trsDirectory

if (!ParametersDialog.open(this)) return;

// Files and directories shared by the steps below.
File workingDirectory = tableFile.getParentFile()
File emissionsFile = new File(workingDirectory, "emissions.xlsx")
File sujetsFile = new File(workingDirectory, "sujets.xlsx")
File outputDirectory = new File(trsDirectory, "out")

// Step 1: extract the "sujets" and "emissions" tables from the main table file.
def buildMetadataParameters = [
	"tableFile": tableFile,
	"buildSujetsMetadata": true,
	"buildEmissionsMetadata": true
]
gse.runMacro(BuildAFMetadataMacro, buildMetadataParameters)

// Step 2: fix the TRS files of the trsDirectory directory.
def fixParameters = ["trsDirectory": trsDirectory]
gse.runMacro(FixINATRSMacro, fixParameters)

// Step 3: insert the sections described by the "sujets" table in the TRS files.
def addSectionsParameters = [
	"metadataFile": sujetsFile,
	"trsDirectory": trsDirectory,
	"joinTRSColumn": "Lien notice principale",
	"startTimeColumn": "antract_debut",
	"endTimeColumn": "antract_fin",
	"typeColumns": "Titre propre",
	"topicColumns": "Date de diffusion",
	"metadataColumns": "Titre propre;Date de diffusion;Identifiant de la notice;Notes du titre;Type de date;Durée;Genre;Langue VO / VE;Nature de production;Producteurs (Aff.);Thématique;Nom fichier segmenté (info);antract_video;antract_debut;antract_fin;antract_duree;antract_tc_type;antract_tc_date;Résumé;Séquences;Descripteurs (Aff. Lig.);Générique (Aff. Lig.)",
	"metadataColumnsGroups": "metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;metadata;secondary;secondary;secondary;secondary;secondary;secondary;secondary;text;text;text;text"
]
gse.runMacro(AddSectionsFromTableMacro, addSectionsParameters)

// Step 4: copy the "emissions" table next to the resulting TRS files, as "metadata.xlsx".
FileCopy.copy(emissionsFile, new File(outputDirectory, "metadata.xlsx"))

// All steps done.
println "Done: import to finalize using the "+outputDirectory+" directory."
|
48 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/BuildAFMetadataMacro.groovy (revision 2942) | ||
---|---|---|
1 |
package org.txm.macro.projects.antract |
|
2 |
|
|
3 |
import java.io.File |
|
4 |
import java.nio.charset.Charset |
|
5 |
|
|
6 |
import java.time.LocalTime |
|
7 |
import java.time.format.DateTimeFormatter |
|
8 |
import java.util.Arrays |
|
9 |
import java.util.HashMap |
|
10 |
import java.util.List |
|
11 |
|
|
12 |
import org.txm.libs.msoffice.ReadExcel |
|
13 |
import org.txm.utils.* |
|
14 |
|
|
15 |
// Main table file; the generated tables are written next to it.
@Field @Option(name="tableFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="all.xlsx") // /home/mdecorde/TEMP/ANTRACT/AF/all.xlsx
File tableFile;

// When true, "sujets.xlsx" is built from the "Notice sujet" lines.
@Field @Option(name="buildSujetsMetadata", usage="Build the sujets metadata", widget="Boolean", required=true, def="true")
boolean buildSujetsMetadata;

// When true, "emissions.xlsx" is built from the "Notice sommaire" lines.
@Field @Option(name="buildEmissionsMetadata", usage="Build the emissions metadata", widget="Boolean", required=true, def="true")
boolean buildEmissionsMetadata;

if (!ParametersDialog.open(this)) return;

File table2File = null;

System.out.println("opening $tableFile...");
ReadExcel excel = new ReadExcel(tableFile, null);

// The source table is shared by both extractions: it is closed once, here, when
// everything is done (or has failed), and not by process().
try {
	if (buildEmissionsMetadata) {
		//emissions
		table2File = new File(tableFile.getParentFile(), "emissions.xlsx");
		HashMap<String, String> lineRules = new HashMap<>(); // line tests to select line to keep
		List<String> columnsSelection; // list of columns to keep
		HashMap<String, String> columnsToCopy = new HashMap<>();
		HashMap<String, String> columnsToRenameRules = new HashMap<>();
		HashMap<String, String[]> searchAndReplaceRules = new HashMap<>();

		columnsSelection = Arrays.asList(
				"Identifiant de la notice", "Titre propre", "Notes du titre", "Date de diffusion", "Durée", "Nom fichier segmenté (info)", "antract_video",
				"antract_debut","antract_fin","antract_duree","antract_tc_type","antract_tc_date");

		lineRules.put("Type de notice", "Notice sommaire");

		columnsToRenameRules.put("Identifiant de la notice", "id");

		columnsToCopy.put("Notes du titre", "subtitle");
		columnsToCopy.put("Titre propre", "title");
		// NOTE(review): both puts below use the key "Date de diffusion", so the HashMap only
		// keeps the second one ("annee") and the "textorder" entry is lost - to be fixed
		// once the expected shape of the ReadExcel.copyColumns() argument is confirmed.
		columnsToCopy.put("Date de diffusion", "textorder");
		columnsToCopy.put("Date de diffusion", "annee");

		searchAndReplaceRules.put("textorder", ["([0-9][0-9])/([0-9][0-9])/([0-9][0-9][0-9][0-9])", '$3$2$1'] as String[]); // not working yet
		searchAndReplaceRules.put("annee", ["([0-9][0-9])/([0-9][0-9])/([0-9][0-9][0-9][0-9])", '$3'] as String[]); // not working yet

		process(excel, table2File, lineRules, columnsSelection, columnsToCopy, searchAndReplaceRules, columnsToRenameRules)
	}

	if (buildSujetsMetadata) {
		// sujets
		table2File = new File(tableFile.getParentFile(), "sujets.xlsx");
		HashMap<String, String> lineRules = new HashMap<>(); // line tests to select line to keep
		List<String> columnsSelection; // list of columns to keep
		HashMap<String, String> columnsToCopy = new HashMap<>();
		HashMap<String, String> columnsToRenameRules = new HashMap<>();
		HashMap<String, String[]> searchAndReplaceRules = new HashMap<>();

		columnsSelection = Arrays.asList(
				"Identifiant de la notice", "Titre propre", "Notes du titre", "Lien notice principale",
				"Date de diffusion", "Type de date", "Durée", "Genre", "Langue VO / VE", "Nature de production", "Producteurs (Aff.)", "Thématique",
				"Nom fichier segmenté (info)", "antract_video", "antract_debut", "antract_fin", "antract_duree", "antract_tc_type", "antract_tc_date",
				"Résumé", "Séquences", "Descripteurs (Aff. Lig.)", "Générique (Aff. Lig.)");

		lineRules.put("Type de notice", "Notice sujet");

		process(excel, table2File, lineRules, columnsSelection, columnsToCopy, searchAndReplaceRules, columnsToRenameRules)
	}
} finally {
	excel.close();
}

/**
 * Builds the table2File table from a selection of lines and columns of the excel table,
 * then applies the copy, search&replace and rename rules to the new table and saves it.
 * An existing table2File is deleted first. The source table is left open: closing it is
 * up to the caller, so that several tables can be extracted from the same source.
 *
 * @param excel the source table
 * @param table2File the file of the table to create
 * @param lineRules the tests selecting the lines to keep
 * @param columnsSelection the names of the columns to keep
 * @param columnsToCopy the columns to copy, passed to ReadExcel.copyColumns(); skipped if empty
 * @param searchAndReplaceRules the rules passed to ReadExcel.searchAndReplaceInLines(); skipped if empty
 * @param columnsToRenameRules the rules passed to ReadExcel.renameColumns(); skipped if empty
 * @return true if table2File exists once saved, false if the extraction failed
 */
def process(ReadExcel excel, File table2File, def lineRules, def columnsSelection, def columnsToCopy, def searchAndReplaceRules, def columnsToRenameRules) {
	System.out.println("Writing: $table2File");

	table2File.delete();
	ReadExcel excel2 = new ReadExcel(table2File, null);
	println " Selecting $columnsSelection with lines matching $lineRules"
	if (!excel.extractTo(excel2, lineRules, columnsSelection)) {
		System.out.println("FAIL");
		return false;
	}

	if (columnsToCopy.size() > 0) {
		System.out.println(" Copying column: " + columnsToCopy);
		excel2.copyColumns(columnsToCopy);
		println " WARNING: ReadExcel.copyColumns() not implemented"
	}

	if (searchAndReplaceRules.size() > 0) {
		System.out.println(" Search&replace column: " + searchAndReplaceRules);
		excel2.searchAndReplaceInLines(searchAndReplaceRules);
	}

	if (columnsToRenameRules.size() > 0) {
		System.out.println(" Renaming column: " + columnsToRenameRules);
		excel2.renameColumns(columnsToRenameRules);
	}

	System.out.println(" Saving&Closing...");
	excel2.save();
	excel2.close();
	System.out.println("Done: $table2File");

	return table2File.exists()
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/FixINATRSMacro.groovy (revision 2942) | ||
---|---|---|
1 |
package org.txm.macro.projects.antract |
|
2 |
|
|
3 |
import org.txm.utils.ConsoleProgressBar |
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
|
|
8 |
// Directory whose "*.trs" files are fixed in place.
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS à corriger", widget="Folder", required=true, def="AF")
def trsDirectory

if (!ParametersDialog.open(this)) return;

println "Fixing $trsDirectory"
def files = trsDirectory.listFiles()
// File.listFiles() returns null when the path is not a directory or cannot be read.
if (files == null) {
	println "$trsDirectory is not a readable directory"
	return
}

ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
for (File trsFile : files) {
	//File trsFile = new File(directory, "AFE86004868.trs")
	cpb.tick()
	if (!trsFile.getName().endsWith(".trs")) {
		continue;
	}
	String content = trsFile.getText("UTF-8")
	// drop the extra double quote written after a punct attribute value
	content = content.replaceAll("punct=\"([^\"]+)\"\">", "punct=\"\$1\">")
	// "<unk>" is not a legal element here: replace it with plain text
	content = content.replaceAll("<unk>", "???")
	// rename the "Time" attribute to lower case
	content = content.replaceAll(" Time=\"", " time=\"")
	trsFile.setText(content, "UTF-8")
}
cpb.done()
println "Done"
Formats disponibles : Unified diff