/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 2944

     	columnsSelection = Arrays.asList(
     	"Identifiant de la notice", "Titre propre", "Notes du titre", "Date de diffusion", "Durée", "Nom fichier segmenté (info)", "antract_video",
     	"antract_debut","antract_fin","antract_duree","antract_tc_type","antract_tc_date");
     	lineRules.put("Type de notice", "Notice sommaire");
     	columnsToRenameRules.put("Identifiant de la notice", "id");
-...
     	if (columnsToCopy.size() > 0) {
     		System.out.println(" Copying column: " + columnsToCopy);
     		excel2.copyColumns(columnsToCopy);
     		println " WARNING: ReadExcel.copyColumns() not implemented"
+    	}
     	if (searchAndReplaceRules.size() > 0) {

     	content = content.replaceAll("punct=\"([^\"]+)\"\">", "punct=\"\$1\">")
     	content = content.replaceAll("<unk>", "???")
     	content = content.replaceAll(" Time=\"", " time=\"")
     	content = content.replaceAll("<w startTime=\"0.00\" endTime=\"0\" conf=\"\" pos=\"\" punct=\"\" case=\"\" ne=\"\"></w>\n", "")
     	content = content.replaceAll("<Turn startTime=\"<o,o,o>\" endTime=\"\" speaker=\"0\">\n<Sync time=\"<o,o,o>\"/>\n</Turn>\n", "")
     	content = content.replaceAll("<Section type=\"report\" startTime=\"<o,o,o>\" endTime=\"\">", "<Section type=\"report\" startTime=\"0.0\" endTime=\"\">")
     	trsFile.setText(content, "UTF-8")
+    }
     cpb.done()

     import java.time.LocalTime
     import java.time.format.DateTimeFormatter
     import org.txm.utils.*
     import org.txm.utils.logger.*
     @Field @Option(name="metadataFile", usage="Tableau des metadonnées de sections", widget="FileOpen", required=true, def="")
     File metadataFile;
-...
     	LocalTime time1 = LocalTime.parse(totalFrame, dateTimeFormatter)
     	totalFrame = (time1.getHour()*60*60) + (time1.getMinute()*60) + time1.getSecond()
     	def ret = totalFrame + (bonusFrame/25)
     	return ret
+    }
     		def ret = totalFrame + (bonusFrame/25)
     		return ret
+    	}
     def sectionGroupsToInsert = [:]
     println "Reading data..."
     while (reader.readRecord()) {
     	String id = reader.get(joinTRSColumn).trim()
     	if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
     	if (id.length() == 0) continue;
     try {
     	def sectionGroupsToInsert = [:]
     	println "Reading data..."
     	while (reader.readRecord()) {
     		String id = reader.get(joinTRSColumn).trim()
     		if (id.endsWith(".mp4")) id = id.substring(0, id.length()-4)
     		if (id.length() == 0) continue;
     	if (!sectionGroupsToInsert.containsKey(id)) {
     		sectionGroupsToInsert[id] = []
+    	}
     	def section = sectionGroupsToInsert[id]
     	if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
     		if (!sectionGroupsToInsert.containsKey(id)) {
     			sectionGroupsToInsert[id] = []
+    		}
     		def section = sectionGroupsToInsert[id]
     		def m = [:]
     		for (def todo : ["topic":topicColumns, "type":typeColumns]) {
     			def data = []
     			for (def col : todo.value) {
     				if (reader.get(col).trim().length() > 0) {
     					data << reader.get(col).trim().replace("\n", "")
     		if (reader.get(startTimeColumn) != null && reader.get(startTimeColumn).length() > 0) { // ignore non timed sections
     			def m = [:]
     			for (def todo : ["topic":topicColumns, "type":typeColumns]) {
     				def data = []
     				for (def col : todo.value) {
     					if (reader.get(col).trim().length() > 0) {
     						data << reader.get(col).trim().replace("\n", "")
+    					}
+    				}
     				m[todo.key] = data.join("\t")
+    			}
     			m[todo.key] = data.join("\t")
     			def metadataList = []
     			def metadataGroupList = []
     			for (int i = 0 ;  i < metadataColumns.size() ; i++) {
     				def col = metadataColumns[i]
     				String c = AsciiUtils.buildAttributeId(col)
     				m[c] = reader.get(col)
     				metadataList << c
     				metadataGroupList << metadataColumnsGroups[i]
+    			}
     			m["metadata"] = metadataList.join("|")
     			m["metadata_groups"] = metadataGroupList.join("|")
     			m["startTime"] = strTotime(reader.get(startTimeColumn))
     			m["endTime"] = strTotime(reader.get(endTimeColumn))
     			m["synchronized"] = "true"
     			section << [m["startTime"], m["endTime"], m]
+    		}
     		def metadataList = []
     		def metadataGroupList = []
     		for (int i = 0 ;  i < metadataColumns.size() ; i++) {
     			def col = metadataColumns[i]
     			String c = AsciiUtils.buildAttributeId(col)
     			m[c] = reader.get(col)
     			metadataList << c
     			metadataGroupList << metadataColumnsGroups[i]
+    		}
     		m["metadata"] = metadataList.join("|")
     		m["metadata_groups"] = metadataGroupList.join("|")
     		m["startTime"] = strTotime(reader.get(startTimeColumn))
     		m["endTime"] = strTotime(reader.get(endTimeColumn))
     		m["synchronized"] = "true"
     		section << [m["startTime"], m["endTime"], m]
+    	}
+    }
     println "Inserting sections... "+sectionGroupsToInsert.size()
     ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
     for (String id : sectionGroupsToInsert.keySet()) {
     	cpb.tick()
     	File trsFile = new File(trsDirectory, id+".trs")
     	if (!trsFile.exists()) {
     		continue
+    	}
     	//println "Processing $id..."
     	def sections = sectionGroupsToInsert[id]
     	sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> b[1] }
     	// Open input file
     	def slurper = new groovy.util.XmlParser(false, true, true);
     	slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
     	slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
     	def trs = slurper.parse(trsFile.toURI().toString())
     	def trsEpisodes = trs.Episode // 1
     	if (trsEpisodes.size() > 1) {
     		println "multiple Episode node in $trsFile"
     		continue
+    	}
     	def trsEpisode = trsEpisodes[0]
     	def trsSections =  trs.Episode.Section // 1
     	if (trsSections.size() > 1) {
     		println "multiple Section node in $trsFile"
     		continue
+    	}
     	def trsSection = trsSections[0]
     	println "Inserting sections... "+sectionGroupsToInsert.size()
     	def turns = trsSection.Turn
     	def newSections = []
     	def iSection = 0;
     	def currentSection = null
     	def currentNode = null
     	for (def turn : turns) {
     		def start = Float.parseFloat(turn.@startTime)
     		def end = Float.parseFloat(turn.@endTime)
     	ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
     	for (String id : sectionGroupsToInsert.keySet()) {
     		cpb.tick()
     		File trsFile = new File(trsDirectory, id+".trs")
     		if (!trsFile.exists()) {
     			continue
+    		}
     		//println "Processing $id..."
     		def sections = sectionGroupsToInsert[id]
     		sections = sections.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> b[1] }
     		def found = null;
     		for (int i = iSection ; i < sections.size() ; i++) {
     			if (end < sections[i][0]) { // Turn ends before the section starts
     			} else if (sections[i][1] < start) { // Turn starts after the section ends
     			} else {
     				found = sections[i]
     				iSection = i
     				break; // stop searching and set iSection to accelerate next search
+    			}
     		// Open input file
     		def slurper = new groovy.util.XmlParser(false, true, true);
     		slurper.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false) // allow DTD declaration
     		slurper.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "all"); // allow to read DTD from local file
     		def trs = slurper.parse(trsFile.toURI().toString())
     		def trsEpisodes = trs.Episode // 1
     		if (trsEpisodes.size() > 1) {
     			println "multiple Episode node in $trsFile"
     			continue
+    		}
     		def trsEpisode = trsEpisodes[0]
     		def trsSections =  trs.Episode.Section // 1
     		if (trsSections.size() > 1) {
     			println "multiple Section node in $trsFile"
     			continue
+    		}
     		def trsSection = trsSections[0]
     		if (found == null) {
     			if (currentSection != null || currentNode == null) {
     				currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
     				currentSection = null;
     		def turns = trsSection.Turn
     		def newSections = []
     		def iSection = 0;
     		def currentSection = null
     		def currentNode = null
     		for (def turn : turns) {
     			def start = Float.parseFloat(turn.@startTime)
     			def end = Float.parseFloat(turn.@endTime)
     			def found = null;
     			// Look for the first section, from index iSection onwards, whose [start, end] interval overlaps the turn
     			for (int i = iSection ; i < sections.size() ; i++) {
     				if (end < sections[i][0]) { // Turn ends before the section starts
     				} else if (sections[i][1] < start) { // Turn starts after the section ends
     				} else {
     					found = sections[i]
     					iSection = i
     					break; // stop searching and set iSection to accelerate next search
+    				}
+    			}
     		} else {
     			if (found != currentSection) {
     				if (currentNode != null && currentNode.@synchronized == "false") {
     					def tmp = currentNode.Turn
     					currentNode.@endTime = tmp[-1].@endTime
     			if (found == null) {
     				if (currentSection != null || currentNode == null) {
     					currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
     					currentSection = null;
+    				}
     				currentSection = found
     				currentNode = new Node(trsEpisode, "Section", currentSection[2])
     			} else {
     				if (found != currentSection) {
     					if (currentNode != null && currentNode.@synchronized == "false") {
     						def tmp = currentNode.Turn
     						currentNode.@endTime = tmp[-1].@endTime
+    					}
     					currentSection = found
     					currentNode = new Node(trsEpisode, "Section", currentSection[2])
+    				}
+    			}
     			trsSection.remove(turn)
     			currentNode.append(turn)
+    		}
     		trsSection.remove(turn)
     		currentNode.append(turn)
     		//remove the initial section
     		trsEpisode.remove(trsSection)
     		outputDirectory.mkdir()
     		File outfile = new File(outputDirectory, trsFile.getName())
     		outfile.withWriter("UTF-8") { writer ->
     			writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
     			def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
     			printer.setPreserveWhitespace(true)
     			printer.print(trs)
+    		}
+    	}
     	cpb.done()
     	reader.close()
     	println "Done."
     	//remove the initial section
     	trsEpisode.remove(trsSection)
     	outputDirectory.mkdir()
     	File outfile = new File(outputDirectory, trsFile.getName())
     	outfile.withWriter("UTF-8") { writer ->
     		writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
     		def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
     		printer.setPreserveWhitespace(true)
     		printer.print(trs)
+    	}
     } catch(Exception e) {
     	println "Error: "+e
     	Log.printStackTrace(e)
+    }
     cpb.done()
     reader.close()
     println "Done."

tmp/org.txm.libs.msoffice/src/org/txm/libs/msoffice/ReadExcel.java (revision 2944)
207	207	}
208	208
209	209	System.out.println("" + nRowWritten + " rows updated.");
	210
210	211	return nRowWritten > 0;
211	212	}
212	213

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 2944