Révision 3030

tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3030)
20 20
		
21 21
@Field @Option(name="nonPrimarySpeakerRegex", usage="other non primary id of the other turns", widget="String", required=false, def="")
22 22
		String nonPrimarySpeakerRegex
23
		
24
@Field @Option(name="newSectionMarker", usage="section marker", widget="String", required=false, def="*#")
25
		String newSectionMarker
23 26

  
24 27
@Field @Option(name="debug", usage="speaker ID of the primary speaker", widget="Boolean", required=false, def="false")
25 28
		Boolean debug
......
39 42
gse.runMacro(org.txm.macro.transcription.Vocapia2TranscriberMacro, ["vocapiaDirectory":vocapiaDirectory, "resultDirectory":trsDirectory])
40 43

  
41 44

  
45
println "DETECTING MARKED SECTIONS..."
42 46

  
43 47
trsFiles = trsDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")}
44 48
if (trsFiles.size() == 0) {
......
46 50
	return false
47 51
}
48 52

  
53
File sectionsDirectory = new File(resultDirectory, "sections")
54
sectionsDirectory.mkdir()
55
gse.runMacro(org.txm.macro.transcription.SegmentTRSInSectionFromMarkerMacro, ["trsDirectory":trsDirectory, "resultDirectory":sectionsDirectory, "newSectionMarker":newSectionMarker])
56

  
57

  
49 58
println "CREATE THE 'OTHER' TURNS..."
59

  
60
trsFiles = sectionsDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")}
61
if (trsFiles.size() == 0) {
62
	println "No XML file found in $sectionsDirectory"
63
	return false
64
}
65

  
50 66
File otherDirectory = new File(resultDirectory, "otherturns")
51 67
otherDirectory.mkdir()
52
ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size())
68
cpb = new ConsoleProgressBar(trsFiles.size())
53 69
for (File file : trsFiles) {
54 70
	cpb.tick()
55 71
	
......
63 79
}
64 80
cpb.done()
65 81

  
82

  
83
println "NORMALIZING LOCUTORS..."
84

  
66 85
trsFiles = otherDirectory.listFiles().findAll(){it.getName().toLowerCase().endsWith(".trs")}
67 86
if (trsFiles.size() == 0) {
68 87
	println "No XML file found in $otherDirectory"
69 88
	return false
70 89
}
71 90

  
72
// FIX LOCUTORS
73
println "NORMALIZING LOCUTORS..."
74 91
cpb = new ConsoleProgressBar(trsFiles.size())
75 92
for (File file : trsFiles) {
76 93
	cpb.tick()
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/SegmentTRSInSectionFromMarkerMacro.groovy (revision 3030)
10 10
@Field @Option(name="trsDirectory", usage="Dossier qui contient les fichiers TRS", widget="Folder", required=true, def="")
11 11
File trsDirectory;
12 12

  
13
@Field @Option(name="resultDirectory", usage="Dossier résultat TRS", widget="Folder", required=true, def="")
14
File resultDirectory;
15

  
13 16
@Field @Option(name="newSectionMarker", usage="The marker, spaces included", widget="String", required=true, def=" *#")
14 17
def newSectionMarker
15 18

  
19

  
16 20
if (!ParametersDialog.open(this)) return;
17 21

  
18 22
if (!trsDirectory.exists()) {
......
20 24
	return
21 25
}
22 26

  
23
File outputDirectory = new File(trsDirectory, "out")
24
println "Writing result to $outputDirectory..."
27
println "Writing result to $resultDirectory..."
25 28

  
26 29
dateTimeFormatter = DateTimeFormatter.ISO_LOCAL_TIME
27 30
def strTotime(def str) {
......
44 47
}
45 48

  
46 49
try {
47
	def trsFiles = trsDirectory.list(new FilenameFilter() { public boolean accept(File dir, String name) { return name.toLowerCase().endsWith(".trs"); } });
50
	def trsFiles = trsDirectory.listFiles().findAll() { it.getName().toLowerCase().endsWith(".trs") }
48 51
	
49 52
	if (trsFiles.size() == 0) {
50 53
		println "No TRS file to process in $trsDirectory"
51 54
		return;
52 55
	}
53 56
	
54
	ConsoleProgressBar cpb = new ConsoleProgressBar(sectionGroupsToInsert.keySet().size())
57
	ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size())
55 58
	for (File trsFile : trsFiles) {
56 59
		cpb.tick()
57 60

  
......
68 71
		def trsEpisode = trsEpisodes[0]
69 72
		def trsSections =  trs.Episode.Section // 1
70 73
		if (trsSections.size() > 1) {
71
			println "multiple Section node in $trsFile"
74
			println "Error: multiple Section node in $trsFile"
72 75
			continue
73 76
		}
74 77
		def trsSection = trsSections[0]
......
76 79
		def turns = trsSection.Turn
77 80
		def newSections = []
78 81
		def iSection = 0;
79
		def currentSection = null
80
		def currentNode = null
82
		def previousSection = trsSection
83
		def currentSection = trsSection
81 84
		
82
		for (def turn : turns) {
85
		for (int iTurn = 0 ; iTurn < turns.size() ; iTurn++) {
86
			def turn = turns[iTurn]
83 87
			def start = Float.parseFloat(turn.@startTime)
84 88
			def end = Float.parseFloat(turn.@endTime)
85 89
			
86
			def found = null
87
			for (int i = iSection ; i < sections.size() ; i++) {
88
				if (end < sections[i][0]) { // Turn is before section
89
					
90
				} else if (sections[i][1] < start) { // Turn is after section
91
					
92
				} else {
93
					found = sections[i]
94
					iSection = i
95
					break // stop searching and set iSection to accelerate next search
96
				}
97
			}
98
			
99
			if (found == null) {
100
				if (currentSection != null || currentNode == null) {
101
					currentNode = new Node(trsEpisode, "Section", ["type":"Sujet non synchronisé", "startTime":turn.@startTime, "endTime":"", "synchronized":"false"] )
102
					currentSection = null;
103
				}
104
			} else {
105
				if (found != currentSection) {
106
					if (currentNode != null && currentNode.@synchronized == "false") {
107
						def tmp = currentNode.Turn
108
						currentNode.@endTime = tmp[-1].@endTime
90
			String content = turn.text().trim()
91
			println "TURN: "+content
92
			if (content.startsWith(newSectionMarker)) {
93
				println "NEW SECTION !"
94
				previousSection = currentSection
95
				currentSection = new Node(trsEpisode, "Section", new LinkedHashMap(["type":newSectionMarker, "startTime":turn.@startTime, "endTime":previousSection.attributes()["endTime"]]))
96
				
97
				previousSection.attributes()["endTime"] = turn.@startTime
98
				turns[iTurn-1].attributes()["endTime"] = turn.@startTime
99
				
100
					def children = turn.children()
101
					for (int i = 0 ; i < children.size() ; i++) {
102
						def node = children[i]
103
						if (node instanceof String) {
104
							println "FIXING: $node "
105
							children.remove(i)
106
							i--
107
						} else if (node instanceof groovy.util.Node && node.name() == "w" && node.text().contains(newSectionMarker)) {
108
							println "FIXING: w $node "
109
							children.remove(node)
110
						}
109 111
					}
110
					
111
					currentSection = found
112
					currentNode = new Node(trsEpisode, "Section", currentSection[2])
113
				}
114 112
			}
113
			
115 114
			trsSection.remove(turn)
116
			currentNode.append(turn)
115
			currentSection.append(turn)
117 116
		}
118 117
		
119
		//remove the initial section
120
		trsEpisode.remove(trsSection)
121
		
122
		outputDirectory.mkdir()
123
		File outfile = new File(outputDirectory, trsFile.getName())
118
		resultDirectory.mkdir()
119
		File outfile = new File(resultDirectory, trsFile.getName())
124 120
		outfile.withWriter("UTF-8") { writer ->
125 121
			writer.write('<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Trans SYSTEM "trans-14.dtd">\n')
126 122
			def printer = new groovy.util.XmlNodePrinter(new PrintWriter(writer))
......
129 125
		}
130 126
	}
131 127
	cpb.done()
132
	reader.close()
128
	
133 129
	println "Done."
134 130
	
135 131
} catch(Exception e) {
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2Transcriber.groovy (revision 3030)
40 40
		def turninfos = new LinkedHashMap()
41 41
		boolean other = false;
42 42
		String word = ""
43
		String duration = "0.0"
43 44
		try {
44 45
			
45 46
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
......
84 85
								writer.writeEndElement()
85 86
								writer.writeCharacters("\n")
86 87
								break;
88
								
89
							case "Channel":
90
								duration = parser.getAttributeValue(null, "sigdur")
91
								break;
87 92
							
88 93
							case "SegmentList":
89 94
								writer.writeStartElement("Episode")
90 95
							//<Section type="report" startTime="0" endTime="3617.593">
96
								
91 97
								writer.writeStartElement("Section")
98
								writer.writeAttribute("startTime", "0.0")
99
								writer.writeAttribute("endTime", duration)
92 100
								for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
93 101
									writer.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i))
94 102
								}

Formats disponibles : Unified diff