Révision 3020

tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/FixTranscriptionsMacro.groovy (revision 3020)
1
package org.txm.macro.projects.nov13
2

  
3
import java.time.LocalTime
4
import java.time.format.DateTimeFormatter
5
import org.txm.utils.*
6
import org.txm.utils.logger.*
7

  
8
// Macro: runs FixTranscription on a single .trs file or on every .trs file of a
// directory and writes the fixed copies into resultDirectory.
//
// NOTE(review): @Field, @Option and ParametersDialog are not covered by the imports
// visible in this file - confirm they are brought into scope elsewhere.

@Field @Option(name="trsFile", usage="A single vocapia XML file", widget="FileOpen", required=false, def="")
		File trsFile

@Field @Option(name="trsDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="")
		File trsDirectory

@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="")
		File resultDirectory

@Field @Option(name="primarySpeakerIdPrefix", usage="speaker ID of the primary speaker", widget="String", required=false, def="")
		String primarySpeakerIdPrefix

@Field @Option(name="otherNonPrimaryLocutor", usage="other non primary id of the other turns", widget="String", required=false, def="")
		String otherNonPrimaryLocutor

// the usage text used to be a copy of the primarySpeakerIdPrefix one
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=false, def="false")
		Boolean debug

if (!ParametersDialog.open(this)) return;

// resultDirectory is dereferenced below (mkdirs): refuse to go on without it
if (resultDirectory == null) {
	println "No result directory set: resultDirectory=$resultDirectory"
	return false
}

// never write the results next to the sources: output files keep the source file names
if (resultDirectory.equals(trsDirectory) || (trsFile != null && trsFile.getParentFile().equals(resultDirectory))) {
	println "Result directory must be different from the source files directory: $resultDirectory"
	return false;
}

resultDirectory.mkdirs();

// trsDirectory takes precedence over trsFile when both are set
def trsFiles = []
if (trsDirectory != null && trsDirectory.exists()) {
	
	println "Processing TRS directory: $trsDirectory"
	for (File file : trsDirectory.listFiles()) {
		if (file.getName().toLowerCase().endsWith(".trs")) {
			trsFiles << file
		}
	}
} else if (trsFile != null && trsFile.exists()) {
	println "Processing TRS file: $trsFile"
	trsFiles << trsFile
}

if (trsFiles.size() == 0) {
	println "No XML file found for parameters trsFile=$trsFile and trsDirectory=$trsDirectory"
	return false
}

ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size())
for (File file : trsFiles) {
	cpb.tick()
	
	FixTranscription fixer = new FixTranscription(file, primarySpeakerIdPrefix, otherNonPrimaryLocutor, debug)
	String name = FileUtils.stripExtension(file)
	File outFile = new File(resultDirectory, name+".trs")
	
	// the macro stops at the first file that cannot be processed
	if (!fixer.process(outFile)) {
		println "WARNING: ERROR WHILE PROCESSING: "+file
		return false
	}
}
cpb.done()

println "Done: "+trsFiles.size()+" files processed. Result files in $resultDirectory"
69

  
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/FixTranscription.groovy (revision 3020)
1
package org.txm.macro.projects.nov13
2

  
3
import javax.xml.stream.*
4

  
5
import org.txm.importer.PersonalNamespaceContext
6
import org.txm.utils.FileUtils
7
import org.txm.xml.IdentityHook
8
import org.txm.xml.*
9

  
10
import java.io.BufferedOutputStream
11
import java.io.FileOutputStream
12
import java.io.IOException
13
import java.net.URL
14
import java.util.*
15
import java.util.Map.Entry
16
import java.util.regex.Pattern
17

  
18
/**
 * XML processor that rewrites the "w" elements of a transcription file:
 * <ul>
 * <li>a word starting with "*" closes the current Turn and opens a new Turn attributed
 * to another speaker; a word ending with "*" closes that Turn and re-opens the original one;</li>
 * <li>a word equal to "XXX" is replaced by an empty "event" element.</li>
 * </ul>
 * The "*" markers are removed from the written words.
 */
class FixTranscription extends XMLProcessor {
	
	LocalNamesHookActivator activator;
	IdentityHook hook;
	
	/** Regex built from the primary speaker prefix; null when the primary speaker is not known. */
	def primarySpeakerIdRegex
	/** Primary speaker id extracted from the file name; only set when primarySpeakerIdRegex is kept. */
	String primarySpeakerId
	
	Boolean debug
	
	/** Speaker id given to the created Turns when they are not attributed to the primary speaker. */
	String otherNonPrimaryLocutor = "other"
	
	/**
	 * @param xmlfile the transcription file to process
	 * @param primarySpeakerIdPrefix prefix of the primary speaker id, searched in the file name
	 *        (interpolated as-is into a regex); may be null or empty
	 * @param otherNonPrimaryLocutor speaker id of the created Turns; null or empty keeps the "other" default
	 * @param debug when true, prints a warning if the primary speaker cannot be detected
	 */
	public FixTranscription(File xmlfile, String primarySpeakerIdPrefix, String otherNonPrimaryLocutor, Boolean debug) {
		super(xmlfile)
		this.debug = debug
		
		// an empty value (the macro default) must not erase the "other" default
		if (otherNonPrimaryLocutor != null && otherNonPrimaryLocutor.length() > 0) {
			this.otherNonPrimaryLocutor = otherNonPrimaryLocutor
		}
		// captured by the hook below; a local avoids any ambiguity with the constructor parameter
		final String otherSpeakerId = this.otherNonPrimaryLocutor
		
		if (primarySpeakerIdPrefix != null && primarySpeakerIdPrefix.length() > 0) {
			String id = FileUtils.stripExtension(xmlfile)
			
			this.primarySpeakerIdRegex = /$primarySpeakerIdPrefix.*/
			
			def rez = (id =~ primarySpeakerIdRegex).findAll()
			def rez2 = (id =~ /$primarySpeakerIdPrefix/).findAll()
			// the prefix must occur exactly once in the file name to identify the primary speaker
			if (rez2.size() != 1) {
				if (debug) println "WARNING: found the ${rez2.size()} matches of primary speaker prefix in the '$id' file name"
				this.primarySpeakerIdRegex = null
			} else {
				primarySpeakerId = rez[0]
			}
		}
		
		// NOTE(review): 'hook' is still null when handed to the activator - confirm the
		// IdentityHook constructor registers itself on the activator.
		activator = new LocalNamesHookActivator<>(hook, ["w", "Turn", "Sync"]);
		
		hook = new IdentityHook("word_hook", activator, this) {
					
					boolean inTurn = false;
					
					/** true between the start and the end of a "w" element */
					boolean inW = false;
					/** text of the "w" element being read */
					StringBuilder wordBuffer = new StringBuilder();
					
					/** last time seen on a Turn (startTime), a Sync (time) or a w (time) */
					String currentTime;
					/** attributes of the current source Turn */
					LinkedHashMap turnInfos = new LinkedHashMap()
					/** attributes of the current w */
					LinkedHashMap wInfos = new LinkedHashMap()
					/** true while a Turn created by a "*" marker is open */
					boolean other
					
					@Override
					public boolean deactivate() {
						return true;
					}
					
					@Override
					public boolean _activate() {
						return true;
					}
					
					@Override
					protected void processStartElement() throws XMLStreamException, IOException {
						if (localname.equals("Turn")) {
							// remember the Turn attributes, they are needed to split the Turn later
							inTurn = true;
							turnInfos.clear()
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
								turnInfos[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i)
							}
							currentTime = turnInfos["startTime"]
							super.processStartElement();
						} else if (localname.equals("Sync")) {
							currentTime = parser.getAttributeValue(null, "time")
							super.processStartElement();
						} else if (localname.equals("w")) {
							// remember the word attributes; the element itself is written in processEndElement
							inW = true;
							wInfos.clear()
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
								wInfos[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i)
							}
							String time = parser.getAttributeValue(null, "time")
							if (time != null && time.length() > 0) {
								currentTime = time
							}
							wordBuffer.setLength(0);
						} else {
							super.processStartElement();
						}
					}
					
					@Override
					protected void processCharacters() throws XMLStreamException {
						if (inW) {
							// buffered: the word is written once its whole text is known
							wordBuffer.append(parser.getText())
						} else {
							super.processCharacters();
						}
					}
					
					@Override
					protected void processEndElement() throws XMLStreamException {
						if (!localname.equals("w")) {
							super.processEndElement();
							return;
						}
						
						inW = false
						String word = wordBuffer.toString().trim()
						
						if (!other && word.startsWith("*")) {
							// close the current Turn and start a new Turn for the other speaker
							writer.writeEndElement() // current Turn
							writer.writeCharacters("\n")
							
							def tmpInfos = new LinkedHashMap(turnInfos)
							tmpInfos["orig-speaker"] = turnInfos["speaker"]
							
							// the new Turn gets the speaker that is NOT the one of the source Turn
							if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) {
								tmpInfos["speaker"] = otherSpeakerId
							} else {
								tmpInfos["speaker"] = primarySpeakerId
							}
							tmpInfos["startTime"] = currentTime
							writer.writeStartElement("Turn")
							for (String attr : tmpInfos.keySet()) {
								writer.writeAttribute(attr, tmpInfos[attr])
							}
							
							other = true
							word = word.substring(1)
						}
						
						// a word can both open and close the created Turn (e.g. "*word*")
						boolean shouldCloseOtherTurn = false;
						if (other && word.endsWith("*")) {
							shouldCloseOtherTurn = true;
							word = word.substring(0, word.length()-1)
							other = false
						}
						
						if ("XXX".equals(word)) {
							// replace the word with an event element
							writer.writeStartElement("event")
							writer.writeAttribute("desc", "XXX from "+wInfos["start"] + " to "+wInfos["end"])
							writer.writeAttribute("type", "unknown")
							writer.writeAttribute("extent", "instantaneous")
							writer.writeEndElement() // event
							word = "" // don't write the word
						}
						
						if (word.length() > 0) {
							// write the word with its original attributes and without the "*" markers
							writer.writeStartElement("w")
							for (String attr : wInfos.keySet() ) {
								writer.writeAttribute(attr, wInfos[attr])
							}
							writer.writeCharacters(word)
							writer.writeEndElement() // w
						}
						
						if (shouldCloseOtherTurn) {
							// close the created Turn and re-open the source Turn
							writer.writeEndElement() // created Turn
							
							writer.writeStartElement("Turn")
							turnInfos["startTime"] = wInfos["end"] // the re-opened Turn starts where the current word ends
							for (String attr : turnInfos.keySet()) {
								writer.writeAttribute(attr, turnInfos[attr])
							}
						}
					}
				}
	}
	
	/** Manual test entry point: processes a hard-coded file with no primary speaker and the default other speaker id. */
	public static void main(String[] args) {
		File infile = new File("/home/mdecorde/xml/vocapia","test.trs")
		File outfile = new File("/home/mdecorde/xml/vocapia","test-fixed.trs")
		// the only constructor takes 4 arguments
		def processor = new FixTranscription(infile, null, null, true)
		println processor.process(outfile)
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/CreateTheOtherTurns.groovy (revision 3020)
1
package org.txm.macro.projects.nov13
2

  
3
import javax.xml.stream.*
4

  
5
import org.txm.importer.PersonalNamespaceContext
6
import org.txm.utils.FileUtils
7
import org.txm.xml.IdentityHook
8
import org.txm.xml.*
9

  
10
import java.io.BufferedOutputStream
11
import java.io.FileOutputStream
12
import java.io.IOException
13
import java.net.URL
14
import java.util.*
15
import java.util.Map.Entry
16
import java.util.regex.Pattern
17

  
18
/**
 * XML processor that rewrites the "w" elements of a transcription file:
 * <ul>
 * <li>a word starting with "*" closes the current Turn and opens a new Turn attributed
 * to another speaker; a word ending with "*" closes that Turn and re-opens the original one;</li>
 * <li>a word equal to "XXX" is replaced by an empty "event" element.</li>
 * </ul>
 * The "*" markers are removed from the written words.
 * <p>
 * The class is named after its file (CreateTheOtherTurns.groovy), which is the name
 * PrepareTranscriptionsMacro instantiates.
 */
class CreateTheOtherTurns extends XMLProcessor {
	
	LocalNamesHookActivator activator;
	IdentityHook hook;
	
	/** Regex built from the primary speaker prefix; null when the primary speaker is not known. */
	def primarySpeakerIdRegex
	/** Primary speaker id extracted from the file name; only set when primarySpeakerIdRegex is kept. */
	String primarySpeakerId
	
	Boolean debug
	
	/** Speaker id given to the created Turns when they are not attributed to the primary speaker. */
	String otherNonPrimaryLocutor = "other"
	
	/**
	 * @param xmlfile the transcription file to process
	 * @param primarySpeakerIdPrefix prefix of the primary speaker id, searched in the file name
	 *        (interpolated as-is into a regex); may be null or empty
	 * @param otherNonPrimaryLocutor speaker id of the created Turns; null or empty keeps the "other" default
	 * @param debug when true, prints a warning if the primary speaker cannot be detected
	 */
	public CreateTheOtherTurns(File xmlfile, String primarySpeakerIdPrefix, String otherNonPrimaryLocutor, Boolean debug) {
		super(xmlfile)
		this.debug = debug
		
		// an empty value (the macro default) must not erase the "other" default
		if (otherNonPrimaryLocutor != null && otherNonPrimaryLocutor.length() > 0) {
			this.otherNonPrimaryLocutor = otherNonPrimaryLocutor
		}
		// captured by the hook below; a local avoids any ambiguity with the constructor parameter
		final String otherSpeakerId = this.otherNonPrimaryLocutor
		
		if (primarySpeakerIdPrefix != null && primarySpeakerIdPrefix.length() > 0) {
			String id = FileUtils.stripExtension(xmlfile)
			
			this.primarySpeakerIdRegex = /$primarySpeakerIdPrefix.*/
			
			def rez = (id =~ primarySpeakerIdRegex).findAll()
			def rez2 = (id =~ /$primarySpeakerIdPrefix/).findAll()
			// the prefix must occur exactly once in the file name to identify the primary speaker
			if (rez2.size() != 1) {
				if (debug) println "WARNING: found the ${rez2.size()} matches of primary speaker prefix in the '$id' file name"
				this.primarySpeakerIdRegex = null
			} else {
				primarySpeakerId = rez[0]
			}
		}
		
		// NOTE(review): 'hook' is still null when handed to the activator - confirm the
		// IdentityHook constructor registers itself on the activator.
		activator = new LocalNamesHookActivator<>(hook, ["w", "Turn", "Sync"]);
		
		hook = new IdentityHook("word_hook", activator, this) {
					
					boolean inTurn = false;
					
					/** true between the start and the end of a "w" element */
					boolean inW = false;
					/** text of the "w" element being read */
					StringBuilder wordBuffer = new StringBuilder();
					
					/** last time seen on a Turn (startTime), a Sync (time) or a w (time) */
					String currentTime;
					/** attributes of the current source Turn */
					LinkedHashMap turnInfos = new LinkedHashMap()
					/** attributes of the current w */
					LinkedHashMap wInfos = new LinkedHashMap()
					/** true while a Turn created by a "*" marker is open */
					boolean other
					
					@Override
					public boolean deactivate() {
						return true;
					}
					
					@Override
					public boolean _activate() {
						return true;
					}
					
					@Override
					protected void processStartElement() throws XMLStreamException, IOException {
						if (localname.equals("Turn")) {
							// remember the Turn attributes, they are needed to split the Turn later
							inTurn = true;
							turnInfos.clear()
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
								turnInfos[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i)
							}
							currentTime = turnInfos["startTime"]
							super.processStartElement();
						} else if (localname.equals("Sync")) {
							currentTime = parser.getAttributeValue(null, "time")
							super.processStartElement();
						} else if (localname.equals("w")) {
							// remember the word attributes; the element itself is written in processEndElement
							inW = true;
							wInfos.clear()
							for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
								wInfos[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i)
							}
							String time = parser.getAttributeValue(null, "time")
							if (time != null && time.length() > 0) {
								currentTime = time
							}
							wordBuffer.setLength(0);
						} else {
							super.processStartElement();
						}
					}
					
					@Override
					protected void processCharacters() throws XMLStreamException {
						if (inW) {
							// buffered: the word is written once its whole text is known
							wordBuffer.append(parser.getText())
						} else {
							super.processCharacters();
						}
					}
					
					@Override
					protected void processEndElement() throws XMLStreamException {
						if (!localname.equals("w")) {
							super.processEndElement();
							return;
						}
						
						inW = false
						String word = wordBuffer.toString().trim()
						
						if (!other && word.startsWith("*")) {
							// close the current Turn and start a new Turn for the other speaker
							writer.writeEndElement() // current Turn
							writer.writeCharacters("\n")
							
							def tmpInfos = new LinkedHashMap(turnInfos)
							tmpInfos["orig-speaker"] = turnInfos["speaker"]
							
							// the new Turn gets the speaker that is NOT the one of the source Turn
							if (primarySpeakerIdRegex == null || turnInfos["speaker"] ==~ primarySpeakerIdRegex) {
								tmpInfos["speaker"] = otherSpeakerId
							} else {
								tmpInfos["speaker"] = primarySpeakerId
							}
							tmpInfos["startTime"] = currentTime
							writer.writeStartElement("Turn")
							for (String attr : tmpInfos.keySet()) {
								writer.writeAttribute(attr, tmpInfos[attr])
							}
							
							other = true
							word = word.substring(1)
						}
						
						// a word can both open and close the created Turn (e.g. "*word*")
						boolean shouldCloseOtherTurn = false;
						if (other && word.endsWith("*")) {
							shouldCloseOtherTurn = true;
							word = word.substring(0, word.length()-1)
							other = false
						}
						
						if ("XXX".equals(word)) {
							// replace the word with an event element
							writer.writeStartElement("event")
							writer.writeAttribute("desc", "XXX from "+wInfos["start"] + " to "+wInfos["end"])
							writer.writeAttribute("type", "unknown")
							writer.writeAttribute("extent", "instantaneous")
							writer.writeEndElement() // event
							word = "" // don't write the word
						}
						
						if (word.length() > 0) {
							// write the word with its original attributes and without the "*" markers
							writer.writeStartElement("w")
							for (String attr : wInfos.keySet() ) {
								writer.writeAttribute(attr, wInfos[attr])
							}
							writer.writeCharacters(word)
							writer.writeEndElement() // w
						}
						
						if (shouldCloseOtherTurn) {
							// close the created Turn and re-open the source Turn
							writer.writeEndElement() // created Turn
							
							writer.writeStartElement("Turn")
							turnInfos["startTime"] = wInfos["end"] // the re-opened Turn starts where the current word ends
							for (String attr : turnInfos.keySet()) {
								writer.writeAttribute(attr, turnInfos[attr])
							}
						}
					}
				}
	}
	
	/** Manual test entry point: processes a hard-coded file with no primary speaker and the default other speaker id. */
	public static void main(String[] args) {
		File infile = new File("/home/mdecorde/xml/vocapia","test.trs")
		File outfile = new File("/home/mdecorde/xml/vocapia","test-fixed.trs")
		// the only constructor takes 4 arguments
		def processor = new CreateTheOtherTurns(infile, null, null, true)
		println processor.process(outfile)
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/nov13/PrepareTranscriptionsMacro.groovy (revision 3020)
1
package org.txm.macro.projects.nov13
2

  
3
import java.time.LocalTime
4
import java.time.format.DateTimeFormatter
5
import org.txm.utils.*
6
import org.txm.utils.logger.*
7
import org.txm.macro.transcriber.RenameSpeaker
8

  
9
// Macro: prepares every .trs file of trsDirectory in two passes:
// 1) CreateTheOtherTurns writes a processed copy into resultDirectory,
// 2) RenameSpeaker rewrites each produced copy in place.
//
// NOTE(review): @Field, @Option and ParametersDialog are not covered by the imports
// visible in this file - confirm they are brought into scope elsewhere.

@Field @Option(name="trsDirectory", usage="A Vocapia XML files directory to process", widget="Folder", required=false, def="")
		File trsDirectory

@Field @Option(name="resultDirectory", usage="The result directory", widget="Folder", required=false, def="")
		File resultDirectory

@Field @Option(name="primarySpeakerIdPrefix", usage="speaker ID of the primary speaker", widget="String", required=false, def="")
		String primarySpeakerIdPrefix

@Field @Option(name="otherNonPrimaryLocutor", usage="other non primary id of the other turns", widget="String", required=false, def="")
		String otherNonPrimaryLocutor

// NOTE(review): this usage text is the same as the otherNonPrimaryLocutor one - looks copy-pasted, confirm the intended wording
@Field @Option(name="nonPrimaryLocutorReplaceRegex", usage="other non primary id of the other turns", widget="String", required=false, def="")
		String nonPrimaryLocutorReplaceRegex

// the usage text used to be a copy of the primarySpeakerIdPrefix one
@Field @Option(name="debug", usage="show debug messages", widget="Boolean", required=false, def="false")
		Boolean debug

if (!ParametersDialog.open(this)) return;

// both directories are dereferenced below (listFiles, mkdirs): refuse to go on without them
if (trsDirectory == null || resultDirectory == null) {
	println "The trsDirectory and resultDirectory parameters must be set: trsDirectory=$trsDirectory resultDirectory=$resultDirectory"
	return false
}

if (resultDirectory.equals(trsDirectory)) {
	println "Result directory must differs from trsDirectory: "+trsDirectory
	return false;
}

resultDirectory.mkdirs();

def trsFiles = []

println "Processing TRS directory: $trsDirectory"
for (File file : trsDirectory.listFiles()) {
	if (file.getName().toLowerCase().endsWith(".trs")) {
		trsFiles << file
	}
}

if (trsFiles.size() == 0) {
	println "No XML file found for parameters trsDirectory=$trsDirectory"
	return false
}

println "CREATE THE 'OTHER' TURNS..."

// files written by the first pass; only these go through the second pass
def createdFiles = []

ConsoleProgressBar cpb = new ConsoleProgressBar(trsFiles.size())
for (File file : trsFiles) {
	cpb.tick()
	
	CreateTheOtherTurns fixer = new CreateTheOtherTurns(file, primarySpeakerIdPrefix, otherNonPrimaryLocutor, debug)
	String name = FileUtils.stripExtension(file)
	File outFile = new File(resultDirectory, name+".trs")
	
	if (fixer.process(outFile)) {
		createdFiles << outFile
	} else {
		println "WARNING: ERROR WHILE PROCESSING: "+file
	}
}
cpb.done()

// FIX LOCUTORS
println "NORMALIZING LOCUTORS..."
// iterate over the files produced above rather than over the whole result directory:
// it may hold unrelated files, and the progress bar must match the number of ticks
cpb = new ConsoleProgressBar(createdFiles.size())
for (File file : createdFiles) {
	cpb.tick()
	
	// input and output are the same file: the copy is rewritten in place
	RenameSpeaker fixer = new RenameSpeaker(file, file, nonPrimaryLocutorReplaceRegex, null, otherNonPrimaryLocutor, otherNonPrimaryLocutor)
	
	if (!fixer.process()) {
		println "WARNING: ERROR WHILE PROCESSING: "+file
	}
}
cpb.done()

println "Done: "+trsFiles.size()+" files processed. Result files in $resultDirectory"
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/pager.groovy (revision 3020)
39 39
import org.xml.sax.InputSource;
40 40
import org.xml.sax.helpers.DefaultHandler;
41 41
import org.txm.utils.io.FileCopy
42
import org.txm.objects.Project
42 43

  
43 44
/** Build a simple edition from a xml-tei. 
44 45
 * @author mdecorde */
......
68 69

  
69 70
	/** The first word. */
70 71
	boolean firstWord = true;
72
	
73
	boolean paginate = true;
71 74

  
72 75
	/** The wordvalue. */
73 76
	String wordvalue = "";
......
126 129
	 * @param basename the basename
127 130
	 */
128 131
	pager(File infile, File outdir, String txtname, List<String> NoSpaceBefore,
129
	List<String> NoSpaceAfter, int max, String basename, String paginationElement) {
130
		this.paginationElement = paginationElement;
132
	List<String> NoSpaceAfter, String basename, Project project) {
133
		this.paginationElement = project.getEditionDefinition("default").getPageElement()
134
		this.paginate = project.getEditionDefinition("default").getPaginateEdition()
135
		this.wordmax = project.getEditionDefinition("default").getWordsPerPage();
136
		
131 137
		this.basename = basename;
132 138
		this.txtname = txtname;
133 139
		this.outdir = outdir;
134
		this.wordmax = max;
140
		
135 141
		this.NoSpaceBefore = NoSpaceBefore;
136 142
		this.NoSpaceAfter = NoSpaceAfter;
137 143
		this.url = infile.toURI().toURL();
......
288 294
				case XMLStreamConstants.START_ELEMENT:
289 295
					localname = parser.getLocalName();
290 296
					
291
					if (localname == paginationElement) {
297
					if (paginate && localname == paginationElement) {
292 298
						createNextOutput();
293 299
						pagedWriter.write("\n");
294 300
						if (parser.getAttributeValue(null,"n") != null) {
......
355 361
							wordid = parser.getAttributeValue(null,"id");
356 362
							anaValues.clear()
357 363
							wordcount++;
358
							if (wordcount >= wordmax) {
364
							if (paginate && wordcount >= wordmax) {
359 365
								createNextOutput();
360 366
							}
361 367
							
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivaLoader.groovy (revision 3020)
271 271
		t.setSourceFile(txmFile)
272 272
		t.setTXMFile(txmFile)
273 273

  
274
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element);
274
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, basename, project);
275 275
		Edition edition = new Edition(t);
276 276
		edition.setName("default");
277 277
		edition.setIndex(outdir.getAbsolutePath());
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 3020)
324 324
				edition.delete();
325 325
				edition = null;
326 326
			}
327
			def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
327
			def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, basename, page_element, metadatas, project);
328 328
			edition = t.getEdition("default")
329 329
			edition = new Edition(t);
330 330
			edition.setName("default");
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 3020)
28 28
package org.txm.scripts.importer.transcriber
29 29

  
30 30
import java.io.File;
31

  
31 32
import java.util.ArrayList;
32 33

  
33 34
import javax.xml.stream.*
......
37 38
import org.txm.metadatas.Metadatas
38 39
import org.txm.utils.TimeFormatter
39 40
import org.txm.utils.io.FileCopy;
41
import org.txm.objects.Project
40 42

  
41

  
42 43
// TODO: Auto-generated Javadoc
43 44
/** Build Discours corpus simple edition from a xml-tei.
44 45
 * 
......
74 75
	/** The first word. */
75 76
	boolean firstWord = true
76 77
	
78
	boolean paginate = true
79
	
80
	
77 81
	/** The wordvalue. */
78 82
	String wordvalue;
79 83
	
......
156 160
	 * @param metadatas the metadatas
157 161
	 */
158 162
	pager(File txmfile, File htmlDir, String txtname, List<String> NoSpaceBefore,
159
	List<String> NoSpaceAfter, int max, String corpusname, String cuttingTag, Metadatas metadatas) {
163
	List<String> NoSpaceAfter, String corpusname, String cuttingTag, Metadatas metadatas, Project project) {
160 164
		this.metadatas = metadatas
161
		this.wordmax = max;
165
		this.wordmax = project.getEditionDefinition("default").getWordsPerPage();
162 166
		this.cuttingTag = cuttingTag;
163 167
		this.corpusname = corpusname;
164 168
		this.NoSpaceBefore = NoSpaceBefore;
......
167 171
		this.txmfile = txmfile;
168 172
		this.htmlDir = htmlDir;
169 173
		this.txtname = txtname;
174
		this.paginate = project.getEditionDefinition("default").getPaginateEdition()
170 175
		
171 176
		inputData = url.openStream();
172 177
		factory = XMLInputFactory.newInstance();
......
386 391
							writer.writeAttribute("id", ""+nbBreak);
387 392
							writer.writeCharacters("\n");
388 393
						
389
							pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
390
							indexes << wordid
394
							if (paginate) {
395
								pages << new File(defaultDir, "${txtname}_${nbBreak}.html")
396
								indexes << wordid
397
							}
391 398
						
392 399
							wordcount = 0;
393 400
							shouldBreak = false;
......
612 619
								}
613 620
						
614 621
							wordcount++;
615
							if (wordcount >= wordmax) {
622
							if (paginate && wordcount >= wordmax) {
616 623
								shouldBreak = true;
617 624
							}
618 625
						
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/txtLoader.groovy (revision 3020)
188 188
		t.setSourceFile(srcfile)
189 189
		t.setTXMFile(srcfile)
190 190
		
191
		def ed = new pager(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, null);
191
		def ed = new pager(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, basename, project);
192 192
		Edition edition = new Edition(t);
193 193
		edition.setName("default");
194 194
		edition.setIndex(outdir.getAbsolutePath());
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/hyperbaseLoader.groovy (revision 3020)
142 142
		t.setSourceFile(txmFile)
143 143
		t.setTXMFile(txmFile)
144 144

  
145
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element);
145
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, basename, project);
146 146
		Edition edition = new Edition(t);
147 147
		edition.setName("default");
148 148
		edition.setIndex(outdir.getAbsolutePath());
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZPager.groovy (revision 3020)
28 28
	String page_element;
29 29
	String wordTag;
30 30
	int wordsPerPage;
31
	boolean paginate;
31 32

  
32 33
	File cssDirectory, jsDirectory, imagesDirectory;
33 34

  
......
39 40
		lang = project.getLang();
40 41
		wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
41 42
		page_element = project.getEditionDefinition("default").getPageElement()
43
		paginate = project.getEditionDefinition("default").getPaginateEdition()
42 44
		wordTag = project.getTokenizerWordElement()
43 45

  
44 46
		cssDirectory = new File(module.getSourceDirectory(), "css")
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZDefaultPagerStep.groovy (revision 3020)
87 87
	/** The idxstart. */
88 88
	ArrayList<String> idxstart = new ArrayList<String>();
89 89
	String paginationElement;
90
	boolean paginate;
90 91
	def cssList;
91 92
	def wordTag = "w";
92 93
	def noteElements = new HashSet<String>();
......
107 108
	List<String> NoSpaceAfter, def cssList) {
108 109
		this.pager = pager;
109 110
		this.paginationElement = pager.page_element;
111
		this.paginate = pager.paginate
110 112
		this.cssList = cssList;
111 113
		this.basename = pager.corpusname;
112 114
		this.txtname = txtname;
......
317 319
							currentOutOfTextElements << localname
318 320
						}
319 321
					
320
						if (localname == paginationElement) {
322
						if (paginate && localname == paginationElement) {
321 323
							createNextOutput()
322 324
							wordcount=0;
323 325
							pagedWriter.write("\n")
......
453 455
								wordid = getAttributeValue(parser, null,"id");
454 456
								anaValues.clear()
455 457
								wordcount++;
456
								if (wordcount >= wordmax) {
458
								if (paginate && wordcount >= wordmax) {
457 459
									createNextOutput();
458 460
								}
459 461
							
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xmltxm/xmltxmLoader.groovy (revision 3020)
216 216
		if (second > 0 && (second++ % 5) == 0) println ""
217 217
		cpb.tick()
218 218

  
219
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element);
219
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, basename, project);
220 220
		Edition edition = new Edition(t);
221 221
			edition.setName("default");
222 222
			edition.setIndex(outdir.getAbsolutePath());
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/discoursLoader.groovy (revision 3020)
125 125
		t.setName(txtname);
126 126
		t.setSourceFile(txmFile)
127 127
		t.setTXMFile(txmFile)
128
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, "pb");
128
		def ed = new pager(txmFile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, basename, project);
129 129
		Edition edition = new Edition(t);
130 130
		edition.setName("default");
131 131
		edition.setIndex(outdir.getAbsolutePath());
tmp/org.txm.core/src/java/org/txm/core/preferences/TBXPreferences.java (revision 3020)
65 65
	
66 66
	public static final String EDITION_DEFINITION_BUILD = "edition_definition_build"; //$NON-NLS-1$
67 67
	
68
	public static final String EDITION_DEFINITION_PAGINATE = "edition_definition_paginate"; //$NON-NLS-1$
69
	
68 70
	public static final String EDITION_DEFINITION_WORDS_PER_PAGE = "edition_definition_words_per_page"; //$NON-NLS-1$
69 71
	
70 72
	public static final String EDITION_DEFINITION_PAGE_BREAK_ELEMENT = "edition_definition_page_break_element"; //$NON-NLS-1$
tmp/org.txm.core/src/java/org/txm/objects/EditionDefinition.java (revision 3020)
46 46
		return node.getBoolean(TBXPreferences.EDITION_DEFINITION_BUILD, false);
47 47
	}
48 48
	
49
	/**
	 * Reads the {@link TBXPreferences#EDITION_DEFINITION_PAGINATE} entry of this
	 * definition's preference node.
	 *
	 * @return the stored boolean; {@code true} is passed to {@code getBoolean} as the
	 *         fallback value
	 */
	public boolean getPaginateEdition() {
50
		return node.getBoolean(TBXPreferences.EDITION_DEFINITION_PAGINATE, true);
51
	}
52
	
49 53
	public String getImagesDirectory() {
50 54
		return node.get(TBXPreferences.EDITION_DEFINITION_IMAGES_DIRECTORY, null);
51 55
	}
......
114 118
		}
115 119
		return buffer.toString();
116 120
	}
121

  
122
	/**
	 * Stores the given flag under the {@link TBXPreferences#EDITION_DEFINITION_PAGINATE}
	 * key of this definition's preference node.
	 *
	 * @param paginate the value to store
	 */
	public void setPaginateEdition(boolean paginate) {
123
		node.putBoolean(TBXPreferences.EDITION_DEFINITION_PAGINATE, paginate);		
124
	}
117 125
}

Formats disponibles : Unified diff