Révision 3010

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/limsi/limsiLoader.groovy (revision 3010)
65 65

  
66 66

  
67 67
//PARAMETERS
68
boolean removeInterviewer = false;//if true the transcription of speakers (en1 and enq2) defined in metadatas file will be ignored
69 68
boolean includeComments = false;
70 69
boolean ignoreTranscriberMetadata = false;
71 70
int csvHeaderNumber = 1;
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 3010)
73 73
	/** The debug. */
74 74
	boolean debug = false;
75 75

  
76
	/** The removeinterviewers. */
77
	boolean removeinterviewers = false;
76
	/** The indexInterviewer: index interviewer speech if true. */
77
	boolean indexInterviewer = true;
78 78

  
79 79
	/** The trans. */
80 80
	HashMap<String, ArrayList<Pair<String, String>>> trans;
......
89 89
	HashMap<String, ArrayList<Pair<String, String>>> topics;
90 90

  
91 91

  
92
	/** The interviewers. */
92
	/** The interviewers regex */
93 93
	def interviewers = null
94 94
	static HashSet<String> sectionAttrs;
95 95

  
......
103 103
	 * @param value the value
104 104
	 * @return the java.lang. object
105 105
	 */
106
	public removeInterviewers(boolean value) {
107
		this.removeinterviewers = value;
106
	public setIndexInterviewer(boolean value) {
107
		this.indexInterviewer = value;
108 108
	}
109 109

  
110 110
	File cqpFile
......
510 510

  
511 511
							vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;");
512 512

  
513
							if (removeinterviewers) {
514
								if (!interviewers.matches(u_name))
513
							if (!indexInterviewer) {
514
								if (!interviewers.matches(u_name)) {
515 515
									output.write(vForm+"\t"+wordid+vAna+"\n");
516
								}
516 517
							} else {
517 518
								output.write(vForm+"\t"+wordid+vAna+"\n");
518 519
							}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/importer.groovy (revision 3010)
146 146
						cpb.tick()
147 147
						if (!metadatas.injectMetadatasInXml(infile, outfile, "text")) {
148 148
							println("Failed to inject metadata in "+infile)
149
							outfile.delete()
149
							//outfile.delete()
150 150
						}
151 151
						if (!infile.delete()) {
152 152
							println "ERROR: could not delete $infile"
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 3010)
58 58
import org.txm.utils.xml.DomUtils;
59 59

  
60 60
//PARAMETERS
61
boolean removeInterviewer = false;//if true the transcription of speakers (en1 and enq2) defined in metadatas file will be ignored
61
boolean indexInterviewer = true;//if false the transcription of the interviewer speakers (en1 and enq2) defined in the metadata file will be ignored (not indexed)
62 62
boolean includeComments = false;
63 63
boolean ignoreTranscriberMetadata = false;
64 64
//int csvHeaderNumber = 1;
......
137 137
	props.load(input);
138 138
	input.close();
139 139

  
140
	if (props.getProperty("removeInterviewer") != null)
141
		removeInterviewer = Boolean.parseBoolean(props.get("removeInterviewer").toString());
140
	if (props.getProperty("indexInterviewer") != null)
141
		indexInterviewer = Boolean.parseBoolean(props.get("indexInterviewer").toString());
142 142
	if (props.getProperty("ignoreTranscriberMetadata") != null)
143 143
		ignoreTranscriberMetadata = Boolean.parseBoolean(props.get("ignoreTranscriberMetadata").toString());
144 144
//	if (props.getProperty("metadataList") != null)
......
149 149
	//	includeComments = props.get("includeComments").toString();
150 150

  
151 151
	println "import properties: "
152
	println " removeInterviewer: "+removeInterviewer
152
	println " indexInterviewer: "+indexInterviewer
153 153
	println " ignoreTranscriberMetadata: "+ignoreTranscriberMetadata
154 154
//	println " metadataToKeep: "+metadatasToKeep
155 155
//	println " ignored csvHeaderSize: "+csvHeaderNumber
......
213 213

  
214 214
		if (MONITOR != null) MONITOR.worked(5)
215 215
		if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
216
		println "-- Remove interviewer: "+removeInterviewer
217
		if (removeInterviewer) 	{
218
			if (metadatas == null) {
219
				println "Can't remove interviewer without a metadata.csv file defining who are the interviewers."
216
		println "-- Remove interviewer: "+(!indexInterviewer)
217
		if (!indexInterviewer) 	{
218
			if (metadatas == null || !metadatas.headersList.contains("interviewer-id-regex")) {
219
				println "Can't remove interviewer without a metadata.csv file defining who are the interviewers. Add the 'interviewer-id-regex' column"
220 220
			} else {
221 221
				println "Removing some speakers in "+txmDir.listFiles().length+" file(s)"
222 222
				for (File infile : txmDir.listFiles()) {
......
225 225
					ArrayList<Pair<String, String>> metas = metadatas.get(filename)
226 226
					//println "filename=$filename metas= $metas"
227 227
					for (Pair p : metas) {
228
						if (p.getFirst().startsWith("out-of-text-to-edit-locutor")) {
228
						if (p.getFirst().startsWith("interviewer-id-regex")) {
229 229
							new RemoveSpeaker(infile, infile, p.getSecond())
230 230
						}
231 231
					}
......
281 281

  
282 282
	def comp = new compiler()
283 283
	if(debug) comp.setDebug();
284
	comp.removeInterviewers(removeInterviewer);
284
	comp.setIndexInterviewer(indexInterviewer);
285 285
	comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata);
286 286
	if (!comp.run(project, xmltxmFiles, corpusname, "default", binDir)) {
287 287
		println "Failed to compile files";
tmp/org.txm.core/src/java/org/txm/metadatas/Metadatas.java (revision 3010)
66 66
import org.txm.utils.AsciiUtils;
67 67
import org.txm.utils.CsvReader;
68 68
import org.txm.utils.Pair;
69
import org.txm.utils.io.FileCopy;
69 70
import org.txm.utils.logger.Log;
70 71
import org.txm.utils.xml.DomUtils;
71 72
import org.w3c.dom.Document;
......
108 109
	/** The ns context. */
109 110
	NamespaceContext nsContext = new PersonalNamespaceContext();
110 111
	
112
	private File inputMetadataFile;
113
	
111 114
	/**
112 115
	 * Instantiates a new metadatas.
113 116
	 *
......
151 154
	 */
152 155
	public Metadatas(File inputFile, String encoding, String separator, String txtseparator, int nbheaderline) {
153 156
		
157
		this.inputMetadataFile = inputFile;
154 158
		File xmlfile = new File(inputFile.getParent(), inputFile.getName() + ".xml");
155 159
		// println "create xml file version of "+csvfile+" : "+xmlfile
156 160
		try {
......
615 619
	public boolean injectMetadatasInXml(File infile, File outfile, String tag, String namespace) throws MalformedURLException, IOException, XMLStreamException {
616 620
		
617 621
		String key = infile.getName();
618
		if (key.lastIndexOf(".") > 0)
622
		if (key.lastIndexOf(".") > 0) {
619 623
			key = key.substring(0, key.lastIndexOf("."));
620
		
624
		}
621 625
		ArrayList<org.txm.metadatas.Entry> metas = get(key);
622 626
		
623 627
		if (metas == null) {
624
			System.out.println("\nError: can't find metadata for text of id=" + key);
625
			System.out.println("Maybe the metadata file doesn't have the right format (comma or tab separated values?)");
626
			return false;
628
			System.out.println("\nWarning: can't find metadata for text of id=" + key);
629
			if (this.inputMetadataFile != null && inputMetadataFile.getName().toLowerCase().endsWith(".csv")) {
630
				System.out.println("Maybe the metadata file doesn't have the right format (comma or tab separated values?)");
631
			}
632
			if (!infile.equals(outfile)) {
633
				FileCopy.copy(infile, outfile);
634
			}
635
			return true;
627 636
		}
628 637
		
629 638
		// ensure attribute names format

Formats disponibles : Unified diff