/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 2999

     		boolean flagWord = false
     		def winfos = [:]
     		def turninfos = [:]
     		boolean other = false;
     		String word = ""
     		try {
-...
     							case "SpeechSegment": // <SpeechSegment ch="1" sconf="1.00" stime="9.94" etime="43.81" spkid="Enquêtrice" lang="fre" lconf="1.00" trs="1">
     								writer.writeStartElement("Turn")
     								writer.writeAttribute("speaker", parser.getAttributeValue(null, "spkid"))
     								writer.writeAttribute("startTime", parser.getAttributeValue(null, "stime"))
     								writer.writeAttribute("endTime", parser.getAttributeValue(null, "etime"))
     								turninfos = ["speaker":parser.getAttributeValue(null, "spkid"),
     									"startTime":parser.getAttributeValue(null, "stime"),
     									"endTime":parser.getAttributeValue(null, "etime"),
+    								]
     								for (String attr : turninfos.keySet()) {
     									writer.writeAttribute(attr, turninfos[attr])
+    								}
     								writer.writeCharacters("\n")
     								writer.writeStartElement("Sync")
     								writer.writeAttribute("time", parser.getAttributeValue(null, "stime"))
-...
     								flagWord = false
     								word = word.trim()
     								if (word.startsWith("*")) {
     									// close the current Turn and start an 'other' Turn
     									writer.writeEndElement() // current Turn
     									writer.writeStartElement("Turn")
     									writer.writeAttribute("speaker", "other")
     									writer.writeAttribute("startTime", winfos["time"])
     									writer.writeAttribute("orig-speaker", turninfos["speaker"])
     									writer.writeCharacters("\n")
     									other = true
     									word = word.substring(1)
+    								}
-...
     								String otherAttributeValue = Boolean.toString(other) // set now
     								if (other && word.endsWith("*")) {
     									//close the current 'other' Turn and restart the actual Turn
     									writer.writeEndElement() // current 'other' Turn
     									writer.writeStartElement("Turn") // rebuild the orig Turn with its infos
     									turninfos["startTime"] = winfos["end"] // fix the startTime using the current word end time
     									for (String attr : turninfos.keySet()) {
     										writer.writeAttribute(attr, turninfos[attr])
+    									}
     									writer.writeCharacters("\n")
     									word = word.substring(0, word.length()-1)
     									other = false
+    								}
-...
     								for (def punct : puncts) { // pre-retokenize if any
     									writer.writeStartElement("w")
     									writer.writeAttribute("time", winfos["time"])
     									writer.writeAttribute("start", winfos["start"])
     									writer.writeAttribute("end", winfos["start"])
     									for (String attr : winfos.keySet()) {
     										writer.writeAttribute(attr, winfos[attr])
+    									}
     									writer.writeCharacters(punct)
     									writer.writeEndElement() // w
     									writer.writeCharacters("\n")
+    								}
     								puncts = []

     import org.txm.importer.ApplyXsl2
     import org.txm.metadatas.MetadataGroup
     import org.txm.metadatas.Metadatas
     import org.txm.utils.TimeFormatter
     import org.txm.utils.io.FileCopy;
-...
     	List<String> NoSpaceAfter;
     	/** The pages. */
     	def pages = [];
     	def indexes = [];
     	def pages = []
     	def indexes = []
     	/** The wordcount. */
     	int wordcount = 0;
     	int wordcount = 0
     	/** The pagecount. */
     	int pagecount = 0;
     	int pagecount = 0
     	/** The wordmax. */
     	int wordmax = 10;
     	int wordmax = 10
     	/** The wordid. */
     	String wordid;
     	/** The first word. */
     	boolean firstWord = true;
     	boolean firstWord = true
     	/** The wordvalue. */
     	String wordvalue;
-...
     	File outfile;
     	String corpusname ="";
     	String corpusname =""
     	String cuttingTag = "pb"
     	String txtname;
     	File htmlDir;
     	File defaultDir;
     	Metadatas metadatas;
     	String txtname
     	File htmlDir
     	File defaultDir
     	Metadatas metadatas
     	def interviewers = [];
     	def interviewers = null
     	def eventTranslations = ["^^":"mot inconnu", "?":"orthographe incertaine",
     		"()":"rupture de syntaxe", "b":"bruit indéterminé",
     		"*":"mot corrigé",
-...
     		"pif":"inaudible", "r":"respiration",
     		"rire":"rire du locuteur", "shh":"soufle électrique",
     		"sif":"sifflement du locuteur", "tx":"toux"];
     	String currentTime = "";
     	boolean bold = false;
     	int writenLength = 0;
     	boolean spokenTurn = false;
     	boolean firstSync = false;
     	boolean firstWho = false;
     	String currentUTime = ""
     	String startTimeSp = ""
     	String endTimeSp = ""
     	String startTimeU = "0"
     	String previousStartTimeU = "0"
     	boolean bold = false
     	int writenLength = 0
     	boolean spokenTurn = false
     	boolean firstSync = false
     	boolean firstWho = false
     	int nSilence = 0
     	/**
     	 * Instantiates a new pager.
+    	 *
-...
     									writer.writeEndElement(); // td
+    								}
     								// collect the interviewers ("enquêteurs") so that their names can be styled
     								if (name.startsWith("enq")) {
     									interviewers.add(value)
     								if (name.equals("out-of-text-to-edit-locutor")) {
     									interviewers = /$value/
+    								}
     								writer.writeEndElement(); // tr
+    							}
-...
     							firstWho = true;
     							spokenTurn = false;
     							overlapping = false
     							nSilence = 0 // counts the silences already written, to avoid writing [silence] again at the end of the sp
     							writer.writeStartElement("p");
     							writer.writeAttribute("class", "turn");
     							writer.writeCharacters("\n");
     							this.startTimeSp = parser.getAttributeValue(null, "start")
     							this.endTimeSp = parser.getAttributeValue(null, "end")
     							overlapping = ("true" == parser.getAttributeValue(null,"overlap"))
     							String spid = parser.getAttributeValue(null, "who");
-...
     							break;
     						case "u":
     							writer.writeCharacters("\n");
     							this.currentTime = parser.getAttributeValue(null,"time");
     							this.previousStartTimeU = this.startTimeU
     							this.startTimeU = parser.getAttributeValue(null, "start");
     							this.currentUTime = parser.getAttributeValue(null, "time");
     							if (previousElem == "u" && writenLength == 0) { // if previous u had no words, it was a silence
     								def duration = ""
     								try {
     									def d = Float.parseFloat(this.startTimeU) - Float.parseFloat(this.previousStartTimeU)
     									duration = " "+TimeFormatter.formatTime(d);
     								} catch(Exception e) { e.printStackTrace()}
     								writer.writeStartElement("span");
     								writer.writeAttribute("class", "event");
     								writer.writeCharacters("[silence]");
     								writer.writeCharacters("[silence$duration]");
     								writer.writeEndElement(); // span
     								writer.writeEmptyElement("br");
     								nSilence++
     								//writer.writeEmptyElement("br");
+    							}
     							String spk = parser.getAttributeValue(null, "who")
-...
     							previousSPK = spk
     							if (overlapping) previousSPK = null
     							writenLength = 0;
     						//							writenLength = 0;
     						/*writer.writeStartElement("span");
     					 writer.writeAttribute("class", "sync");
-...
     						case "sp":
     						//println "CLOSING: "+parser.getLocalName()
     							endBoldIfNeeded()
     							if (!spokenTurn) {
     							if (!spokenTurn && nSilence == 0) {
     								writer.writeStartElement("span");
     								writer.writeAttribute("class", "event");
     								writer.writeCharacters("[silence]");
     								String duration = ""
     								try {
     									def d = Float.parseFloat(endTimeSp)-Float.parseFloat(startTimeSp)
     									duration = " "+TimeFormatter.formatTime(d);
     								} catch (Exception e) {e.printStackTrace()}
     								writer.writeCharacters("[silence$duration]");
     								writer.writeEndElement();
     								writer.writeEmptyElement("br");
+    							}
-...
     						//writer.writeEndElement() // span@class=u
     						//writer.writeEmptyElement("br");
     						//if (overlapping) writer.writeEndElement(); // b
     							writer.writeCharacters("\n");
     							break;
     						case "event":
     							break;
-...
     							spokenTurn = true;
     							int l = lastword.length();
     							String endOfLastWord = "";
     							if(l > 0)
     							if (l > 0) {
     								endOfLastWord = lastword.subSequence(l-1, l);
+    							}
     							if (interpvalue != null) {
     								interpvalue = interpvalue.replace("\"","&quot;");
+    							}
-...
     							if (interpvalue.contains("rapp1")) {
     								writer.writeCharacters(" «");
     							} else if (wordvalue == "\"") {
     								// don't write this char
     							} else {
     								writer.writeStartElement("span");
     								writer.writeAttribute("class", "word");
     								writer.writeAttribute("title", interpvalue);
     								writer.writeAttribute("id", wordid);
     								writer.writeCharacters(wordvalue);
     								writer.writeEndElement();
+    							}
+    							}
     							writer.writeStartElement("span");
     							writer.writeAttribute("class", "word");
     							writer.writeAttribute("title", interpvalue);
     							writer.writeAttribute("id", wordid);
     							writer.writeCharacters(wordvalue);
     							writer.writeEndElement();
     							if (interpvalue.contains("orth")) {
     								writer.writeStartElement("span");
     								writer.writeAttribute("class", "event");
-...
     								writer.writeCharacters("_[!]");
     								writer.writeEndElement();
+    							}
     							if (interpvalue.contains("rapp2")) {
     								writer.writeCharacters(" » ");
     								writer.writeCharacters("» ");
+    							}
     							lastword=wordvalue;
-...
     	private void writeCurrentTime() {
     		writer.writeStartElement("span");
     		writer.writeAttribute("class", "sync");
     		writer.writeCharacters(currentTime);
     		writer.writeCharacters(currentUTime);
     		writeMediaAccess(currentTime)
     		writeMediaAccess(currentUTime)
     		writer.writeEndElement() // span
+    	}
-...
     		writer.writeStartElement("span");
     		writer.writeAttribute("class", "spk");
     		if(interviewers.contains(spk)) {
     			bold = true;
     		} else {
     			bold = false;
+    		}
     		bold = interviewers != null && interviewers.matches(spk)
     		spk = spk.replaceAll('^([^0-9]*)([0-9]+)$', '$1 $2');
     		if (overlapping) {
     			writer.writeCharacters("// ")
-...
+    		}
+    	}
     	//	private String formatTime(float time, boolean doshort)
     	//	{
     	//		String rez = " ";
     	//		//		if(time >= 3600) // >= 1h
     	//		//		{
     	//		float h = time / 3600;
     	//		time = time%3600;
     	//		float min = (time%3600) / 60;
     	//		int sec = (int)time%60;
     	//
     	//		if(min < 10)
     	//			rez = ""+(int)h+":0"+(int)min;//+":"+time%60;
     	//		else
     	//			rez = ""+(int)h+":"+(int)min;//+":"+time%60;
     	//		//if (!doshort)
     	//		if (sec > 9)
     	//			rez += ":"+(int)time%60;
     	//		else
     	//			rez += ":0"+(int)time%60;
     	//		//		}
     	//		//		else if(time >= 60) // >= 1min
     	//		//		{
     	//		//			int min = time/60;
     	//		//			if(min < 10)
     	//		//				rez = "00:0"+min;//+":"+time%60;
     	//		//			else
     	//		//				rez = "00:"+min;//+":"+time%60;
     	//		//			if(!doshort)
     	//		//				rez += ":"+(int)time%60;
     	//		//		}
     	//		//		else // < 60
     	//		//		{
     	//		//			if(time < 10)
     	//		//				return " 0:0"+time;
     	//		//			else
     	//		//				return " 0:"+time;
     	//		//		}
     	//		return rez;
     	//	}
     	/**
     	 * Gets the page files.
+    	 *

tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2999)
225	225	ArrayList<Pair<String, String>> metas = metadatas.get(filename)
226	226	//println "filename=$filename metas= $metas"
227	227	for (Pair p : metas) {
228		if (p.getFirst().startsWith("enq")) {
229		new RemoveSpeaker(infile, infile, p.getFirst())
	228	if (p.getFirst().startsWith("out-of-text-to-edit-locutor")) {
	229	new RemoveSpeaker(infile, infile, p.getSecond())
230	230	}
231	231	}
232	232	}

     import org.txm.scripts.importer.*;
     import org.txm.scripts.importer.graal.PersonalNamespaceContext
     import org.txm.utils.*;
     import org.txm.utils.xml.DomUtils
     import org.txm.metadatas.*;
     import java.io.File;
-...
     	 * @param outfile the outfile
     	 * @param id the id
     	 */
     	public RemoveSpeaker(File transcriptionfile, File outfile, String id)
+    	{
     	public RemoveSpeaker(File transcriptionfile, File outfile, String idRegex) {
     		System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");
     		this.outfile = outfile;
     		String xpathString = "//tei:u[@spk='"+id+"']";
     //		String xpathString = "//u";
     		//println "removing $xpathString in $transcriptionfile"
     		DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
     		//println "domFactory: $domFactory"
     		domFactory.setNamespaceAware(true); // never forget this!
     		domFactory.setXIncludeAware(true);
     		DocumentBuilder builder = domFactory.newDocumentBuilder();
     //		DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
     //		//println "domFactory: $domFactory"
     //		domFactory.setNamespaceAware(true); // never forget this!
     //		domFactory.setXIncludeAware(true);
     //		DocumentBuilder builder = domFactory.newDocumentBuilder();
     		//println "builder $builder"
     		doc = builder.parse(transcriptionfile);
     		//doc = builder.parse(transcriptionfile);
     		//println "doc $doc"
     		doc = DomUtils.load(transcriptionfile)
     		//def xpath = XPathFactory.newInstance().newXPath()
     //		xpath.setNamespaceContext(new PersonalNamespaceContext());
     		//def expr = xpath.compile(xpathString);
     		def nodes = doc.getDocumentElement().getElementsByTagName("u")
     		def xpath = XPathFactory.newInstance().newXPath()
     		xpath.setNamespaceContext(new PersonalNamespaceContext());
     		def expr = xpath.compile(xpathString);
     		def nodes = expr.evaluate(doc, XPathConstants.NODESET);
     		def reg = /$idRegex/
     		for (def node : nodes) {
     			//println "remove node "+node
     			Element elem = (Element)node;
     			elem.getParentNode().removeChild(node);
     			//Element elem = (Element)node;
     			String who = node.getAttribute("who") // [@who='"+idRegex+"']
     			if (reg.matches(who)) {
     				node.getParentNode().removeChild(node);
+    			}
+    		}
     		save()
+    	}
-...
+    	 *
     	 * @return true, if successful
     	 */
     	private boolean save()
+    	{
     	private boolean save() {
     		try {
     			// Create the DOM source
     			Source source = new DOMSource(doc);

     	 * @param infile the infile
     	 * @param outfile the outfile
     	 */
     	public TranscriberTokenizer(File infile, File outfile, String lang)
+    	{
     	public TranscriberTokenizer(File infile, File outfile, String lang) {
     		super(infile, outfile, lang)
     		txtname = infile.getName();
     		int idx = txtname.lastIndexOf(".")
-...
     				audio = "present"
     			notation = s;
     			event = "";
     			if (s.startsWith("\"")) {
     				rapp = true;
     				event += "#rapp1";
     			} else if(s.endsWith("\"")) {
     				rapp = false;
     				event += "#rapp2";
+    			}
     			//TODO this does not work (e.g. ' "word" '). This step should be done after the tokenizer step has completed
     //			if (s.startsWith("\"") && s.endsWith("\"")) {
     //				// not rapp1 or rapp2
     //			} else if (s.startsWith("\"")) {
     //				rapp = true;
     //				event += "#rapp1";
     //			} else if(s.endsWith("\"")) {
     //				rapp = false;
     //				event += "#rapp2";
     //			}
     			//test events
     			if (s.startsWith("^^")) {
     				event += "#orth";

     	/** The interviewers. */
     	ArrayList<String> interviewers = [];
     	def interviewers = null
     	static HashSet<String> sectionAttrs;
     	/** The anatypes. */
-...
     							break;
     						case "w":
     						// concat spk id and ref
     							String isEnq = (interviewers.contains(u_name))?"*":"";
     							String isEnq = (interviewers != null && interviewers.matches(u_name))?"*":"";
     							String ref = (u_name+", "+formatedTime+""+isEnq)
     							if (ADD_TEXTID_TO_REF) ref = textid+", "+ref
     							vForm +="\t"+u_name+"\t"+ref
-...
+    								}
+    							}
     							vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;");
     							if (removeinterviewers) {
     								if (!interviewers.contains(u_name))
     								if (!interviewers.matches(u_name))
     									output.write(vForm+"\t"+wordid+vAna+"\n");
     							} else {
     								output.write(vForm+"\t"+wordid+vAna+"\n");
-...
     						for (int i = 0 ; i < parser.getAttributeCount() ; i ++) {
     							list.add(new Pair(parser.getAttributeLocalName(i).replace("_","").toLowerCase(), parser.getAttributeValue(i)));
     							if (parser.getAttributeLocalName(i).startsWith("enq"))
     								interviewers.add(parser.getAttributeValue(i));
     							if (parser.getAttributeLocalName(i).equals("out-of-text-to-edit-locutor"))
     								interviewers = /${parser.getAttributeValue(i)}/;
+    						}
     						return
     					case "Topic":

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 2999