Révision 2999
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/transcription/Vocapia2Transcriber.groovy (revision 2999) | ||
---|---|---|
37 | 37 |
|
38 | 38 |
boolean flagWord = false |
39 | 39 |
def winfos = [:] |
40 |
def turninfos = [:] |
|
40 | 41 |
boolean other = false; |
41 | 42 |
String word = "" |
42 | 43 |
try { |
... | ... | |
92 | 93 |
|
93 | 94 |
case "SpeechSegment": // <SpeechSegment ch="1" sconf="1.00" stime="9.94" etime="43.81" spkid="Enquêtrice" lang="fre" lconf="1.00" trs="1"> |
94 | 95 |
writer.writeStartElement("Turn") |
95 |
writer.writeAttribute("speaker", parser.getAttributeValue(null, "spkid")) |
|
96 |
writer.writeAttribute("startTime", parser.getAttributeValue(null, "stime")) |
|
97 |
writer.writeAttribute("endTime", parser.getAttributeValue(null, "etime")) |
|
96 |
|
|
97 |
turninfos = ["speaker":parser.getAttributeValue(null, "spkid"), |
|
98 |
"startTime":parser.getAttributeValue(null, "stime"), |
|
99 |
"endTime":parser.getAttributeValue(null, "etime"), |
|
100 |
] |
|
101 |
for (String attr : turninfos.keySet()) { |
|
102 |
writer.writeAttribute(attr, turninfos[attr]) |
|
103 |
} |
|
104 |
|
|
98 | 105 |
writer.writeCharacters("\n") |
99 | 106 |
writer.writeStartElement("Sync") |
100 | 107 |
writer.writeAttribute("time", parser.getAttributeValue(null, "stime")) |
... | ... | |
148 | 155 |
flagWord = false |
149 | 156 |
word = word.trim() |
150 | 157 |
if (word.startsWith("*")) { |
158 |
//close current Turn and start a 'other' Turn |
|
159 |
writer.writeEndElement() // current Turn |
|
160 |
writer.writeStartElement("Turn") |
|
161 |
writer.writeAttribute("speaker", "other") |
|
162 |
writer.writeAttribute("startTime", winfos["time"]) |
|
163 |
writer.writeAttribute("orig-speaker", turninfos["speaker"]) |
|
164 |
writer.writeCharacters("\n") |
|
151 | 165 |
other = true |
152 | 166 |
word = word.substring(1) |
153 | 167 |
} |
... | ... | |
155 | 169 |
String otherAttributeValue = Boolean.toString(other) // set now |
156 | 170 |
|
157 | 171 |
if (other && word.endsWith("*")) { |
172 |
|
|
173 |
//close the current 'other' Turn and restart the actual Turn |
|
174 |
writer.writeEndElement() // current 'other' Turn |
|
175 |
|
|
176 |
writer.writeStartElement("Turn") // rebuild the orig Turn with its infos |
|
177 |
turninfos["startTime"] = winfos["end"] // fix the startTime using the current word end time |
|
178 |
for (String attr : turninfos.keySet()) { |
|
179 |
writer.writeAttribute(attr, turninfos[attr]) |
|
180 |
} |
|
181 |
writer.writeCharacters("\n") |
|
182 |
|
|
158 | 183 |
word = word.substring(0, word.length()-1) |
159 | 184 |
other = false |
160 | 185 |
} |
... | ... | |
178 | 203 |
|
179 | 204 |
for (def punct : puncts) { // pre-retokenize if any |
180 | 205 |
writer.writeStartElement("w") |
181 |
writer.writeAttribute("time", winfos["time"])
|
|
182 |
writer.writeAttribute("start", winfos["start"])
|
|
183 |
writer.writeAttribute("end", winfos["start"])
|
|
206 |
for (String attr : winfos.keySet()) {
|
|
207 |
writer.writeAttribute(attr, winfos[attr])
|
|
208 |
}
|
|
184 | 209 |
writer.writeCharacters(punct) |
185 | 210 |
writer.writeEndElement() // w |
186 | 211 |
writer.writeCharacters("\n") |
187 |
|
|
188 |
|
|
189 | 212 |
} |
190 | 213 |
|
191 | 214 |
puncts = [] |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/pager.groovy (revision 2999) | ||
---|---|---|
35 | 35 |
import org.txm.importer.ApplyXsl2 |
36 | 36 |
import org.txm.metadatas.MetadataGroup |
37 | 37 |
import org.txm.metadatas.Metadatas |
38 |
import org.txm.utils.TimeFormatter |
|
38 | 39 |
import org.txm.utils.io.FileCopy; |
39 | 40 |
|
40 | 41 |
|
... | ... | |
55 | 56 |
List<String> NoSpaceAfter; |
56 | 57 |
|
57 | 58 |
/** The pages. */ |
58 |
def pages = [];
|
|
59 |
def indexes = [];
|
|
59 |
def pages = [] |
|
60 |
def indexes = [] |
|
60 | 61 |
|
61 | 62 |
/** The wordcount. */ |
62 |
int wordcount = 0;
|
|
63 |
int wordcount = 0 |
|
63 | 64 |
|
64 | 65 |
/** The pagecount. */ |
65 |
int pagecount = 0;
|
|
66 |
int pagecount = 0 |
|
66 | 67 |
|
67 | 68 |
/** The wordmax. */ |
68 |
int wordmax = 10;
|
|
69 |
int wordmax = 10 |
|
69 | 70 |
|
70 | 71 |
/** The wordid. */ |
71 | 72 |
String wordid; |
72 | 73 |
|
73 | 74 |
/** The first word. */ |
74 |
boolean firstWord = true;
|
|
75 |
boolean firstWord = true |
|
75 | 76 |
|
76 | 77 |
/** The wordvalue. */ |
77 | 78 |
String wordvalue; |
... | ... | |
113 | 114 |
|
114 | 115 |
File outfile; |
115 | 116 |
|
116 |
String corpusname ="";
|
|
117 |
String corpusname ="" |
|
117 | 118 |
String cuttingTag = "pb" |
118 |
String txtname;
|
|
119 |
File htmlDir;
|
|
120 |
File defaultDir;
|
|
121 |
Metadatas metadatas;
|
|
119 |
String txtname |
|
120 |
File htmlDir |
|
121 |
File defaultDir |
|
122 |
Metadatas metadatas |
|
122 | 123 |
|
123 |
def interviewers = [];
|
|
124 |
def interviewers = null
|
|
124 | 125 |
def eventTranslations = ["^^":"mot inconnu", "?":"orthographe incertaine", |
125 | 126 |
"()":"rupture de syntaxe", "b":"bruit indéterminé", |
126 | 127 |
"*":"mot corrigé", |
... | ... | |
133 | 134 |
"pif":"inaudible", "r":"respiration", |
134 | 135 |
"rire":"rire du locuteur", "shh":"soufle électrique", |
135 | 136 |
"sif":"sifflement du locuteur", "tx":"toux"]; |
136 |
String currentTime = ""; |
|
137 |
boolean bold = false; |
|
138 |
int writenLength = 0; |
|
139 |
boolean spokenTurn = false; |
|
140 |
boolean firstSync = false; |
|
141 |
boolean firstWho = false; |
|
137 |
String currentUTime = "" |
|
138 |
String startTimeSp = "" |
|
139 |
String endTimeSp = "" |
|
140 |
String startTimeU = "0" |
|
141 |
String previousStartTimeU = "0" |
|
142 |
boolean bold = false |
|
143 |
int writenLength = 0 |
|
144 |
boolean spokenTurn = false |
|
145 |
boolean firstSync = false |
|
146 |
boolean firstWho = false |
|
147 |
int nSilence = 0 |
|
142 | 148 |
/** |
143 | 149 |
* Instantiates a new pager. |
144 | 150 |
* |
... | ... | |
307 | 313 |
writer.writeEndElement(); // td |
308 | 314 |
} |
309 | 315 |
//get enqueteur to style their names |
310 |
if (name.startsWith("enq")) {
|
|
311 |
interviewers.add(value)
|
|
316 |
if (name.equals("out-of-text-to-edit-locutor")) {
|
|
317 |
interviewers = /$value/
|
|
312 | 318 |
} |
313 | 319 |
writer.writeEndElement(); // tr |
314 | 320 |
} |
... | ... | |
493 | 499 |
firstWho = true; |
494 | 500 |
spokenTurn = false; |
495 | 501 |
overlapping = false |
502 |
nSilence = 0 // will count the number of silence written to avoid writting [silence] at the sp end |
|
496 | 503 |
|
497 | 504 |
writer.writeStartElement("p"); |
498 | 505 |
writer.writeAttribute("class", "turn"); |
506 |
writer.writeCharacters("\n"); |
|
499 | 507 |
|
508 |
this.startTimeSp = parser.getAttributeValue(null, "start") |
|
509 |
this.endTimeSp = parser.getAttributeValue(null, "end") |
|
510 |
|
|
500 | 511 |
overlapping = ("true" == parser.getAttributeValue(null,"overlap")) |
501 | 512 |
String spid = parser.getAttributeValue(null, "who"); |
502 | 513 |
|
... | ... | |
512 | 523 |
break; |
513 | 524 |
case "u": |
514 | 525 |
writer.writeCharacters("\n"); |
515 |
this.currentTime = parser.getAttributeValue(null,"time"); |
|
526 |
this.previousStartTimeU = this.startTimeU |
|
527 |
this.startTimeU = parser.getAttributeValue(null, "start"); |
|
528 |
this.currentUTime = parser.getAttributeValue(null, "time"); |
|
516 | 529 |
|
517 | 530 |
if (previousElem == "u" && writenLength == 0) { // if previous u had no words, it was a silence |
531 |
def duration = "" |
|
532 |
try { |
|
533 |
def d = Float.parseFloat(this.startTimeU) - Float.parseFloat(this.previousStartTimeU) |
|
534 |
duration = " "+TimeFormatter.formatTime(d); |
|
535 |
} catch(Exception e) { e.printStackTrace()} |
|
518 | 536 |
writer.writeStartElement("span"); |
519 | 537 |
writer.writeAttribute("class", "event"); |
520 |
writer.writeCharacters("[silence]"); |
|
538 |
writer.writeCharacters("[silence$duration]");
|
|
521 | 539 |
writer.writeEndElement(); // span |
522 |
writer.writeEmptyElement("br"); |
|
540 |
nSilence++ |
|
541 |
//writer.writeEmptyElement("br"); |
|
523 | 542 |
} |
524 | 543 |
|
525 | 544 |
String spk = parser.getAttributeValue(null, "who") |
... | ... | |
534 | 553 |
previousSPK = spk |
535 | 554 |
if (overlapping) previousSPK = null |
536 | 555 |
|
556 |
writenLength = 0; |
|
537 | 557 |
// writenLength = 0; |
538 | 558 |
/*writer.writeStartElement("span"); |
539 | 559 |
writer.writeAttribute("class", "sync"); |
... | ... | |
652 | 672 |
case "sp": |
653 | 673 |
//println "CLOSING: "+parser.getLocalName() |
654 | 674 |
endBoldIfNeeded() |
655 |
if (!spokenTurn) { |
|
675 |
if (!spokenTurn && nSilence == 0) {
|
|
656 | 676 |
writer.writeStartElement("span"); |
657 | 677 |
writer.writeAttribute("class", "event"); |
658 |
writer.writeCharacters("[silence]"); |
|
678 |
String duration = "" |
|
679 |
try { |
|
680 |
def d = Float.parseFloat(endTimeSp)-Float.parseFloat(startTimeSp) |
|
681 |
duration = " "+TimeFormatter.formatTime(d); |
|
682 |
} catch (Exception e) {e.printStackTrace()} |
|
683 |
writer.writeCharacters("[silence$duration]"); |
|
659 | 684 |
writer.writeEndElement(); |
660 | 685 |
writer.writeEmptyElement("br"); |
661 | 686 |
} |
... | ... | |
680 | 705 |
//writer.writeEndElement() // span@class=u |
681 | 706 |
//writer.writeEmptyElement("br"); |
682 | 707 |
//if (overlapping) writer.writeEndElement(); // b |
708 |
writer.writeCharacters("\n"); |
|
683 | 709 |
break; |
684 | 710 |
case "event": |
685 | 711 |
break; |
... | ... | |
694 | 720 |
spokenTurn = true; |
695 | 721 |
int l = lastword.length(); |
696 | 722 |
String endOfLastWord = ""; |
697 |
if(l > 0)
|
|
723 |
if (l > 0) {
|
|
698 | 724 |
endOfLastWord = lastword.subSequence(l-1, l); |
699 |
|
|
725 |
} |
|
700 | 726 |
if (interpvalue != null) { |
701 | 727 |
interpvalue = interpvalue.replace("\"","&quot;"); |
702 | 728 |
} |
... | ... | |
726 | 752 |
|
727 | 753 |
if (interpvalue.contains("rapp1")) { |
728 | 754 |
writer.writeCharacters(" «"); |
729 |
} else if (wordvalue == "\"") { |
|
730 |
// don't write this char |
|
731 |
} else { |
|
732 |
writer.writeStartElement("span"); |
|
733 |
writer.writeAttribute("class", "word"); |
|
734 |
writer.writeAttribute("title", interpvalue); |
|
735 |
writer.writeAttribute("id", wordid); |
|
736 |
writer.writeCharacters(wordvalue); |
|
737 |
writer.writeEndElement(); |
|
738 |
} |
|
755 |
} |
|
756 |
|
|
757 |
writer.writeStartElement("span"); |
|
758 |
writer.writeAttribute("class", "word"); |
|
759 |
writer.writeAttribute("title", interpvalue); |
|
760 |
writer.writeAttribute("id", wordid); |
|
761 |
writer.writeCharacters(wordvalue); |
|
762 |
writer.writeEndElement(); |
|
763 |
|
|
739 | 764 |
if (interpvalue.contains("orth")) { |
740 | 765 |
writer.writeStartElement("span"); |
741 | 766 |
writer.writeAttribute("class", "event"); |
... | ... | |
748 | 773 |
writer.writeCharacters("_[!]"); |
749 | 774 |
writer.writeEndElement(); |
750 | 775 |
} |
751 |
|
|
752 | 776 |
if (interpvalue.contains("rapp2")) { |
753 |
writer.writeCharacters(" » ");
|
|
777 |
writer.writeCharacters("» "); |
|
754 | 778 |
} |
755 | 779 |
|
756 | 780 |
lastword=wordvalue; |
... | ... | |
817 | 841 |
private void writeCurrentTime() { |
818 | 842 |
writer.writeStartElement("span"); |
819 | 843 |
writer.writeAttribute("class", "sync"); |
820 |
writer.writeCharacters(currentTime); |
|
844 |
writer.writeCharacters(currentUTime);
|
|
821 | 845 |
|
822 |
writeMediaAccess(currentTime) |
|
846 |
writeMediaAccess(currentUTime)
|
|
823 | 847 |
|
824 | 848 |
writer.writeEndElement() // span |
825 | 849 |
} |
... | ... | |
838 | 862 |
|
839 | 863 |
writer.writeStartElement("span"); |
840 | 864 |
writer.writeAttribute("class", "spk"); |
841 |
if(interviewers.contains(spk)) { |
|
842 |
bold = true; |
|
843 |
} else { |
|
844 |
bold = false; |
|
845 |
} |
|
865 |
bold = interviewers != null && interviewers.matches(spk) |
|
846 | 866 |
spk = spk.replaceAll('^([^0-9]*)([0-9]+)$', '$1 $2'); |
847 | 867 |
if (overlapping) { |
848 | 868 |
writer.writeCharacters("// ") |
... | ... | |
876 | 896 |
} |
877 | 897 |
} |
878 | 898 |
|
879 |
// private String formatTime(float time, boolean doshort) |
|
880 |
// { |
|
881 |
// String rez = " "; |
|
882 |
// // if(time >= 3600) // >= 1h |
|
883 |
// // { |
|
884 |
// float h = time / 3600; |
|
885 |
// time = time%3600; |
|
886 |
// float min = (time%3600) / 60; |
|
887 |
// int sec = (int)time%60; |
|
888 |
// |
|
889 |
// if(min < 10) |
|
890 |
// rez = ""+(int)h+":0"+(int)min;//+":"+time%60; |
|
891 |
// else |
|
892 |
// rez = ""+(int)h+":"+(int)min;//+":"+time%60; |
|
893 |
// //if (!doshort) |
|
894 |
// if (sec > 9) |
|
895 |
// rez += ":"+(int)time%60; |
|
896 |
// else |
|
897 |
// rez += ":0"+(int)time%60; |
|
898 |
// // } |
|
899 |
// // else if(time >= 60) // >= 1min |
|
900 |
// // { |
|
901 |
// // int min = time/60; |
|
902 |
// // if(min < 10) |
|
903 |
// // rez = "00:0"+min;//+":"+time%60; |
|
904 |
// // else |
|
905 |
// // rez = "00:"+min;//+":"+time%60; |
|
906 |
// // if(!doshort) |
|
907 |
// // rez += ":"+(int)time%60; |
|
908 |
// // } |
|
909 |
// // else // < 60 |
|
910 |
// // { |
|
911 |
// // if(time < 10) |
|
912 |
// // return " 0:0"+time; |
|
913 |
// // else |
|
914 |
// // return " 0:"+time; |
|
915 |
// // } |
|
916 |
// return rez; |
|
917 |
// } |
|
918 |
|
|
919 | 899 |
/** |
920 | 900 |
* Gets the page files. |
921 | 901 |
* |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2999) | ||
---|---|---|
225 | 225 |
ArrayList<Pair<String, String>> metas = metadatas.get(filename) |
226 | 226 |
//println "filename=$filename metas= $metas" |
227 | 227 |
for (Pair p : metas) { |
228 |
if (p.getFirst().startsWith("enq")) {
|
|
229 |
new RemoveSpeaker(infile, infile, p.getFirst())
|
|
228 |
if (p.getFirst().startsWith("out-of-text-to-edit-locutor")) {
|
|
229 |
new RemoveSpeaker(infile, infile, p.getSecond())
|
|
230 | 230 |
} |
231 | 231 |
} |
232 | 232 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/RemoveSpeaker.groovy (revision 2999) | ||
---|---|---|
31 | 31 |
import org.txm.scripts.importer.*; |
32 | 32 |
import org.txm.scripts.importer.graal.PersonalNamespaceContext |
33 | 33 |
import org.txm.utils.*; |
34 |
import org.txm.utils.xml.DomUtils |
|
34 | 35 |
import org.txm.metadatas.*; |
35 | 36 |
|
36 | 37 |
import java.io.File; |
... | ... | |
65 | 66 |
* @param outfile the outfile |
66 | 67 |
* @param id the id |
67 | 68 |
*/ |
68 |
public RemoveSpeaker(File transcriptionfile, File outfile, String id) |
|
69 |
{ |
|
69 |
public RemoveSpeaker(File transcriptionfile, File outfile, String idRegex) { |
|
70 | 70 |
System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl"); |
71 | 71 |
|
72 | 72 |
this.outfile = outfile; |
73 |
String xpathString = "//tei:u[@spk='"+id+"']";
|
|
73 |
// String xpathString = "//u";
|
|
74 | 74 |
//println "removing $xpathString in $transcriptionfile" |
75 |
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); |
|
76 |
//println "domFactory: $domFactory" |
|
77 |
domFactory.setNamespaceAware(true); // never forget this! |
|
78 |
domFactory.setXIncludeAware(true); |
|
79 |
DocumentBuilder builder = domFactory.newDocumentBuilder(); |
|
75 |
// DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
|
|
76 |
// //println "domFactory: $domFactory"
|
|
77 |
// domFactory.setNamespaceAware(true); // never forget this!
|
|
78 |
// domFactory.setXIncludeAware(true);
|
|
79 |
// DocumentBuilder builder = domFactory.newDocumentBuilder();
|
|
80 | 80 |
//println "builder $builder" |
81 |
doc = builder.parse(transcriptionfile); |
|
81 |
//doc = builder.parse(transcriptionfile);
|
|
82 | 82 |
//println "doc $doc" |
83 |
doc = DomUtils.load(transcriptionfile) |
|
84 |
//def xpath = XPathFactory.newInstance().newXPath() |
|
85 |
// xpath.setNamespaceContext(new PersonalNamespaceContext()); |
|
86 |
//def expr = xpath.compile(xpathString); |
|
87 |
def nodes = doc.getDocumentElement().getElementsByTagName("u") |
|
83 | 88 |
|
84 |
def xpath = XPathFactory.newInstance().newXPath() |
|
85 |
xpath.setNamespaceContext(new PersonalNamespaceContext()); |
|
86 |
def expr = xpath.compile(xpathString); |
|
87 |
def nodes = expr.evaluate(doc, XPathConstants.NODESET); |
|
89 |
def reg = /$idRegex/ |
|
88 | 90 |
|
89 | 91 |
for (def node : nodes) { |
90 |
//println "remove node "+node |
|
91 |
Element elem = (Element)node; |
|
92 |
elem.getParentNode().removeChild(node); |
|
92 |
|
|
93 |
//Element elem = (Element)node; |
|
94 |
|
|
95 |
String who = node.getAttribute("who") // [@who='"+idRegex+"'] |
|
96 |
if (reg.matches(who)) { |
|
97 |
node.getParentNode().removeChild(node); |
|
98 |
} |
|
93 | 99 |
} |
94 | 100 |
save() |
95 | 101 |
} |
... | ... | |
99 | 105 |
* |
100 | 106 |
* @return true, if successful |
101 | 107 |
*/ |
102 |
private boolean save() |
|
103 |
{ |
|
108 |
private boolean save() { |
|
104 | 109 |
try { |
105 | 110 |
// Création de la source DOM |
106 | 111 |
Source source = new DOMSource(doc); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/TranscriberTokenizer.groovy (revision 2999) | ||
---|---|---|
70 | 70 |
* @param infile the infile |
71 | 71 |
* @param outfile the outfile |
72 | 72 |
*/ |
73 |
public TranscriberTokenizer(File infile, File outfile, String lang) |
|
74 |
{ |
|
73 |
public TranscriberTokenizer(File infile, File outfile, String lang) { |
|
75 | 74 |
super(infile, outfile, lang) |
76 | 75 |
txtname = infile.getName(); |
77 | 76 |
int idx = txtname.lastIndexOf(".") |
... | ... | |
96 | 95 |
audio = "present" |
97 | 96 |
notation = s; |
98 | 97 |
event = ""; |
99 |
|
|
100 |
if (s.startsWith("\"")) { |
|
101 |
rapp = true; |
|
102 |
event += "#rapp1"; |
|
103 |
} else if(s.endsWith("\"")) { |
|
104 |
rapp = false; |
|
105 |
event += "#rapp2"; |
|
106 |
} |
|
107 | 98 |
|
99 |
//TODO does not work (eg ' "word" '). This step should be done after the tokenizer step is done |
|
100 |
// if (s.startsWith("\"") && s.endsWith("\"")) { |
|
101 |
// // not rapp1 or rapp2 |
|
102 |
// } else if (s.startsWith("\"")) { |
|
103 |
// rapp = true; |
|
104 |
// event += "#rapp1"; |
|
105 |
// } else if(s.endsWith("\"")) { |
|
106 |
// rapp = false; |
|
107 |
// event += "#rapp2"; |
|
108 |
// } |
|
109 |
|
|
108 | 110 |
//test events |
109 | 111 |
if (s.startsWith("^^")) { |
110 | 112 |
event += "#orth"; |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2999) | ||
---|---|---|
90 | 90 |
|
91 | 91 |
|
92 | 92 |
/** The interviewers. */ |
93 |
ArrayList<String> interviewers = [];
|
|
93 |
def interviewers = null
|
|
94 | 94 |
static HashSet<String> sectionAttrs; |
95 | 95 |
|
96 | 96 |
/** The anatypes. */ |
... | ... | |
481 | 481 |
break; |
482 | 482 |
case "w": |
483 | 483 |
// concat spk id and ref |
484 |
String isEnq = (interviewers.contains(u_name))?"*":"";
|
|
484 |
String isEnq = (interviewers != null && interviewers.matches(u_name))?"*":"";
|
|
485 | 485 |
String ref = (u_name+", "+formatedTime+""+isEnq) |
486 | 486 |
if (ADD_TEXTID_TO_REF) ref = textid+", "+ref |
487 | 487 |
vForm +="\t"+u_name+"\t"+ref |
... | ... | |
508 | 508 |
} |
509 | 509 |
} |
510 | 510 |
|
511 |
|
|
512 | 511 |
vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;"); |
513 | 512 |
|
514 | 513 |
if (removeinterviewers) { |
515 |
if (!interviewers.contains(u_name))
|
|
514 |
if (!interviewers.matches(u_name))
|
|
516 | 515 |
output.write(vForm+"\t"+wordid+vAna+"\n"); |
517 | 516 |
} else { |
518 | 517 |
output.write(vForm+"\t"+wordid+vAna+"\n"); |
... | ... | |
660 | 659 |
|
661 | 660 |
for (int i = 0 ; i < parser.getAttributeCount() ; i ++) { |
662 | 661 |
list.add(new Pair(parser.getAttributeLocalName(i).replace("_","").toLowerCase(), parser.getAttributeValue(i))); |
663 |
if (parser.getAttributeLocalName(i).startsWith("enq"))
|
|
664 |
interviewers.add(parser.getAttributeValue(i));
|
|
662 |
if (parser.getAttributeLocalName(i).equals("out-of-text-to-edit-locutor"))
|
|
663 |
interviewers = /${parser.getAttributeValue(i)}/;
|
|
665 | 664 |
} |
666 | 665 |
return |
667 | 666 |
case "Topic": |
Formats disponibles : Unified diff