Révision 4017
| TXM/trunk/bundles/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImport.groovy (revision 4017) | ||
|---|---|---|
| 60 | 60 |
return; |
| 61 | 61 |
} |
| 62 | 62 |
|
| 63 |
def formatSentences = "true" == UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.IMPORT_PRINT_NEWLINES_AFTER_SENTENCES, ""+UDPreferences.getInstance().getString(UDPreferences.IMPORT_PRINT_NEWLINES_AFTER_SENTENCES)) |
|
| 64 |
println "FORMATTING SENTENCES: "+formatSentences |
|
| 65 |
if (formatSentences) {
|
|
| 66 |
File conlluCorpusCSS = new File(this.binaryDirectory, "HTML/"+this.project.getName()+"/default/css/"+this.project.getName()+".css") |
|
| 67 |
conlluCorpusCSS.getParentFile().mkdirs() |
|
| 68 |
conlluCorpusCSS << """p[type="sentence"]::before {
|
|
| 69 |
content:var(--before-content); |
|
| 70 |
color:grey; |
|
| 71 |
} |
|
| 72 |
""" |
|
| 73 |
println "conlluCorpusCSS=$conlluCorpusCSS" |
|
| 74 |
} |
|
| 75 |
|
|
| 63 | 76 |
if (cutps.canBuildTSFiles()) {
|
| 64 | 77 |
|
| 65 | 78 |
println "Converting CoNLL-U files to TIGER-XML files..." |
| ... | ... | |
| 118 | 131 |
// write the modified driver file |
| 119 | 132 |
IOUtils.setText(driver, content, "UTF-8"); |
| 120 | 133 |
|
| 121 |
|
|
| 122 | 134 |
FixDriverFile.fixFeatureValues(driver, tigerxmlFiles) |
| 123 | 135 |
|
| 124 |
|
|
| 125 |
|
|
| 126 |
|
|
| 127 |
|
|
| 128 | 136 |
// build TIGER indexes |
| 129 | 137 |
if (isSuccessful) {
|
| 130 | 138 |
// read from the 'tiger-xml' and write to the 'tiger' directory |
| TXM/trunk/bundles/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImporter.groovy (revision 4017) | ||
|---|---|---|
| 119 | 119 |
lines.remove(i) |
| 120 | 120 |
i-- |
| 121 | 121 |
continue; /// next ! |
| 122 |
} else if (temp_multiwords.containsKey(split[0])) { // it's a syntactic word of an orthographic word -> add the orthographic form in the misc field
|
|
| 122 |
} else if (temp_multiwords.containsKey(split[0])) { // it's a syntactic word of an orthographic word
|
|
| 123 | 123 |
def split_ortho = temp_multiwords.remove(split[0]) |
| 124 | 124 |
|
| 125 | 125 |
if (split[9].length() > 0) split[9] += "|" |
| 126 | 126 |
split[9] += "multiword="+split_ortho[1] // the orthographic form |
| 127 |
|
|
| 128 | 127 |
} |
| 129 | 128 |
} else if (contractionsManagement == UDPreferences.SURFACE) {
|
| 130 | 129 |
if (split[0].contains("-")) {
|
| ... | ... | |
| 276 | 275 |
|
| 277 | 276 |
def depsPropertiesToProject = UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.IMPORT_DEPS_TO_PROJECT, UDPreferences.getInstance().getString(UDPreferences.IMPORT_DEPS_TO_PROJECT)).split(",") as Set
|
| 278 | 277 |
|
| 279 |
def printNewLines = "true" == UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.IMPORT_PRINT_NEWLINES_AFTER_SENTENCES, ""+UDPreferences.getInstance().getString(UDPreferences.IMPORT_PRINT_NEWLINES_AFTER_SENTENCES))
|
|
| 278 |
def formatSentences = "true" == UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.IMPORT_PRINT_NEWLINES_AFTER_SENTENCES, ""+UDPreferences.getInstance().getString(UDPreferences.IMPORT_PRINT_NEWLINES_AFTER_SENTENCES))
|
|
| 280 | 279 |
|
| 281 | 280 |
String contractionsManagement = UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.CONTRACTIONS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.CONTRACTIONS_MANAGEMENT)); |
| 282 | 281 |
|
| ... | ... | |
| 421 | 420 |
ImportCoNLLUAnnotations.buildPropertiesProjections(sentencehash, headPropertiesToProject, depsPropertiesToProject) |
| 422 | 421 |
} |
| 423 | 422 |
|
| 424 |
if (printNewLines) {
|
|
| 423 |
if (formatSentences) {
|
|
| 425 | 424 |
writer.writeStartElement("p")
|
| 426 | 425 |
writer.writeAttribute("type", "sentence")
|
| 426 |
writer.writeAttribute("style", "--before-content:'$sent_id';")
|
|
| 427 | 427 |
} |
| 428 | 428 |
|
| 429 | 429 |
for (def word : words) {
|
| 430 | 430 |
|
| 431 |
println "UD-ID="+word["id"] |
|
| 432 |
if (word["id"].contains("-")) {
|
|
| 433 |
writer.writeStartElement("seg")
|
|
| 434 |
writer.writeCharacters("******")
|
|
| 435 |
writer.writeEndElement() // seg |
|
| 436 |
} |
|
| 437 |
|
|
| 431 | 438 |
String id = null |
| 432 | 439 |
wordCounter++ |
| 433 | 440 |
writer.writeStartElement ("w")
|
| ... | ... | |
| 454 | 461 |
writer.writeCharacters(" ")
|
| 455 | 462 |
} |
| 456 | 463 |
|
| 457 |
if (printNewLines) writer.writeEndElement()
|
|
| 464 |
if (formatSentences) writer.writeEndElement()
|
|
| 458 | 465 |
|
| 459 | 466 |
writer.writeCharacters("\n")
|
| 460 | 467 |
writer.writeEndElement() // s |
| TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/function/BratPrintTree.java (revision 4017) | ||
|---|---|---|
| 9 | 9 |
import org.txm.utils.io.IOUtils; |
| 10 | 10 |
|
| 11 | 11 |
public class BratPrintTree {
|
| 12 |
|
|
| 12 | 13 |
public static File print(File file, List<String> conll, String[] Tvalues, String[] NTvalues) {
|
| 13 | 14 |
|
| 14 | 15 |
try {
|
| TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/function/UDTreeSearch.java (revision 4017) | ||
|---|---|---|
| 12 | 12 |
import org.txm.conllu.core.preferences.UDPreferences; |
| 13 | 13 |
import org.txm.conllu.core.preferences.UDTreePreferences; |
| 14 | 14 |
import org.txm.conllu.core.search.UDSearchEngine; |
| 15 |
import org.txm.core.results.TXMParameters; |
|
| 16 | 15 |
import org.txm.libs.deptreeviz.UDDepTreeVizPrintTree; |
| 17 | 16 |
import org.txm.searchengine.core.Selection; |
| 18 | 17 |
import org.txm.searchengine.core.SimpleSelection; |
| ... | ... | |
| 49 | 48 |
super(parentNodePath, corpus); |
| 50 | 49 |
|
| 51 | 50 |
this.corpus = getParent(); |
| 52 |
|
|
| 53 |
|
|
| 54 | 51 |
} |
| 55 | 52 |
|
| 56 | 53 |
public UDTreeSearch(String parentNodePath) {
|
| TXM/trunk/bundles/org.txm.conllu.core/template/conllu.js (revision 4017) | ||
|---|---|---|
| 66 | 66 |
*/ |
| 67 | 67 |
Document.prototype.parse = function(input, logger, strict) {
|
| 68 | 68 |
// discard previous state, if any |
| 69 |
|
|
| 70 |
strict = false; |
|
| 71 |
|
|
| 69 | 72 |
this.reset(); |
| 70 | 73 |
|
| 71 | 74 |
if (logger !== undefined) {
|
Formats disponibles : Unified diff