Révision 4006

TXM/trunk/bundles/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImporter.groovy (revision 4006)
67 67
		files.sort()
68 68
		
69 69
		// Decide how contractions (multiword tokens) are handled: keep them, drop them, or keep both forms
70
		String keepContractions =  UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.MULTIWORDS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.MULTIWORDS_MANAGEMENT));
70
		String keepContractions =  UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.CONTRACTIONS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.CONTRACTIONS_MANAGEMENT));
71 71
		
72
			println "Multiwords managment ($keepContractions) & add XmlId if necessary & remove empty nodes"
72
			println "Contractions managment ($keepContractions) & add XmlId if necessary & remove empty nodes"
73 73
			ConsoleProgressBar cpb_texts = new ConsoleProgressBar(files.size())
74 74
			for (File conlluFile : files) {
75 75
				cpb_texts.tick()
......
100 100
						
101 101
						if (keepContractions == UDPreferences.ALL) {
102 102
							// OK, nothing to do
103
						} else if (keepContractions == UDPreferences.TOKENS) {
103
						} else if (keepContractions == UDPreferences.SYNTAX) {
104 104
							if (split[0].contains("-")) {
105 105
								//println "REMOVE - $split"
106 106
								lines.remove(i)
107 107
								i--
108 108
								continue; /// next !
109 109
							}
110
						} else if (keepContractions == UDPreferences.MULTIWORDS) {
110
						} else if (keepContractions == UDPreferences.SURFACE) {
111 111
							if (split[0].contains("-")) {
112 112
								int n1 = Integer.parseInt(split[0].substring(0,  split[0].indexOf("-")));
113
								split[0] = ""+n1
114
								
115
								
116 113
								int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-")));
117
								int n =  1 + n2 - n1
114
								int n =  n2 - n1
118 115
								
116
								split[0] = ""+n1
117
								
119 118
								def splits = []
120 119
								for (int j = 1 ; j <= n ;j++) {
121 120
									def tmp = lines[i+j].split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length);
......
135 134
								if (split[9].length() > 0) split[9] += "|"
136 135
								split[9] += "expand="+splits.collect(){it[1]}.join("_")
137 136
								
138
								//println "REMOVE non- $split"
139
								for (int j = 1 ; j <= n ;j++) {
140
									splits << lines[i+j].split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length);
137
								println "REMOVE non- $split"
138
								for (int j = 0 ; j <= n ;j++) {
141 139
									lines.remove(i+1)
142 140
								}
141
								println "splits=$splits"
143 142
							}
144 143
						}
145 144
						
TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/preferences/UDPreferences.java (revision 4006)
17 17
	
18 18
	public static String UDPREFIX = "ud_prefix";
19 19
	
20
	public static String MULTIWORDS_MANAGEMENT = "import_MULTIWORDS_MANAGEMENT";
20
	public static String CONTRACTIONS_MANAGEMENT = "import_contractions_MANAGEMENT";
21 21
	
22
	public static String MULTIWORDS = "multiwords";
22
	public static String SURFACE = "surface";
23 23
	
24
	public static String TOKENS = "tokens";
24
	public static String SYNTAX = "syntax";
25 25
	
26 26
	public static String ALL = "all";
27 27
	
......
54 54
		preferences.put(UDPREFIX, "ud-");
55 55
		preferences.put(IMPORT_HEAD_TO_PROJECT, "upos,deprel");
56 56
		preferences.put(IMPORT_DEPS_TO_PROJECT, "upos,deprel");
57
		preferences.put(MULTIWORDS_MANAGEMENT, TOKENS); // boths, multiwords, tokens
57
		preferences.put(CONTRACTIONS_MANAGEMENT, SURFACE); // surface, syntax, all
58 58
		preferences.putBoolean(IMPORT_USE_NEW_DOC_ID, true);
59 59
		preferences.putBoolean(IMPORT_BUILD_TIGERSEARCH_INDEXES, true);
60 60
	}

Formats disponibles : Unified diff