/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

     	 * @param tigerPosition
     	 * @return 1 if the TIGER position has a position in CQP ; 0 if not
     	 */
     	public int getPresence(int tigerPosition) {
     	public byte getPresence(int tigerPosition) {
     		if (presencesMapped != null) {
     			return presencesMapped.getInt(tigerPosition * Integer.BYTES);
     			return presencesMapped.get(tigerPosition);
+    		}
     		else {
     			return 0;
-...
     	 * @param tigerPositions
     	 * @return 1 if the TIGER position has a position in CQP ; 0 if not
     	 */
     	public int[] getPresences(int tigerPositions[]) {
     		int[] ret = new int[tigerPositions.length];
     	public byte[] getPresences(int tigerPositions[]) {
     		byte[] ret = new byte[tigerPositions.length];
     		if (presencesMapped != null) {
     			for (int i = 0; i < tigerPositions.length; i++) {
     				ret[i] = presencesMapped.getInt(tigerPositions[i] * Integer.BYTES);
     				ret[i] = presencesMapped.get(tigerPositions[i] * Integer.BYTES);
+    			}
+    		}

     import org.txm.utils.io.IOUtils;
     import org.txm.utils.logger.Log;
     import cern.colt.Arrays;
     import ims.tiger.corpus.Feature;
     import ims.tiger.corpus.Header;
     import ims.tiger.corpus.Sentence;
-...
     		int iPivot = variables.indexOf("pivot"); //$NON-NLS-1$
     		MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped();
     		MappedByteBuffer presenceMapped = tcorpus.getPresencesMapped();
     		// MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped();
     		ArrayList<String> warnings = new ArrayList<String>();
     		boolean useSubMatches = TigerSearchTreePreferences.getInstance().getBoolean(TigerSearchTreePreferences.USESUBMATCHES);
-...
     					if (iPivot != -1 && i != iPivot) continue; // skip match that are not 'pivot'
     					int left = sent_start + index.getLeftCorner(sent, match[i]);
     					if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
     						left += offsetsMapped.getInt(left * Integer.BYTES);
     						// System.out.println("left="+left+" offset="+offsetsMapped.getInt(left*Integer.BYTES));
+    					}
     					int right = sent_start + index.getRightCorner(sent, match[i]);
     					if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
     						right += offsetsMapped.getInt(right * Integer.BYTES);
+    					}
     					// System.out.println(" M="+match[i]+" ("+left+", "+right+")");
     					// test if the match position is also in the CQP positions
     					if (presenceMapped.get(left) > 0  && presenceMapped.get(right) > 0) {
     					TIGERMatch tigerMatch = new TIGERMatch(left, right);
     						if (offsetsMapped != null && presenceMapped != null) { // the TIGER token is not in the CQP corpus
     							if (presenceMapped.get(left) > 0) {
     								left += offsetsMapped.getInt(left * Integer.BYTES);
+    							}
     							// System.out.println("left="+left+" offset="+offsetsMapped.getInt(left*Integer.BYTES));
+    						}
     					// System.out.println(" ajusted="+(tigerMatch));
     					tigerMatchesList.add(tigerMatch);
     						if (offsetsMapped != null && presenceMapped != null) { // the TIGER token is not in the CQP corpus
     							if (presenceMapped.get(right) > 0) {
     								right += offsetsMapped.getInt(right * Integer.BYTES);
+    							}
+    						}
     						// System.out.println(" M="+match[i]+" ("+left+", "+right+")");
     					if (!useSubMatches) { // use only the first submatch
     						break;
     						TIGERMatch tigerMatch = new TIGERMatch(left, right);
     						// System.out.println(" ajusted="+(tigerMatch));
     						tigerMatchesList.add(tigerMatch);
     						if (!useSubMatches) { // use only the first submatch
     							break;
+    						}
     					} else {
     						warnings.add("<"+left+", "+right+">");
+    					}
+    				}
+    			}
+    		}
     		if (warnings.size() > 0) {
     			Log.warning("Some TIGER matches are not in the CQP corpus: "+StringUtils.join(warnings, ", "));
+    		}
     		// intersect with corpus matches
     		List<? extends Match> result2 = Match.intersect(corpus.getMatches(), new ArrayList<>(tigerMatchesList), true);
-...
     			int[] ids_idx = CQI.str2Id(corpus.getProperty("id").getQualifiedName(), ids); //$NON-NLS-1$
     			Integer[] cqpPositions = new Integer[sent_size];
     			Integer[] offsets = new Integer[sent_size];
     			boolean error = false;
     			for (int t = 0; t < sent_size; t++) {
     				if (ids_idx[t] >= 0) {
     					int[] positions = CQI.id2Cpos(corpus.getProperty("id").getQualifiedName(), ids_idx[t]); //$NON-NLS-1$
-...
+    				}
     				else { // word not in the CQP corpus
     					Log.warning("Could not find word for id=" + ids[t]);
     					cqpPositions[t] = null;
     					error = true;
+    				}
     				if (cqpPositions[t] != null) {
-...
     					offsets[t] = null;
+    				}
+    			}
     			if (error) {
     				Log.warning("	IDS      =" + " "+ids.length+" "+Arrays.toString(ids));
     				Log.warning("	IDS_IDX  =" + " "+ids_idx.length+" "+Arrays.toString(ids_idx));
     				Log.warning("	CQP      =" + " "+cqpPositions.length+" "+Arrays.toString(cqpPositions));
     				Log.warning("	TIGER    =" + " "+tigerPositions.length+" "+Arrays.toString(tigerPositions));
     				Log.warning("	OFFSET   =" + " "+offsets.length+" "+Arrays.toString(offsets));
+    			}
     			// System.out.println("ids="+Arrays.toString(ids));
     			// System.out.println("cqp indexes="+Arrays.toString(ids_idx));
     			// System.out.println("tiger positions="+Arrays.toString(tigerPositions));

     		files.sort()
     		println "Add XmlId if necessary & remove empty nodes"
     		String contractionsManagement =  UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.CONTRACTIONS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.CONTRACTIONS_MANAGEMENT));
     		ConsoleProgressBar cpb_texts = new ConsoleProgressBar(files.size())
     		for (File conlluFile : files) {
     			cpb_texts.tick()
-...
     						continue; // next !
+    					}
+    				}
     				def temp_multiwords = [:]
     				for (int i = 0 ; i < lines.size() ; i++) {
     					String line = lines[i]
     					if (line.length() == 0 || line.startsWith("#") || !line.contains("\t")) continue;
     					def split = line.split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length);
     					if (split[-1] != null && !split[-1].contains("XmlId=")) {
     					if (temp_multiwords.containsKey(split[0])) { // this word XMLid must be the same as its multiword id, see below
     						String id = temp_multiwords.remove(split[0]);
     						if (split[-1] == "_") {
     							split[-1] = "XmlId=w_"+textid+"_"+(wcounter++)
     							split[-1] = "XmlId="+id
     						} else {
     							split[-1] += "|XmlId=w_"+textid+"_"+(wcounter++)
     							split[-1] += "|XmlId="+id
+    						}
     					} else {
     						if (split[-1] != null && !split[-1].contains("XmlId=")) { // There is no XmlID -> create one and manage subwords
     							String id = "w_"+textid+"_"+(wcounter++);
     							if (split[-1] == "_") {
     								split[-1] = "XmlId="+id
     							} else {
     								split[-1] += "|XmlId="+id
+    							}
     							if (split[0].contains("-") && contractionsManagement == "surface") {
     								temp_multiwords = [:] // reset to avoid using another multiwords
     								String[] fromstart= split[0].split("-", 2)
     								int pfrom = Integer.parseInt(fromstart[0])
     								int pend = Integer.parseInt(fromstart[1])
     								for (int p = pfrom ; p <= pend ; p++) {
     									temp_multiwords.put(""+p, id)
+    								}
     								println temp_multiwords
+    							}
+    						}
+    					}
     					lines[i] = split.join("\t") // rebuild the line
-...
     		// Keep contractions or not
     		File conlluSrcForTXMDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu-fortxm")
     		String contractionsManagement =  UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.CONTRACTIONS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.CONTRACTIONS_MANAGEMENT));
     		if (contractionsManagement == UDPreferences.ALL) {
     			conlluSrcForTXMDirectory = conlluSrcDirectory; // use the same directory as TIGER since no word modifications have been done
     		} else {
     			conlluSrcForTXMDirectory.deleteDir()
     			conlluSrcForTXMDirectory.mkdirs()
     			println "Contractions managment mode is '$contractionsManagement'"
     			println "Contractions management mode is '$contractionsManagement'"
     			cpb_texts = new ConsoleProgressBar(files.size())
     			for (File conlluFile : files) {
     				cpb_texts.tick()
-...
     						if (line.length() == 0 || line.startsWith("#") || !line.contains("\t")) continue;
     						def split = line.split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length);
     						if (contractionsManagement == UDPreferences.SYNTAX) {
     							if (split[0].contains("-")) {
     								// stores the syntactic word id and the orthographic word properties
     								temp_multiwords = [:]
     								temp_multiwords = [:] // reset to avoid using another multiwords
     								int n1 = Integer.parseInt(split[0].substring(0,  split[0].indexOf("-")));
     								int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-")));
     								for (int ii = n1 ; ii <= n2 ; ii++) {

TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/function/BratPrintTree.java (revision 4025)
30	30	int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-")));
31	31	int n = n2 - n1;
32	32
33		//System.out.println("Word "+Arrays.toString(split));
34		//System.out.println("lines to insert: "+n);
35	33	if ( !(splittedLines.get(i+1)[0].equals(""+n1)) \|\| !(splittedLines.get(i+n+1)[0].equals(""+n2)) ) {
36	34
37	35	ArrayList<String[]> newlines = new ArrayList<>();
...	...
73	71	} else {
74	72	//System.out.println("NOT FIXING "+conll.get(i));
75	73	}
76
77
78	74	}
79	75	}
80	76

     Bundle-SymbolicName: org.txm.treetagger.core;singleton:=true
     Bundle-Version: 1.0.0.qualifier
     Bundle-Name: TreeTagger Core
     Require-Bundle: org.txm.nlp.core;bundle-version="1.0.0",
      org.txm.core
     Require-Bundle: org.txm.core,
      org.txm.nlp.core;bundle-version="1.0.0"
     Bundle-ActivationPolicy: lazy
     Bundle-ManifestVersion: 2
     Bundle-RequiredExecutionEnvironment: JavaSE-16

     <?eclipse version="3.4"?>
     <plugin>
        <extension
              point="org.txm.annotation.core.AnnotationEngine">
              point="org.txm.nlp.core.NLPEngine">
           <AnnotationEngine
                 class="org.txm.udpipe.core.UDPipeEngine"
                 description="UDPipe wrapper">

Laboratoire ICAR » Plateforme TXM

Révision 4025