Révision 2692

tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 2692)
173 173
	/** The writer. */
174 174
	private BufferedWriter writer;
175 175
	
176
	
177
	
178
	
179 176
	@Parameter(key = CooccurrencePreferences.QUERY_FILTER)
180 177
	protected String pCooccurentQueryFilter = "[]"; //$NON-NLS-1$
181 178
	
......
255 252
		
256 253
		// clear data
257 254
		try {
255
			this.numberOfCooccurrents = -1;
258 256
			this.lines.clear();
259 257
			this.allsignaturesstr.clear();
260 258
			this.conclines.clear();
......
1117 1115
		int startsearchM2 = 0; // optimisation: m2 is ordered
1118 1116
		int startsearchM3 = 0; // optimisation: m3 is ordered
1119 1117
		// time = System.currentTimeMillis();
1118
		
1119
		HashMap<Integer, Integer> positionsDistances = new HashMap<>();
1120
		
1120 1121
		for (Match m : m1) { // for each match = for each focus
1121 1122
			
1122 1123
			if (m.getTarget() >= 0) { // if target is set focus on target position
......
1165 1166
			// System.out.println("positions");
1166 1167
			// System.out.println("start: "+(start)+" end:"+n.getEnd());
1167 1168
			for (int position = start; position <= n.getEnd(); position++) {
1168
				// creates the list of positions, anticontext not yet removed
1169
				positions[noOcc++] = position;
1170
				// System.out.print(" "+position);
1171
			}
1172
			
1173
			
1174
			
1175
			// find the shortest distance for each position
1176
			HashMap<Integer, Integer> positionsDistances = new HashMap<>();
1177
			for (int position : positions) { // cooccurrent words positions
1178 1169
				
1179 1170
				if (o.getStart() <= position && position <= o.getEnd()) {
1180 1171
					// ignore positions in the anticontext positions
......
1182 1173
				}
1183 1174
				
1184 1175
				int dist;
1185
				if (position <= m.getStart()) {
1186
					dist = m.getStart() - position;
1176
				if (position < m.getStart()) {
1177
					dist = m.getStart() - position - 1;
1187 1178
				}
1188
				else if (m.getEnd() <= position) {
1189
					dist = position - m.getEnd();
1179
				else if (m.getEnd() < position) {
1180
					dist = position - m.getEnd() - 1;
1190 1181
				}
1191 1182
				else { // the n match is in the m match !?
1192
					System.out.println("Warning: the  n match is in the m match ? " + n + " " + m);
1183
					System.out.println("Warning: the n match is in the m match ? " + n + " " + m);
1193 1184
					dist = 0;
1194 1185
				}
1195 1186
				if (!positionsDistances.containsKey(position) || positionsDistances.get(position) > dist) {
......
1197 1188
				}
1198 1189
			}
1199 1190
			
1200
			// store and count distances for each position signature
1201
			noOcc = 0;
1202
			for (int position : positionsDistances.keySet()) { // cooccurrent words positions
1203
				// String signature = allsignatures.get(position);
1204
				String signaturestr = allsignaturesstr.get(position);
1205
				
1206
				if (!distances.containsKey(signaturestr)) {
1207
					distances.put(signaturestr, 0.0);
1208
				}
1209
				
1210
				if (!counts.containsKey(signaturestr)) {
1211
					counts.put(signaturestr, 0);
1212
				}
1213
				
1214
				int dist = positionsDistances.get(position);
1191
			// System.out.println("nb Occ ignored: "+ignore);
1192
			// System.out.println("nb Occ chevauche: "+chevauche);
1193
		}
1194
		
1195
		// store and count distances for each position signature
1196
		int noOcc = 0;
1197
		for (int position : positionsDistances.keySet()) { // cooccurrent words positions
1198
			// String signature = allsignatures.get(position);
1199
			String signaturestr = allsignaturesstr.get(position);
1200
			
1201
			int dist = positionsDistances.get(position);
1202
			if (distances.containsKey(signaturestr)) {
1215 1203
				distances.put(signaturestr, (distances.get(signaturestr)) + dist);
1204
			}
1205
			else {
1206
				distances.put(signaturestr, 0.0);
1207
			}
1208
			
1209
			if (counts.containsKey(signaturestr)) {
1216 1210
				counts.put(signaturestr, (counts.get(signaturestr)) + 1);
1217
				// }
1218
				
1219
				noOcc++;
1220 1211
			}
1212
			else {
1213
				counts.put(signaturestr, 1);
1214
			}
1221 1215
			
1222
			// System.out.println("nb Occ ignored: "+ignore);
1223
			// System.out.println("nb Occ chevauche: "+chevauche);
1216
			if ("[1599]".equals(signaturestr)) {
1217
				System.out.println("p=" + position + " d=" + dist + " total(d)=" + distances.get(signaturestr) + " c=" + counts.get(signaturestr));
1218
			}
1219
			// }
1220
			
1221
			noOcc++;
1224 1222
		}
1225 1223
		// System.out.println("T counts : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
1226 1224
		
......
1327 1325
				CLine cline = new CLine(this, specifrownames[ii], props,
1328 1326
						counts.get(signaturestr), // cofreq
1329 1327
						indexfreqs.get(specifrownames[ii]), scores[ii][1], // freq
1330
						((float) (distances.get(signaturestr) / counts.get(signaturestr))) - 1.0f, // mean distance
1328
						((float) (distances.get(signaturestr) / counts.get(signaturestr))), // mean distance
1331 1329
						-1);
1332 1330
				
1333 1331
				// select the line

Formats disponibles : Unified diff