Révision 2692
| tmp/org.txm.cooccurrence.core/src/org/txm/cooccurrence/core/functions/Cooccurrence.java (revision 2692) | ||
|---|---|---|
| 173 | 173 |
/** The writer. */ |
| 174 | 174 |
private BufferedWriter writer; |
| 175 | 175 |
|
| 176 |
|
|
| 177 |
|
|
| 178 |
|
|
| 179 | 176 |
@Parameter(key = CooccurrencePreferences.QUERY_FILTER) |
| 180 | 177 |
protected String pCooccurentQueryFilter = "[]"; //$NON-NLS-1$ |
| 181 | 178 |
|
| ... | ... | |
| 255 | 252 |
|
| 256 | 253 |
// clear data |
| 257 | 254 |
try {
|
| 255 |
this.numberOfCooccurrents = -1; |
|
| 258 | 256 |
this.lines.clear(); |
| 259 | 257 |
this.allsignaturesstr.clear(); |
| 260 | 258 |
this.conclines.clear(); |
| ... | ... | |
| 1117 | 1115 |
int startsearchM2 = 0; // optimisation: m2 is ordered |
| 1118 | 1116 |
int startsearchM3 = 0; // optimisation: m3 is ordered |
| 1119 | 1117 |
// time = System.currentTimeMillis(); |
| 1118 |
|
|
| 1119 |
HashMap<Integer, Integer> positionsDistances = new HashMap<>(); |
|
| 1120 |
|
|
| 1120 | 1121 |
for (Match m : m1) { // for each match = for each focus
|
| 1121 | 1122 |
|
| 1122 | 1123 |
if (m.getTarget() >= 0) { // if target is set focus on target position
|
| ... | ... | |
| 1165 | 1166 |
// System.out.println("positions");
|
| 1166 | 1167 |
// System.out.println("start: "+(start)+" end:"+n.getEnd());
|
| 1167 | 1168 |
for (int position = start; position <= n.getEnd(); position++) {
|
| 1168 |
// creates the list of positions, anticontext not yet removed |
|
| 1169 |
positions[noOcc++] = position; |
|
| 1170 |
// System.out.print(" "+position);
|
|
| 1171 |
} |
|
| 1172 |
|
|
| 1173 |
|
|
| 1174 |
|
|
| 1175 |
// find shortest distance for each positions |
|
| 1176 |
HashMap<Integer, Integer> positionsDistances = new HashMap<>(); |
|
| 1177 |
for (int position : positions) { // cooccurrent words positions
|
|
| 1178 | 1169 |
|
| 1179 | 1170 |
if (o.getStart() <= position && position <= o.getEnd()) {
|
| 1180 | 1171 |
// ignore positions in the anticontext positions |
| ... | ... | |
| 1182 | 1173 |
} |
| 1183 | 1174 |
|
| 1184 | 1175 |
int dist; |
| 1185 |
if (position <= m.getStart()) {
|
|
| 1186 |
dist = m.getStart() - position; |
|
| 1176 |
if (position < m.getStart()) {
|
|
| 1177 |
dist = m.getStart() - position - 1;
|
|
| 1187 | 1178 |
} |
| 1188 |
else if (m.getEnd() <= position) {
|
|
| 1189 |
dist = position - m.getEnd(); |
|
| 1179 |
else if (m.getEnd() < position) {
|
|
| 1180 |
dist = position - m.getEnd() - 1;
|
|
| 1190 | 1181 |
} |
| 1191 | 1182 |
else { // the n match is in the m match !?
|
| 1192 |
System.out.println("Warning: the n match is in the m match ? " + n + " " + m);
|
|
| 1183 |
System.out.println("Warning: the n match is in the m match ? " + n + " " + m);
|
|
| 1193 | 1184 |
dist = 0; |
| 1194 | 1185 |
} |
| 1195 | 1186 |
if (!positionsDistances.containsKey(position) || positionsDistances.get(position) > dist) {
|
| ... | ... | |
| 1197 | 1188 |
} |
| 1198 | 1189 |
} |
| 1199 | 1190 |
|
| 1200 |
// store and count distances for each position signature |
|
| 1201 |
noOcc = 0; |
|
| 1202 |
for (int position : positionsDistances.keySet()) { // cooccurrent words positions
|
|
| 1203 |
// String signature = allsignatures.get(position); |
|
| 1204 |
String signaturestr = allsignaturesstr.get(position); |
|
| 1205 |
|
|
| 1206 |
if (!distances.containsKey(signaturestr)) {
|
|
| 1207 |
distances.put(signaturestr, 0.0); |
|
| 1208 |
} |
|
| 1209 |
|
|
| 1210 |
if (!counts.containsKey(signaturestr)) {
|
|
| 1211 |
counts.put(signaturestr, 0); |
|
| 1212 |
} |
|
| 1213 |
|
|
| 1214 |
int dist = positionsDistances.get(position); |
|
| 1191 |
// System.out.println("nb Occ ignored: "+ignore);
|
|
| 1192 |
// System.out.println("nb Occ chevauche: "+chevauche);
|
|
| 1193 |
} |
|
| 1194 |
|
|
| 1195 |
// store and count distances for each position signature |
|
| 1196 |
int noOcc = 0; |
|
| 1197 |
for (int position : positionsDistances.keySet()) { // cooccurrent words positions
|
|
| 1198 |
// String signature = allsignatures.get(position); |
|
| 1199 |
String signaturestr = allsignaturesstr.get(position); |
|
| 1200 |
|
|
| 1201 |
int dist = positionsDistances.get(position); |
|
| 1202 |
if (distances.containsKey(signaturestr)) {
|
|
| 1215 | 1203 |
distances.put(signaturestr, (distances.get(signaturestr)) + dist); |
| 1204 |
} |
|
| 1205 |
else {
|
|
| 1206 |
distances.put(signaturestr, 0.0); |
|
| 1207 |
} |
|
| 1208 |
|
|
| 1209 |
if (counts.containsKey(signaturestr)) {
|
|
| 1216 | 1210 |
counts.put(signaturestr, (counts.get(signaturestr)) + 1); |
| 1217 |
// } |
|
| 1218 |
|
|
| 1219 |
noOcc++; |
|
| 1220 | 1211 |
} |
| 1212 |
else {
|
|
| 1213 |
counts.put(signaturestr, 1); |
|
| 1214 |
} |
|
| 1221 | 1215 |
|
| 1222 |
// System.out.println("nb Occ ignored: "+ignore);
|
|
| 1223 |
// System.out.println("nb Occ chevauche: "+chevauche);
|
|
| 1216 |
if ("[1599]".equals(signaturestr)) {
|
|
| 1217 |
System.out.println("p=" + position + " d=" + dist + " total(d)=" + distances.get(signaturestr) + " c=" + counts.get(signaturestr));
|
|
| 1218 |
} |
|
| 1219 |
// } |
|
| 1220 |
|
|
| 1221 |
noOcc++; |
|
| 1224 | 1222 |
} |
| 1225 | 1223 |
// System.out.println("T counts : "+(System.currentTimeMillis()- time)); //$NON-NLS-1$
|
| 1226 | 1224 |
|
| ... | ... | |
| 1327 | 1325 |
CLine cline = new CLine(this, specifrownames[ii], props, |
| 1328 | 1326 |
counts.get(signaturestr), // cofreq |
| 1329 | 1327 |
indexfreqs.get(specifrownames[ii]), scores[ii][1], // freq |
| 1330 |
((float) (distances.get(signaturestr) / counts.get(signaturestr))) - 1.0f, // mean distance
|
|
| 1328 |
((float) (distances.get(signaturestr) / counts.get(signaturestr))), // mean distance |
|
| 1331 | 1329 |
-1); |
| 1332 | 1330 |
|
| 1333 | 1331 |
// select the line |
Formats disponibles : Unified diff