Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / functions / mesures / Proportion.groovy @ 187

History | View | Annotate | Download (6.1 kB)

1
package org.txm.functions.mesures;
2

    
3
import java.util.ArrayList;
4
import java.util.Arrays;
5
import java.util.HashMap;
6
import java.util.List;
7

    
8
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
9
import org.txm.searchengine.cqp.corpus.Corpus;
10
import org.txm.searchengine.cqp.corpus.QueryResult;
11
import org.txm.searchengine.cqp.corpus.query.Match;
12
import org.txm.searchengine.cqp.corpus.query.Query;
13

    
14
/**
 * Measure that, for every corpus, relates the number of matches of each "cas" query
 * to the number of matches of the "all" query.
 *
 * Matches are counted inside the matches of a domain: the sub-corpus built from the
 * {@code domaine} query, or the corpus itself when no domain query is given.
 * One {@link MesureResult} is registered per requested synthesis and per cas; each
 * result holds one cell per corpus.
 */
public class Proportion extends Mesure {

    private static final long serialVersionUID = -3160461437451974147L;

    /** Syntheses declared as supported (identifier spelling kept for existing callers). */
    public static List<Synthese> SUPORTED = Arrays.asList(Synthese.COUNTMATCHES,
            Synthese.SUM);

    /** Query whose match counts are used as the denominator. */
    protected String all;
    /** Queries whose match counts are used as numerators, one result row per query. */
    protected List<String> cas;

    /** For each synthesis: one row per cas, each row holding one cell per corpus. */
    HashMap<Synthese, ArrayList<Object[]>> rezsPerSynt = new HashMap<Synthese, ArrayList<Object[]>>();

    /**
     * @param domaine   query delimiting the domain; null or empty means the corpus itself
     * @param unit      stored as-is in the inherited {@code unit} field
     * @param syntheses the syntheses to compute (iterated as {@code Synthese} values)
     * @param all       query counted as the denominator
     * @param cas       queries counted as numerators
     */
    public Proportion(String domaine, String unit, def syntheses, String all, List<String> cas) {
        this.domaine = domaine;
        this.unit = unit;
        this.syntheses = syntheses;

        this.all = all;
        this.cas = cas;
    }

    /**
     * Computes the proportions for the given corpora and registers the results.
     *
     * @param arg0 the corpora to process
     * @return true when every corpus was processed, false when a CQP error occurred
     */
    @Override
    public boolean compute(ArrayList<Corpus> arg0) {
        this.corpora = arg0;
        return computeSynthese();
    }

    /**
     * Allocates the result rows, fills them corpus by corpus, then registers one
     * MesureResult per (synthesis, cas) pair.
     *
     * @return false as soon as one corpus could not be processed (nothing is
     *         registered in that case), true otherwise
     */
    private boolean computeSynthese() {
        // Decided once: the domaine field must not be altered while iterating, otherwise
        // the first corpus name would be used as a domain query for the following corpora.
        boolean hasDomaine = domaine != null && domaine.length() > 0;

        // initialize list of results
        for (Synthese synt : syntheses) {
            ArrayList<Object[]> rezs = new ArrayList<Object[]>();
            for (int i = 0; i < cas.size(); i++) {
                rezs.add(new Number[corpora.size()]);
            }
            rezsPerSynt.put(synt, rezs);
        }

        for (int icorpus = 0; icorpus < corpora.size(); icorpus++) {
            if (!computeForCorpus(icorpus, corpora.get(icorpus), hasDomaine)) {
                return false; // do not register rows containing empty cells
            }
        }

        // Without a domain query the label shows the name of the first corpus.
        String domaineLabel = domaine;
        if (!hasDomaine && corpora.size() > 0) {
            domaineLabel = corpora.get(0).getName();
        }

        // register results
        for (Synthese synt : syntheses) {
            ArrayList<Object[]> rezs = rezsPerSynt.get(synt);
            for (int i = 0; i < cas.size(); i++) {
                Object[] rez = rezs.get(i);
                this.add(new MesureResult(this, rez, "Proportion synt="+synt+" domaine="+domaineLabel+" cas="+cas.get(i)+" all="+all+")"));
            }
        }
        return true;
    }

    /**
     * Counts, for each domain match, how many cas matches lie entirely inside it.
     * Both lists are walked once in parallel, so they are expected to be ordered by
     * position (assumption: this is how the search engine returns them).
     *
     * @param domaine the domain matches
     * @param cas     the matches to distribute over the domain matches
     * @return one count per domain match; an empty array when there is no domain match
     */
    private Integer[] countMatchesInMatches(List<Match> domaine, List<Match> cas) {
        Integer[] rez = new Integer[domaine.size()];
        for (int i = 0; i < rez.length; i++) {
            rez[i] = 0;
        }

        int iCas = 0;
        int iDom = 0;
        while (iCas < cas.size() && iDom < domaine.size()) {
            Match mDom = domaine.get(iDom);
            Match mCas = cas.get(iCas);

            if (mCas.getStart() < mDom.getStart()) {
                // starts before the current domain match: belongs to no remaining domain
                iCas++;
            } else if (mDom.getEnd() < mCas.getEnd()) {
                // ends after the current domain match: try the next domain match
                iDom++;
            } else {
                // mDom.start <= mCas.start and mCas.end <= mDom.end: contained
                rez[iDom] = rez[iDom] + 1;
                iCas++;
            }
        }
        return rez;
    }

    /**
     * Fills, for one corpus, the cell {@code icorpus} of every result row.
     *
     * @param icorpus    index of the corpus, used as the cell index in the result rows
     * @param corpus     the corpus to query
     * @param hasDomaine whether a domain query was supplied; when false the corpus itself
     *                   is used as the domain
     * @return true on success, false when the search engine raised a CqiClientException
     */
    private boolean computeForCorpus(int icorpus, Corpus corpus, boolean hasDomaine) {
        try {
            Corpus domaineSub = corpus;
            if (hasDomaine) {
                domaineSub = corpus.createSubcorpus(new Query(domaine), "domaine");
            }
            try {
                List<Match> domaineMatches = domaineSub.getMatches();

                // first process the all case
                QueryResult allSub = domaineSub.query(new Query(all), "all", false);
                Integer[] allCounts = countMatchesInMatches(domaineMatches, allSub.getMatches());

                // NOTE(review): these synthesis values are never read afterwards; the calls
                // are kept because doSynthese and the builder are not visible from this
                // file - confirm they have no side effect before removing.
                HashMap<Synthese, Number> allSyntheses = new HashMap<Synthese, Number>();
                builder = new SyntheseBuilder(allCounts);
                for (Synthese synt : syntheses) {
                    Synthese s = synt.equals(Synthese.NONE) ? Synthese.SUM : synt;
                    allSyntheses.put(synt, builder.doSynthese(s));
                }

                // total of the "all" counts, shared by every cas for the NONE synthesis
                int allTotal = 0;
                for (int n : allCounts) {
                    allTotal += n;
                }

                // then process each cas
                for (int iCas = 0; iCas < cas.size(); iCas++) {
                    QueryResult result = domaineSub.query(new Query(cas.get(iCas)), "TMP", false);
                    Integer[] counts = countMatchesInMatches(domaineMatches, result.getMatches());

                    // per domain match ratio, 0 when the denominator is 0
                    Float[] rapports = new Float[counts.length];
                    for (int i = 0; i < rapports.length; i++) {
                        if (allCounts[i] == 0) rapports[i] = 0.0f;
                        else rapports[i] = (float)counts[i] / (float)allCounts[i];
                    }

                    for (Synthese synt : syntheses) {
                        Object[] row = rezsPerSynt.get(synt).get(iCas);
                        if (synt.equals(Synthese.NONE)) {
                            // global ratio: total cas count over total all count
                            int casTotal = 0;
                            for (int n : counts) {
                                casTotal += n;
                            }
                            // same convention as the per-match ratios: 0 rather than NaN
                            if (allTotal == 0) row[icorpus] = 0.0f;
                            else row[icorpus] = (float)casTotal / (float)allTotal;
                        } else {
                            builder = new SyntheseBuilder(rapports);
                            row[icorpus] = builder.doSynthese(synt);
                        }
                    }
                }
            } finally {
                // cleaning: drop the temporary sub-corpus even when a query failed
                if (!domaineSub.equals(corpus)) {
                    domaineSub.delete();
                }
            }
            return true;
        } catch (CqiClientException e) {
            System.out.println("ERROR Proportion: could not compute corpus index "+icorpus);
            e.printStackTrace();
            return false;
        }
    }
}