Révision 1217
tmp/org.txm.core/buildJavadoc.xml (revision 1217) | ||
---|---|---|
24 | 24 |
--> |
25 | 25 |
<groovydoc |
26 | 26 |
destdir="javadoc/" |
27 |
sourcepath="../org.txm.core/src/java:../org.txm.statsengine.r.core/src:../org.txm.core.tests/src:../org.txm.cql2lsa.rcp/src:../org.txm.oriflamms.rcp/src:../org.txm.dictionary.rcp/src:../org.txm.para.core/src:../org.txm.groovy.core/src:../org.txm.para.rcp/src:../org.txm.groovy.rcp/src:../org.txm.partition.core/src:../org.txm.statsengine.r.rcp/src:../org.txm.imports.metopes/src:../org.txm.partition.rcp/src:../org.txm.svn.rcp/src:../org.txm.analec.rcp/src:../org.txm.index.core/src:../org.txm.practically.rcp/src:../org.txm.annotation.core/src:../org.txm.index.rcp/src:../org.txm.progression.core/src:../org.txm.edition.rcp/src:../org.txm.annotation.rcp/src:../org.txm.internalview.core/src:../org.txm.progression.rcp/src:../org.txm.textsbalance.core/src:../org.txm.backtomedia.rcp/src:../org.txm.internalview.rcp/src:../org.txm.querycooccurrences.rcp/src:../org.txm.ca.core/src:../org.txm.jodconverter.core/src:../org.txm.queryindex.rcp/src:../org.txm.textsbalance.rcp/src:../org.txm.ahc.core/src:../org.txm.lexicaltable.core/src:../org.txm.r/src:../org.txm.tigersearch.rcp/src:../org.txm.cah.rcp/src:../org.txm.lexicaltable.rcp/src:../org.txm.rcp/src/main/java:../org.txm.tmp.rcp/src:../org.txm.ca.rcp/src:../org.txm.lexicon.core/src:../org.txm.rcp.about.fragment/src:../org.txm.treetaggerinstaller.rcp/src:../org.txm.chartsengine.core/src:../org.txm.lexicon.rcp/src:../org.txm.treetagger.rcp/src:../org.txm.chartsengine.jfreechart.core/src:../org.txm.rcp.p2.ui/src:../org.txm.utils/src:../org.txm.chartsengine.jfreechart.rcp/src:../org.txm.referencer.core/src:../org.txm.wordcloud.core/src:../org.txm.chartsengine.raster.rcp/src:../org.txm.referencer.rcp/src:../org.txm.chartsengine.r.core/src:../org.txm.searchengine.core/src:../org.txm.wordcloud.rcp/src:../org.txm.chartsengine.rcp/src:../org.txm.searchengine.cqp.core/src:../org.txm.xmleditor.rcp/src:../org.txm.chartsengine.r.rcp/src:../org.txm.chartsengine.svgbatik.rcp/src:../org.txm.searchengine.cqp.rcp/src:../org.txm.concordance.core/src
:../org.txm.setups.startdialog/src:../org.txm.concordance.rcp/src:../org.txm.specificities.core/src:../org.txm.cooccurrence.core/src:../org.txm.specificities.rcp/src:../org.txm.cooccurrence.rcp/src:../org.txm.statsengine.core"
|
|
27 |
sourcepath="../org.txm.core/src/java:../org.txm.statsengine.r.core/src:../org.txm.core.tests/src:../org.txm.cql2lsa.rcp/src:../org.txm.oriflamms.rcp/src:../org.txm.dictionary.rcp/src:../org.txm.para.core/src:../org.txm.groovy.core/src:../org.txm.para.rcp/src:../org.txm.groovy.rcp/src:../org.txm.partition.core/src:../org.txm.statsengine.r.rcp/src:../org.txm.imports.metopes/src:../org.txm.partition.rcp/src:../org.txm.svn.rcp/src:../org.txm.annotation.urs.rcp/src:../org.txm.index.core/src:../org.txm.practically.rcp/src:../org.txm.annotation.core/src:../org.txm.index.rcp/src:../org.txm.progression.core/src:../org.txm.edition.rcp/src:../org.txm.annotation.rcp/src:../org.txm.internalview.core/src:../org.txm.progression.rcp/src:../org.txm.textsbalance.core/src:../org.txm.backtomedia.rcp/src:../org.txm.internalview.rcp/src:../org.txm.querycooccurrences.rcp/src:../org.txm.ca.core/src:../org.txm.jodconverter.core/src:../org.txm.queryindex.rcp/src:../org.txm.textsbalance.rcp/src:../org.txm.ahc.core/src:../org.txm.lexicaltable.core/src:../org.txm.r/src:../org.txm.tigersearch.rcp/src:../org.txm.cah.rcp/src:../org.txm.lexicaltable.rcp/src:../org.txm.rcp/src/main/java:../org.txm.tmp.rcp/src:../org.txm.ca.rcp/src:../org.txm.lexicon.core/src:../org.txm.rcp.about.fragment/src:../org.txm.treetaggerinstaller.rcp/src:../org.txm.chartsengine.core/src:../org.txm.lexicon.rcp/src:../org.txm.treetagger.rcp/src:../org.txm.chartsengine.jfreechart.core/src:../org.txm.rcp.p2.ui/src:../org.txm.utils/src:../org.txm.chartsengine.jfreechart.rcp/src:../org.txm.referencer.core/src:../org.txm.wordcloud.core/src:../org.txm.chartsengine.raster.rcp/src:../org.txm.referencer.rcp/src:../org.txm.chartsengine.r.core/src:../org.txm.searchengine.core/src:../org.txm.wordcloud.rcp/src:../org.txm.chartsengine.rcp/src:../org.txm.searchengine.cqp.core/src:../org.txm.xmleditor.rcp/src:../org.txm.chartsengine.r.rcp/src:../org.txm.chartsengine.svgbatik.rcp/src:../org.txm.searchengine.cqp.rcp/src:../org.txm.concordance.
core/src:../org.txm.setups.startdialog/src:../org.txm.concordance.rcp/src:../org.txm.specificities.core/src:../org.txm.cooccurrence.core/src:../org.txm.specificities.rcp/src:../org.txm.cooccurrence.rcp/src:../org.txm.statsengine.core"
|
|
28 | 28 |
packagenames="org.txm.*" |
29 | 29 |
|
30 | 30 |
use="true" |
tmp/org.txm.core/src/java/org/txm/importer/StaxIdentityParser.java (revision 1217) | ||
---|---|---|
46 | 46 |
this.factory = XMLInputFactory.newInstance(); |
47 | 47 |
this.parser = factory.createXMLStreamReader(inputData); |
48 | 48 |
} |
49 |
|
|
50 |
/** |
|
51 |
* Helper method to get an attribute value |
|
52 |
* |
|
53 |
* @param name the attribute name |
|
54 |
* @return the value if any |
|
55 |
*/ |
|
56 |
public String getParserAttributeValue(String name) { |
|
57 |
if (name == null) return null; |
|
58 |
|
|
59 |
int c = parser.getAttributeCount(); |
|
60 |
for (int i = 0 ; i < c ; i++) { |
|
61 |
if (name.equals(parser.getAttributeLocalName(i))) { |
|
62 |
return parser.getAttributeValue(i); |
|
63 |
} |
|
64 |
} |
|
65 |
|
|
66 |
return null; |
|
67 |
} |
|
49 | 68 |
|
50 | 69 |
protected void before() { |
51 | 70 |
|
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationSyMoGIHWriter.java (revision 1217) | ||
---|---|---|
184 | 184 |
} catch (XMLStreamException e) { |
185 | 185 |
e.printStackTrace(); |
186 | 186 |
} |
187 |
|
|
188 | 187 |
} |
189 | 188 |
|
190 | 189 |
/** |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationWriter.java (revision 1217) | ||
---|---|---|
3 | 3 |
import java.io.File; |
4 | 4 |
import java.io.IOException; |
5 | 5 |
import java.util.ArrayList; |
6 |
import java.util.Arrays; |
|
6 | 7 |
import java.util.HashMap; |
7 | 8 |
import java.util.List; |
8 | 9 |
import java.util.logging.Level; |
... | ... | |
88 | 89 |
*/ |
89 | 90 |
public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{ |
90 | 91 |
|
91 |
List<String> textsIds = corpus.getProject().getTextsID();
|
|
92 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
92 | 93 |
System.out.println("Exporting annotations of "+StringUtils.join(textsIds, ", ")+"."); |
93 | 94 |
|
94 | 95 |
File resultDirectory = new File(Toolbox.getTxmHomePath(), "results/"+corpus.getID()+"_annotations"); |
... | ... | |
140 | 141 |
|
141 | 142 |
int[] end_limits = corpus.getTextEndLimits(); |
142 | 143 |
int[] start_limits = corpus.getTextStartLimits(); |
143 |
List<String> textsIds = corpus.getProject().getTextsID();
|
|
144 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
144 | 145 |
|
145 | 146 |
File inputDirectory = corpus.getProjectDirectory(); |
146 | 147 |
File txmDirectory = new File(inputDirectory, "txm/"+corpus.getID()); |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationInjector.java (revision 1217) | ||
---|---|---|
8 | 8 |
import java.util.Comparator; |
9 | 9 |
import java.util.Date; |
10 | 10 |
import java.util.HashMap; |
11 |
import java.util.LinkedHashMap; |
|
11 | 12 |
import java.util.List; |
12 | 13 |
|
13 | 14 |
import javax.xml.stream.XMLInputFactory; |
... | ... | |
34 | 35 |
HashMap<Integer, List<Annotation>> annotationsToAddByEndPos; |
35 | 36 |
List<Annotation> currentStartAnnotations; |
36 | 37 |
List<Annotation> currentEndAnnotations; |
37 |
HashMap<String, Annotation> currentTokenAnnotations = new HashMap<String, Annotation>();
|
|
38 |
LinkedHashMap<String, Annotation> currentTokenAnnotations = new LinkedHashMap<String, Annotation>();
|
|
38 | 39 |
|
39 | 40 |
int n = 0; |
40 | 41 |
boolean debug = false; |
tmp/org.txm.setups/shared/debian/DEBIAN/control (revision 1217) | ||
---|---|---|
1 |
Package: txm |
|
1 |
Package: txm-TXMVERSION
|
|
2 | 2 |
Version: TXMVERSION |
3 | 3 |
Section: base |
4 | 4 |
Priority: optional |
tmp/org.txm.setups/shared/debian/usr/share/lintian/overrides/txm (revision 1217) | ||
---|---|---|
1 |
txm-TXMVERSION binary: arch-independent-package-contains-binary-or-object |
|
1 |
txm-TXMVERSION binary: arch-independent-package-contains-binary-or-object
|
|
2 | 2 |
txm-TXMVERSION binary: unstripped-binary-or-object |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/corpuswizard/ImportWizard.java (revision 1217) | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.File; |
4 | 4 |
import java.util.Date; |
5 |
import java.util.Locale; |
|
5 | 6 |
|
6 | 7 |
import org.eclipse.core.resources.IFolder; |
7 | 8 |
import org.eclipse.core.resources.IProject; |
... | ... | |
69 | 70 |
project = new Project(Toolbox.workspace, name); |
70 | 71 |
project.setSourceDirectory(path.getAbsolutePath()); |
71 | 72 |
project.setDescription(page1.getDescription()); |
73 |
project.setLang(Locale.getDefault().getCountry()); |
|
72 | 74 |
} else { |
73 | 75 |
System.out.println("Using parameters from already imported corpus: "+project.getCorpusBuild(project.getName())); |
74 | 76 |
} |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/handlers/scripts/ExecuteGroovyScript.java (revision 1217) | ||
---|---|---|
243 | 243 |
System.out.println(relativepath); |
244 | 244 |
gse.run(relativepath, binding); |
245 | 245 |
|
246 |
System.out.println(TXMUIMessages.ExecuteScript_12+(System.currentTimeMillis()-time)+TXMUIMessages.ExecuteGroovyScript_0);
|
|
246 |
System.out.println(TXMUIMessages.bind(TXMUIMessages.ExecuteScript_12, System.currentTimeMillis()-time));
|
|
247 | 247 |
} catch (ThreadDeath td) { |
248 | 248 |
return Status.CANCEL_STATUS; |
249 | 249 |
} catch (UIParameterException e) { |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/messages/messages_fr.properties (revision 1217) | ||
---|---|---|
271 | 271 |
ExecuteRScript_13 = \ ms |
272 | 272 |
|
273 | 273 |
ExecuteScriptImport_0 = Début de l'importation |
274 |
ExecuteScriptImport_1 = ** Le fichier de script {0} n''existe pas |
|
274 |
ExecuteScriptImport_1 = ** Le fichier de script {0} n''existe pas
|
|
275 | 275 |
ExecuteScriptImport_2 = ** Erreur: le dossier 'corpora' n'existe pas : |
276 | 276 |
ExecuteScriptImport_3 = Erreur: le dossier {0} n''a pu être supprimé et existe toujours. L''import est interrompu. Vous pouvez supprimer manuellement le dossier et recommencer l''import |
277 | 277 |
ExecuteScriptImport_5 = ** Erreur de paramètres du Tokenizer : abandon de l'import. |
... | ... | |
280 | 280 |
ExecuteScriptImport_8 = ** Erreur lors du chargement du corpus |
281 | 281 |
|
282 | 282 |
ExecuteScript_0 = Exécution de {0} ... |
283 |
ExecuteScript_1 = Sauver le fichier avant d'exécuter ? |
|
283 |
ExecuteScript_1 = Sauver le fichier avant d'exécuter ? |
|
284 |
ExecuteScript_11 = Exécution Groovy... |
|
285 |
ExecuteScript_12 = Terminé en {0} ms. |
|
284 | 286 |
ExecuteScript_2 = Sauver et lancer |
285 | 287 |
ExecuteScript_3 = ** Aucun interpréteur trouvé pour l'extension du fichier de script |
286 | 288 |
ExecuteScript_4 = Annuler |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/messages/messages.properties (revision 1217) | ||
---|---|---|
316 | 316 |
ExecuteScriptImport_8 = ** Error while loading corpus |
317 | 317 |
|
318 | 318 |
ExecuteScript_0 = Execution of {0} |
319 |
ExecuteScript_1 = Save file before execution ? |
|
319 |
ExecuteScript_1 = Save file before execution ? |
|
320 |
ExecuteScript_11 = Executing Groovy script... |
|
321 |
ExecuteScript_12 = Done in {0} ms. |
|
320 | 322 |
ExecuteScript_2 = Save and run |
321 | 323 |
ExecuteScript_3 = ** No interpreter found for script file extension |
322 | 324 |
ExecuteScript_4 = Abort |
tmp/org.txm.cql2lsa.rcp/src/org/txm/rcp/commands/function/ComputeExpII.java (revision 1217) | ||
---|---|---|
45 | 45 |
import org.txm.rcp.swt.dialog.LastOpened; |
46 | 46 |
import org.txm.rcp.utils.JobHandler; |
47 | 47 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
48 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
48 | 49 |
import org.txm.utils.logger.Log; |
49 | 50 |
|
50 | 51 |
/** |
... | ... | |
70 | 71 |
Object s = selection.getFirstElement(); |
71 | 72 |
if (!(s instanceof CQPCorpus)) return null; |
72 | 73 |
|
73 |
final CQPCorpus corpus = (CQPCorpus) s;
|
|
74 |
final MainCorpus corpus = (MainCorpus) s;
|
|
74 | 75 |
System.out.println("Select lemma queries property file"); |
75 | 76 |
final File queriesFiles = getPropFile(event); |
76 | 77 |
if (queriesFiles == null) return Status.CANCEL_STATUS; |
tmp/org.txm.cql2lsa.rcp/src/org/txm/rcp/commands/function/ComputeExpI.java (revision 1217) | ||
---|---|---|
54 | 54 |
import org.txm.rcp.swt.dialog.LastOpened; |
55 | 55 |
import org.txm.rcp.utils.JobHandler; |
56 | 56 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
57 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
57 | 58 |
import org.txm.utils.logger.Log; |
58 | 59 |
// TODO: Auto-generated Javadoc |
59 | 60 |
/** |
... | ... | |
77 | 78 |
selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
78 | 79 |
|
79 | 80 |
Object s = selection.getFirstElement(); |
80 |
if (s instanceof CQPCorpus) {
|
|
81 |
CQPCorpus corpus = (CQPCorpus) s;
|
|
81 |
if (s instanceof MainCorpus) {
|
|
82 |
MainCorpus corpus = (MainCorpus) s;
|
|
82 | 83 |
try { |
83 | 84 |
File propFile = null; |
84 | 85 |
if (LastOpened.getFile(ID) != null) |
... | ... | |
105 | 106 |
return null; |
106 | 107 |
} |
107 | 108 |
|
108 |
public static void compute(final CQPCorpus corpus, final File propFile) {
|
|
109 |
public static void compute(final MainCorpus corpus, final File propFile) {
|
|
109 | 110 |
final String title = "Compute QueryIndexOfText with "+corpus +" corpus properties file: "+propFile; |
110 | 111 |
JobHandler jobhandler = new JobHandler(title) { |
111 | 112 |
@Override |
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpI.java (revision 1217) | ||
---|---|---|
8 | 8 |
import java.io.InputStreamReader; |
9 | 9 |
import java.io.OutputStreamWriter; |
10 | 10 |
import java.util.ArrayList; |
11 |
import java.util.Arrays; |
|
11 | 12 |
import java.util.Collection; |
12 | 13 |
import java.util.Collections; |
13 | 14 |
import java.util.Comparator; |
... | ... | |
29 | 30 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
30 | 31 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
31 | 32 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
33 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
32 | 34 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
33 | 35 |
import org.txm.searchengine.cqp.corpus.query.Match; |
34 | 36 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
... | ... | |
46 | 48 |
/** The writer. */ |
47 | 49 |
private OutputStreamWriter writer; |
48 | 50 |
|
49 |
public ExpI(CQPCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
51 |
public ExpI(MainCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
50 | 52 |
super(corpus); |
51 | 53 |
this.corpus = corpus; |
52 | 54 |
//System.out.println("get text ids"); |
53 |
texts = corpus.getProject().getTextsID();
|
|
55 |
texts = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
54 | 56 |
//System.out.println( "init texts: "+texts); |
55 | 57 |
//System.out.println("get text limits: "+texts.size()); |
56 | 58 |
textBoundaries = corpus.getTextEndLimits(); |
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpII.java (revision 1217) | ||
---|---|---|
10 | 10 |
import java.io.OutputStreamWriter; |
11 | 11 |
import java.io.PrintWriter; |
12 | 12 |
import java.util.ArrayList; |
13 |
import java.util.Arrays; |
|
13 | 14 |
import java.util.Collection; |
14 | 15 |
import java.util.Collections; |
15 | 16 |
import java.util.Comparator; |
... | ... | |
32 | 33 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
33 | 34 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
34 | 35 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
36 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
35 | 37 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
36 | 38 |
import org.txm.searchengine.cqp.corpus.query.Match; |
37 | 39 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
... | ... | |
49 | 51 |
/** The writer. */ |
50 | 52 |
private OutputStreamWriter writer; |
51 | 53 |
|
52 |
public ExpII(CQPCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
54 |
public ExpII(MainCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
53 | 55 |
super(corpus); |
54 | 56 |
this.corpus = corpus; |
55 |
texts = corpus.getProject().getTextsID();
|
|
57 |
texts = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
56 | 58 |
textBoundaries = corpus.getTextEndLimits(); |
57 | 59 |
|
58 | 60 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPCorpus.java (revision 1217) | ||
---|---|---|
123 | 123 |
/** The language. */ |
124 | 124 |
private String language = "???"; //$NON-NLS-1$ |
125 | 125 |
|
126 |
|
|
127 | 126 |
/** |
128 | 127 |
* Gets the locale of the corpus. |
129 | 128 |
* |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/MainCorpus.java (revision 1217) | ||
---|---|---|
445 | 445 |
} |
446 | 446 |
return textids; |
447 | 447 |
} |
448 |
|
|
449 |
public String[] getCorpusTextIdsList() throws CqiClientException, IOException, CqiServerError { |
|
450 |
|
|
451 |
int nbtext = getNbTexts(); |
|
452 |
int[] structs = new int[nbtext]; |
|
453 |
for(int i = 0 ; i < nbtext ; i++) |
|
454 |
structs[i] = i; |
|
448 | 455 |
|
456 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$ |
|
457 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$ |
|
458 |
|
|
459 |
return CorpusManager.getCorpusManager().getCqiClient().struc2Str(text_id_sup.getQualifiedName(), structs); |
|
460 |
} |
|
461 |
|
|
449 | 462 |
/** |
450 | 463 |
* Find text no. |
451 | 464 |
* |
tmp/org.txm.analec.rcp/META-INF/MANIFEST.MF (revision 1217) | ||
---|---|---|
165 | 165 |
org.txm.annotation.urs.toolbar, |
166 | 166 |
org.txm.annotation.urs.view, |
167 | 167 |
org.txm.annotation.urs.widgets, |
168 |
org.txm.macro.analec,
|
|
169 |
org.txm.macro.analec.edit,
|
|
170 |
org.txm.macro.analec.exploit,
|
|
171 |
org.txm.macro.analec.exploit.mesures1,
|
|
172 |
org.txm.macro.analec.export,
|
|
173 |
org.txm.macro.analec.misc,
|
|
168 |
org.txm.macro.urs,
|
|
169 |
org.txm.macro.urs.edit,
|
|
170 |
org.txm.macro.urs.exploit,
|
|
171 |
org.txm.macro.urs.exploit.mesures1,
|
|
172 |
org.txm.macro.urs.export,
|
|
173 |
org.txm.macro.urs.misc,
|
|
174 | 174 |
visuAnalec, |
175 | 175 |
visuAnalec.chaines, |
176 | 176 |
visuAnalec.donnees, |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/EmptyPropValuesMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
|
|
11 |
import groovy.transform.Field |
|
12 |
|
|
13 |
import org.txm.Toolbox |
|
14 |
import org.txm.rcp.swt.widget.parameters.* |
|
15 |
import org.txm.annotation.urs.* |
|
16 |
import org.txm.macro.urs.AnalecUtils |
|
17 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
20 |
|
|
21 |
import visuAnalec.donnees.Structure |
|
22 |
import visuAnalec.elements.* |
|
23 |
|
|
24 |
def scriptName = this.class.getSimpleName() |
|
25 |
|
|
26 |
def selection = [] |
|
27 |
for (def s : corpusViewSelections) { |
|
28 |
if (s instanceof CQPCorpus) selection << s |
|
29 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
30 |
} |
|
31 |
|
|
32 |
if (selection.size() == 0) { |
|
33 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
34 |
return false |
|
35 |
} |
|
36 |
|
|
37 |
// BEGINNING OF PARAMETERS |
|
38 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
39 |
String schema_ursql |
|
40 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
41 |
int minimum_schema_size |
|
42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
43 |
String unit_ursql |
|
44 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
|
45 |
limit_cql |
|
46 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
47 |
boolean strict_inclusion |
|
48 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
49 |
int limit_distance |
|
50 |
@Field @Option(name="debug", usage="Show internal variable content", widget="Boolean", required=true, def="false") |
|
51 |
debug |
|
52 |
if (!ParametersDialog.open(this)) return |
|
53 |
|
|
54 |
def CQI = CQPSearchEngine.getCqiClient() |
|
55 |
|
|
56 |
//corpus = corpusViewSelection |
|
57 |
for (def corpus : selection) { |
|
58 |
|
|
59 |
mainCorpus = corpus.getMainCorpus() |
|
60 |
|
|
61 |
def word = mainCorpus.getWordProperty() |
|
62 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName()) |
|
63 |
|
|
64 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE, |
|
65 |
unit_ursql, limit_cql, strict_inclusion, limit_distance); |
|
66 |
|
|
67 |
for (def unit : selectedUnits) { |
|
68 |
def props = unit.getProps(); |
|
69 |
for (def k : props.keySet()) { |
|
70 |
if (props[k] == null) { |
|
71 |
println "$corpus\t"+unit.getDeb()+"->"+unit.getFin()+"\t"+k |
|
72 |
} |
|
73 |
} |
|
74 |
} |
|
75 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/SchemasListMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.urs.misc |
|
8 |
|
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.Toolbox |
|
15 |
import org.txm.rcp.commands.* |
|
16 |
import org.apache.commons.lang.StringUtils |
|
17 |
|
|
18 |
// BEGINNING OF PARAMETERS |
|
19 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence") |
|
20 |
String schema_type |
|
21 |
|
|
22 |
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3") |
|
23 |
int minimum_schema_size |
|
24 |
|
|
25 |
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="") |
|
26 |
String schema_property_name |
|
27 |
|
|
28 |
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*") |
|
29 |
String schema_property_value |
|
30 |
|
|
31 |
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon") |
|
32 |
String unit_type |
|
33 |
|
|
34 |
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="") |
|
35 |
String unit_property_name |
|
36 |
|
|
37 |
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*") |
|
38 |
String unit_property_value |
|
39 |
|
|
40 |
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word lemma frlemma frolemma #forme# id", required=false, def="word") |
|
41 |
String word_property |
|
42 |
|
|
43 |
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ") |
|
44 |
String separator |
|
45 |
|
|
46 |
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false') |
|
47 |
def buildCQL |
|
48 |
|
|
49 |
|
|
50 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
51 |
println "Corpus view selection is not a Corpus" |
|
52 |
return; |
|
53 |
} |
|
54 |
|
|
55 |
if (!ParametersDialog.open(this)) return; |
|
56 |
// END OF PARAMETERS |
|
57 |
|
|
58 |
MainCorpus corpus = corpusViewSelection |
|
59 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
60 |
|
|
61 |
// check Schema parameters |
|
62 |
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) { |
|
63 |
println "No schema with name=$schema_type" |
|
64 |
return; |
|
65 |
} else { |
|
66 |
if (schema_property_name.length() > 0 && schema_property_value.length() > 0) { |
|
67 |
// test property existence |
|
68 |
def props = analecCorpus.getStructure().getSchemaProperties(schema_type); |
|
69 |
if (!props.contains(schema_property_name)) { |
|
70 |
println "Schema $schema_type has no property named $schema_property_name" |
|
71 |
return; |
|
72 |
} |
|
73 |
} |
|
74 |
} |
|
75 |
|
|
76 |
// check unit parameters |
|
77 |
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) { |
|
78 |
println "No unit with name=$unit_type" |
|
79 |
return; |
|
80 |
} else { |
|
81 |
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) { |
|
82 |
// test property existence |
|
83 |
def props = analecCorpus.getStructure().getUniteProperties(unit_type); |
|
84 |
if (!props.contains(unit_property_name)) { |
|
85 |
println "Unit $unit_type has no property named $unit_property_name" |
|
86 |
return; |
|
87 |
} |
|
88 |
} |
|
89 |
} |
|
90 |
|
|
91 |
def CQI = CQPSearchEngine.getCqiClient() |
|
92 |
|
|
93 |
if (buildCQL) { |
|
94 |
word_prop = corpus.getProperty("id") |
|
95 |
} else { |
|
96 |
word_prop = corpus.getProperty(word_property) |
|
97 |
} |
|
98 |
|
|
99 |
def schemas = analecCorpus.getSchemas(schema_type) |
|
100 |
schemas.sort() {it.getProps()} |
|
101 |
def nSchemas = 0 |
|
102 |
|
|
103 |
def lens = [:] |
|
104 |
for (def schema : schemas) { |
|
105 |
|
|
106 |
if (schema_property_name.length() > 0 && schema_property_value.length() > 0) { |
|
107 |
if (!schema.getProp(schema_property_name).matches(schema_property_value)) { |
|
108 |
// ignoring this schema |
|
109 |
continue |
|
110 |
} |
|
111 |
} |
|
112 |
|
|
113 |
def nUnites = 0 |
|
114 |
for (def unit : schema.getUnitesSousjacentes()) { |
|
115 |
if (unit_type.length() > 0) { |
|
116 |
if (!unit.getType().equals(unit_type)) { |
|
117 |
continue |
|
118 |
} |
|
119 |
} |
|
120 |
|
|
121 |
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) { |
|
122 |
if (!unit.getProp(unit_property_name).matches(unit_property_value)) { |
|
123 |
// ignoring this unit |
|
124 |
continue |
|
125 |
} |
|
126 |
} |
|
127 |
|
|
128 |
nUnites++ |
|
129 |
} |
|
130 |
|
|
131 |
if (nUnites < minimum_schema_size) continue |
|
132 |
|
|
133 |
print schema.getProps().toString()+ ": " |
|
134 |
def first = true |
|
135 |
for (def unit : schema.getUnitesSousjacentes()) { |
|
136 |
if (unit_type.length() > 0) { |
|
137 |
if (!unit.getType().equals(unit_type)) { |
|
138 |
continue |
|
139 |
} |
|
140 |
} |
|
141 |
|
|
142 |
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) { |
|
143 |
if (!unit.getProp(unit_property_name).matches(unit_property_value)) { |
|
144 |
// ignoring this unit |
|
145 |
continue |
|
146 |
} |
|
147 |
} |
|
148 |
|
|
149 |
String forme = null; |
|
150 |
|
|
151 |
if (buildCQL) { |
|
152 |
int[] pos = null |
|
153 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
154 |
else pos = (unit.getDeb()..unit.getFin()) |
|
155 |
def first2= true |
|
156 |
q = "" |
|
157 |
pos.each { |
|
158 |
if (first2) { first2 = false } else { q = q+" " } |
|
159 |
int[] pos2 = [it] |
|
160 |
q = q+"["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), pos2)[0]+"\"]" |
|
161 |
} |
|
162 |
if (first) { first = false } else { print "|" } |
|
163 |
print "("+q+")" |
|
164 |
} else { |
|
165 |
if (word_prop == null) { // word_property is the analec unit property to use |
|
166 |
forme = unit.getProp(word_property) |
|
167 |
} else { |
|
168 |
int[] pos = null |
|
169 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
170 |
else pos = (unit.getDeb()..unit.getFin()) |
|
171 |
|
|
172 |
forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough |
|
173 |
} |
|
174 |
|
|
175 |
if (first) { first = false } else { print separator } |
|
176 |
print forme |
|
177 |
} |
|
178 |
} |
|
179 |
println "" |
|
180 |
|
|
181 |
nSchemas++ |
|
182 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitTypesMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence") |
|
21 |
String schema_type |
|
22 |
|
|
23 |
if (!ParametersDialog.open(this)) return; |
|
24 |
|
|
25 |
MainCorpus corpus = corpusViewSelection |
|
26 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
27 |
def map = new HashMap() |
|
28 |
def unitesInSchema = [] |
|
29 |
def n = 0 |
|
30 |
for (def schema : analecCorpus.getSchemas(schema_type)) { |
|
31 |
def unites = schema.getUnitesSousjacentes() |
|
32 |
unitesInSchema.addAll(unites) |
|
33 |
n += unites.size() |
|
34 |
} |
|
35 |
|
|
36 |
def counts = unitesInSchema.countBy() { it }; |
|
37 |
for (def c : counts.keySet()) { |
|
38 |
if (counts[c] > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+c.getDeb()+", "+c.getFin()+"]="+c.getProps()+" in "+c.getSchemas().collect() {it.getProps()} |
|
39 |
} |
|
40 |
|
|
41 |
def set = new HashSet() |
|
42 |
set.addAll(unitesInSchema) |
|
43 |
for (def s : set.collect { it.getType() }) { |
|
44 |
if (!map.containsKey(s)) map[s] = 0; |
|
45 |
map[s] = map[s] +1 |
|
46 |
} |
|
47 |
|
|
48 |
println "Unites types: "+map.sort() { it -> map[it]} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitsCorrelationMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils |
|
9 |
import org.txm.rcp.views.corpora.CorporaView |
|
10 |
import groovy.transform.Field |
|
11 |
|
|
12 |
import org.kohsuke.args4j.* |
|
13 |
import org.txm.Toolbox |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl |
|
16 |
import org.txm.macro.analec.* |
|
17 |
import org.txm.rcp.commands.* |
|
18 |
import org.txm.rcp.swt.widget.parameters.* |
|
19 |
import org.txm.searchengine.cqp.corpus.* |
|
20 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
|
21 |
|
|
22 |
import visuAnalec.donnees.* |
|
23 |
import visuAnalec.elements.* |
|
24 |
import cern.colt.matrix.DoubleFactory2D |
|
25 |
import cern.colt.matrix.DoubleMatrix2D |
|
26 |
|
|
27 |
def scriptName = this.class.getSimpleName() |
|
28 |
def parent |
|
29 |
def selection = [] |
|
30 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
31 |
println "** $scriptName please select a Corpus to run the macro"; return // stop here: the rest of the macro needs a CQP corpus selection |
|
32 |
} |
|
33 |
|
|
34 |
|
|
35 |
// BEGINNING OF PARAMETERS |
|
36 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE") |
|
37 |
String schema_ursql |
|
38 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
39 |
int minimum_schema_size |
|
40 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
41 |
int maximum_schema_size |
|
42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
43 |
String unit_ursql |
|
44 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
45 |
int limit_distance_in_schema |
|
46 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
|
47 |
limit_cql |
|
48 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
49 |
boolean strict_inclusion |
|
50 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
51 |
int limit_distance |
|
52 |
@Field @Option(name="unit_prop1", usage="PROP1", widget="String", required=false, def="PROP1") |
|
53 |
String unit_prop1 |
|
54 |
@Field @Option(name="unit_prop2", usage="PROP2", widget="String", required=false, def="PROP2") |
|
55 |
String unit_prop2 |
|
56 |
@Field @Option(name="corr_method", usage="try them all", widget="StringArray", metaVar="pearson spearman kendall", required=false, def="pearson") |
|
57 |
String corr_method |
|
58 |
@Field @Option(name="corr_style", usage="try them all", widget="StringArray", metaVar="circle square ellipse number shade color pie", required=false, def="number") |
|
59 |
String corr_style |
|
60 |
@Field @Option(name="corr_layout", usage="try them all", widget="StringArray", metaVar="full lower upper", required=false, def="upper") |
|
61 |
String corr_layout |
|
62 |
@Field @Option(name="corr_order", usage="try them all", widget="StringArray", metaVar="AOE FPC hclust alphabet", required=false, def="hclust") |
|
63 |
String corr_order |
|
64 |
@Field @Option(name="output_lexicaltable", usage="create or not a lexical table with the result", widget="Boolean", required=true, def="false") |
|
65 |
output_lexicaltable |
|
66 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
67 |
debug |
|
68 |
if (!ParametersDialog.open(this)) return |
|
69 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
70 |
|
|
71 |
|
|
72 |
def CQI = CQPSearchEngine.getCqiClient() |
|
73 |
|
|
74 |
def correlations = [:] |
|
75 |
def values1 = new HashSet() |
|
76 |
def values2 = new HashSet() |
|
77 |
def corpus = corpusViewSelection |
|
78 |
|
|
79 |
mainCorpus = corpus.getMainCorpus() |
|
80 |
|
|
81 |
def word = mainCorpus.getWordProperty() |
|
82 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName()) |
|
83 |
|
|
84 |
|
|
85 |
|
|
86 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
|
87 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance); |
|
88 |
|
|
89 |
for (def unit : selectedUnits) { |
|
90 |
def value1 = unit.getProp(unit_prop1) |
|
91 |
if (value1 == null) value1 = "<null>" |
|
92 |
if (value1.length() == 0) value1 = "<empty>" |
|
93 |
def value2 = unit.getProp(unit_prop2) |
|
94 |
if (value2 == null) value2 = "<null>" |
|
95 |
if (value2.length() == 0) value2 = "<empty>" |
|
96 |
|
|
97 |
values1 << value1 |
|
98 |
values2 << value2 |
|
99 |
|
|
100 |
if (!correlations.containsKey(value1)) correlations[value1] = [:] |
|
101 |
def line = correlations[value1] |
|
102 |
if (!line.containsKey(value2)) line[value2] = 0 |
|
103 |
line[value2] += 1 |
|
104 |
} |
|
105 |
|
|
106 |
def matrix = new int[values1.size()][values2.size()]; |
|
107 |
println "\t"+values2.join("\t") |
|
108 |
int i = 0; |
|
109 |
for (def value1 : values1) { |
|
110 |
print value1 |
|
111 |
int j = 0; |
|
112 |
for (def value2 : values2) { |
|
113 |
if (correlations[value1][value2] == null) correlations[value1][value2] = 0; |
|
114 |
print "\t"+correlations[value1][value2] |
|
115 |
|
|
116 |
matrix[i][j] = correlations[value1][value2] |
|
117 |
j++ |
|
118 |
} |
|
119 |
println "" |
|
120 |
i++ |
|
121 |
} |
|
122 |
|
|
123 |
def r = RWorkspace.getRWorkspaceInstance() |
|
124 |
r.addVectorToWorkspace("corrlines", values1 as String[]) |
|
125 |
r.addVectorToWorkspace("corrcols", values2 as String[]) |
|
126 |
r.addMatrixToWorkspace("corrmatrix", matrix) |
|
127 |
r.eval("rownames(corrmatrix) = corrlines") |
|
128 |
r.eval("colnames(corrmatrix) = corrcols") |
|
129 |
|
|
130 |
def resultsDir = new File(Toolbox.getTxmHomePath(), "results") |
|
131 |
resultsDir.mkdirs() |
|
132 |
file = File.createTempFile("txm_corr_pairs_", ".svg", resultsDir) |
|
133 |
|
|
134 |
|
|
135 |
|
|
136 |
def title = "${corpus.getMainCorpus()}.${corpus}\n${unit_ursql}" |
|
137 |
if (limit_distance > 1) title += "[${limit_distance}]." |
|
138 |
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) title += "\n(${limit_cql} limits)" |
|
139 |
title += "\t P1=$unit_prop1 P2=$unit_prop2" // interpolate both property names into the title |
|
140 |
|
|
141 |
def plotScript = """ |
|
142 |
|
|
143 |
r1 = cor(corrmatrix, use="complete.obs", method="$corr_method"); |
|
144 |
r2 = cov(corrmatrix, use="complete.obs") ; |
|
145 |
|
|
146 |
library(corrplot) |
|
147 |
corrplot(r1, type="$corr_layout", order="$corr_order", method="$corr_style") |
|
148 |
""" |
|
149 |
|
|
150 |
|
|
151 |
// execute R script |
|
152 |
if (!output_lexicaltable) { |
|
153 |
r.plot(file, plotScript) |
|
154 |
} |
|
155 |
title = "$unit_prop1 $corr_method correlations" |
|
156 |
|
|
157 |
|
|
158 |
def lt = null; |
|
159 |
if (output_lexicaltable) { |
|
160 |
mFactory = DoubleFactory2D.dense |
|
161 |
dmatrix = mFactory.make(values1.size(), values2.size()) |
|
162 |
for (int ii = 0 ; ii < values1.size() ; ii++) { |
|
163 |
for (int jj = 0 ; jj < values2.size() ; jj++) { |
|
164 |
dmatrix.set(ii, jj, matrix[ii][jj]) |
|
165 |
} |
|
166 |
} |
|
167 |
if (corpusViewSelection instanceof Partition) { |
|
168 |
lt = new LexicalTableImpl(dmatrix, corpusViewSelection, corpusViewSelection.getCorpus().getProperty("word"), |
|
169 |
values1 as String[], values2 as String[]) |
|
170 |
lt.setCorpus(corpusViewSelection.getCorpus()); |
|
171 |
corpusViewSelection.storeResult(lt) |
|
172 |
} else { |
|
173 |
lt = new LexicalTableImpl(dmatrix, corpus.getProperty("word"), |
|
174 |
values1 as String[], values2 as String[]) |
|
175 |
lt.setCorpus(corpus); |
|
176 |
corpus.storeResult(lt) |
|
177 |
} |
|
178 |
} |
|
179 |
|
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
monitor.syncExec(new Runnable() { |
|
184 |
@Override |
|
185 |
public void run() { try { |
|
186 |
|
|
187 |
|
|
188 |
|
|
189 |
if (UnitsCorrelationMacro.this.output_lexicaltable) { |
|
190 |
CorporaView.refreshObject(corpus) |
|
191 |
CorporaView.expand(lt) |
|
192 |
} else { |
|
193 |
OpenSVGGraph.OpenSVGFile(UnitsCorrelationMacro.this.file.getAbsolutePath(), "Correlations Units") |
|
194 |
} |
|
195 |
} catch (e) { e.printStackTrace() }} |
|
196 |
}) |
|
197 |
|
|
198 |
|
|
199 |
return correlations |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/SchemaTypesMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
MainCorpus corpus = corpusViewSelection |
|
20 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
21 |
|
|
22 |
def schemas = analecCorpus.getTousSchemas() |
|
23 |
def set = new HashMap() |
|
24 |
for (def s : schemas.collect { it.getType() }) { |
|
25 |
if (!set.containsKey(s)) set[s] = 0; |
|
26 |
set[s] = set[s] +1 |
|
27 |
} |
|
28 |
println "Schemas types: "+set.sort() { it.value } // a one-argument closure receives Map.Entry objects: order by the count |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/CompUnitPropertiesMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.Toolbox; |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import visuAnalec.donnees.Structure; |
|
17 |
import visuAnalec.elements.Unite; |
|
18 |
|
|
19 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
20 |
println "Corpora selection is not a Corpus" |
|
21 |
return; |
|
22 |
} |
|
23 |
|
|
24 |
// BEGINNING OF PARAMETERS |
|
25 |
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION") |
|
26 |
String unit_type |
|
27 |
|
|
28 |
@Field @Option(name="print_diff",usage="", widget="Boolean", required=true, def="true") |
|
29 |
boolean print_diff |
|
30 |
|
|
31 |
@Field @Option(name="unit_property_name1", usage="", widget="String", required=false, def="CATEGORIE") |
|
32 |
String unit_property_name1 |
|
33 |
|
|
34 |
@Field @Option(name="unit_property_name2", usage="", widget="String", required=false, def="CATEGORIE_ORIG") |
|
35 |
String unit_property_name2 |
|
36 |
|
|
37 |
if (!ParametersDialog.open(this)) return; |
|
38 |
|
|
39 |
int n = 1; |
|
40 |
int nDiff = 0; |
|
41 |
MainCorpus corpus = corpusViewSelection |
|
42 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
|
43 |
def word = corpus.getWordProperty() |
|
44 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
45 |
|
|
46 |
def units = analecCorpus.getUnites(unit_type) |
|
47 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } |
|
48 |
for (Unite unit : units) { |
|
49 |
int[] pos = null |
|
50 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
51 |
else pos = (unit.getDeb()..unit.getFin()) |
|
52 |
def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ") |
|
53 |
def props = unit.getProps() |
|
54 |
def v1 = props.get(unit_property_name1); |
|
55 |
def v2 = props.get(unit_property_name2); |
|
56 |
|
|
57 |
if (v1 != v2) { |
|
58 |
if (print_diff) println "$n - ${unit.getDeb()} -> ${unit.getFin()} - $props : $form" |
|
59 |
nDiff++ |
|
60 |
} |
|
61 |
n++ |
|
62 |
} |
|
63 |
|
|
64 |
if (nDiff == 0) println "$unit_property_name1 and $unit_property_name2 have the same values." |
|
65 |
else println "$unit_property_name1 and $unit_property_name2 have $nDiff/${units.size()} different values." // n is a 1-based display index and ends one past the unit count |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/RelationsListMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.apache.tools.ant.types.resources.selectors.InstanceOf; |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
|
|
12 |
import groovy.transform.Field |
|
13 |
|
|
14 |
import org.txm.Toolbox; |
|
15 |
import org.txm.rcp.swt.widget.parameters.* |
|
16 |
import org.txm.annotation.urs.* |
|
17 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
|
|
20 |
import visuAnalec.donnees.Structure; |
|
21 |
import visuAnalec.elements.Relation |
|
22 |
import visuAnalec.elements.Unite; |
|
23 |
|
|
24 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
25 |
println "Corpora selection is not a Corpus" |
|
26 |
return; |
|
27 |
} |
|
28 |
|
|
29 |
// BEGINNING OF PARAMETERS |
|
30 |
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE") |
|
31 |
String relation_type |
|
32 |
|
|
33 |
if (!ParametersDialog.open(this)) return; |
|
34 |
|
|
35 |
MainCorpus corpus = corpusViewSelection |
|
36 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
|
37 |
def word = corpus.getWordProperty() |
|
38 |
visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(corpus); |
|
39 |
|
|
40 |
int n = 1; |
|
41 |
def relations = null |
|
42 |
if (relation_type.length() > 0) { |
|
43 |
relations = [] |
|
44 |
for (String type : analecCorpus.getStructure().getTypes(Relation.class)) |
|
45 |
if (type.equals(relation_type)) relations.addAll(analecCorpus.getRelations(type)) // keep only the requested relation type |
|
46 |
} else { |
|
47 |
relations = analecCorpus.getToutesRelations() |
|
48 |
} |
|
49 |
|
|
50 |
for (Relation relation : relations) { |
|
51 |
def unit1 = relation.getElt1(); |
|
52 |
def unit2 = relation.getElt2(); |
|
53 |
def props = relation.getProps() |
|
54 |
if (unit1 instanceof Unite && unit2 instanceof Unite) { |
|
55 |
int[] pos1 = null |
|
56 |
if (unit1.getDeb() == unit1.getFin()) pos1 = [unit1.getDeb()] |
|
57 |
else pos1 = (unit1.getDeb()..unit1.getFin()) |
|
58 |
def form1 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos1), " ") |
|
59 |
|
|
60 |
int[] pos2 = null |
|
61 |
if (unit2.getDeb() == unit2.getFin()) pos2 = [unit2.getDeb()] |
|
62 |
else pos2 = (unit2.getDeb()..unit2.getFin()) |
|
63 |
def form2 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos2), " ") |
|
64 |
|
|
65 |
println "$n - $props : $form1 -> $form2" |
|
66 |
} else { |
|
67 |
println "$n - $props" |
|
68 |
} |
|
69 |
n++ |
|
70 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitTypesInSchemaMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence") |
|
21 |
String schema_type |
|
22 |
|
|
23 |
if (!ParametersDialog.open(this)) return; |
|
24 |
|
|
25 |
MainCorpus corpus = corpusViewSelection |
|
26 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
27 |
def map = new HashMap() |
|
28 |
def unitesInSchema = [] |
|
29 |
def n = 0 |
|
30 |
for (def schema : analecCorpus.getSchemas(schema_type)) { |
|
31 |
def unites = schema.getUnitesSousjacentes() |
|
32 |
unitesInSchema.addAll(unites) |
|
33 |
n += unites.size() |
|
34 |
} |
|
35 |
|
|
36 |
def counts = unitesInSchema.countBy() { it }; |
|
37 |
for (def c : counts.keySet()) { |
|
38 |
if (counts[c] > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+c.getDeb()+", "+c.getFin()+"]="+c.getProps()+" in "+c.getSchemas().collect() {it.getProps()} |
|
39 |
} |
|
40 |
|
|
41 |
def set = new HashSet() |
|
42 |
set.addAll(unitesInSchema) |
|
43 |
for (def s : set.collect { it.getType() }) { |
|
44 |
if (!map.containsKey(s)) map[s] = 0; |
|
45 |
map[s] = map[s] +1 |
|
46 |
} |
|
47 |
|
|
48 |
println "Unites types: "+map.sort() { it.value } // a one-argument closure receives Map.Entry objects: order by the count |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitTypesNotInSchemaMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence") |
|
21 |
String schema_type |
|
22 |
if (!ParametersDialog.open(this)) return; |
|
23 |
|
|
24 |
MainCorpus corpus = corpusViewSelection |
|
25 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
26 |
|
|
27 |
def unitesInSchema = new HashSet() |
|
28 |
for (def schema : analecCorpus.getSchemas(schema_type)) { |
|
29 |
unitesInSchema.addAll(schema.getUnitesSousjacentes()) |
|
30 |
} |
|
31 |
println "unites: "+analecCorpus.getToutesUnites().size() |
|
32 |
println "unites in schema: "+unitesInSchema.size() |
|
33 |
|
|
34 |
def set = new HashMap() |
|
35 |
for (def u : analecCorpus.getToutesUnites()) { |
|
36 |
if (unitesInSchema.contains(u)) continue; |
|
37 |
|
|
38 |
if (!set.containsKey(u.getType())) set[u.getType()] = 0; |
|
39 |
set[u.getType()] = set[u.getType()] +1 |
|
40 |
} |
|
41 |
|
|
42 |
println "unites not in schema: "+set.sort() { it.value } // a one-argument closure receives Map.Entry objects: order by the count |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitsProgressionMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.misc |
|
7 |
|
|
8 |
import java.util.ArrayList; |
|
9 |
import java.util.List; |
|
10 |
|
|
11 |
import org.apache.commons.lang.StringUtils |
|
12 |
import org.jfree.chart.JFreeChart |
|
13 |
import org.jfree.chart.plot.XYPlot |
|
14 |
import org.kohsuke.args4j.* |
|
15 |
|
|
16 |
import groovy.transform.Field |
|
17 |
|
|
18 |
import org.txm.Toolbox |
|
19 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
20 |
import org.txm.progression.core.functions.Progression |
|
21 |
import org.txm.rcp.swt.widget.parameters.* |
|
22 |
import org.txm.annotation.urs.* |
|
23 |
import org.txm.chartsengine.rcp.editors.ChartEditor |
|
24 |
import org.txm.macro.urs.AnalecUtils |
|
25 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
26 |
import org.txm.searchengine.cqp.corpus.* |
|
27 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
28 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
29 |
import org.txm.rcp.Application |
|
30 |
import org.txm.rcp.IImageKeys |
|
31 |
|
|
32 |
import visuAnalec.donnees.Structure |
|
33 |
import visuAnalec.elements.* |
|
34 |
|
|
35 |
def scriptName = this.class.getSimpleName() |
|
36 |
def parent |
|
37 |
def selection = [] |
|
38 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
39 |
println "** $scriptName please select a Corpus to run the macro"; return // stop here: the rest of the macro needs a CQP corpus selection |
|
40 |
} |
|
41 |
selection << corpusViewSelection |
|
42 |
parent = corpusViewSelection |
|
43 |
|
|
44 |
// BEGINNING OF PARAMETERS |
|
45 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE") |
|
46 |
String schema_ursql |
|
47 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
48 |
int minimum_schema_size |
|
49 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
50 |
int maximum_schema_size |
|
51 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
52 |
String unit_ursql |
|
53 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
54 |
int limit_distance_in_schema |
|
55 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
|
56 |
limit_cql |
|
57 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
58 |
boolean strict_inclusion |
|
59 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
60 |
int limit_distance |
|
61 |
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE") |
|
62 |
String unit_property_display |
|
63 |
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div") |
|
64 |
String struct_name |
|
65 |
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n") |
|
66 |
String struct_prop |
|
67 |
@Field @Option(name="line_width", usage="line width", widget="Integer", required=true, def="1") |
|
68 |
int line_width = 2 |
|
69 |
@Field @Option(name="bande_width", usage="bande width", widget="Float", required=true, def="1.0f") |
|
70 |
float bande_width = 1.0f |
|
71 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
72 |
debug |
|
73 |
if (!ParametersDialog.open(this)) return |
|
74 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
75 |
|
|
76 |
|
|
77 |
def CQI = CQPSearchEngine.getCqiClient() |
|
78 |
|
|
79 |
def queries = [] |
|
80 |
def queryResults = [] |
|
81 |
def informations = [] |
|
82 |
for (def corpus : selection) { |
|
83 |
|
|
84 |
mainCorpus = corpus.getMainCorpus() |
|
85 |
|
|
86 |
def word = mainCorpus.getWordProperty() |
|
87 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName()) |
|
88 |
|
|
89 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, |
|
90 |
unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance); |
|
91 |
|
|
92 |
def query = "" |
|
93 |
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) query += limit_cql |
|
94 |
if (schema_ursql != null && schema_ursql.length() > 0) { if (query.length() > 0) query += " & "; query += ""+schema_ursql+ " >"} |
|
95 |
if (unit_ursql != null && unit_ursql.length() > 0) query += " "+unit_ursql |
|
96 |
query = new CQLQuery(query) |
|
97 |
int[] starts = new int[selectedUnits.size()]; |
|
98 |
int[] ends = new int[selectedUnits.size()]; |
|
99 |
def unitsinformations = [] |
|
100 |
int n = 0; |
|
101 |
for (Unite unite : selectedUnits) { |
|
102 |
starts[n] = unite.getDeb() |
|
103 |
ends[n] = unite.getFin() |
|
104 |
unitsinformations << AnalecUtils.toString(CQI, word, unite); |
|
105 |
n++ |
|
106 |
} |
|
107 |
def queryResult = new FakeQueryResult(corpus.getID(), corpus, query, starts, ends, null) |
|
108 |
queries << query |
|
109 |
queryResults << queryResult |
|
110 |
informations << unitsinformations |
|
111 |
|
|
112 |
if (unit_property_display != null && unit_property_display.length() > 0) { |
|
113 |
def propvalues = [:] |
|
114 |
for (def unit : selectedUnits) { |
|
115 |
def v = unit.getProp(unit_property_display) |
|
116 |
if (v == null) v = "<null>" |
|
117 |
else if (v.length() == 0) v = "<empty>" |
|
118 |
|
|
119 |
if (!propvalues.containsKey(v))propvalues[v] = [] |
|
120 |
propvalues[v] << unit |
|
121 |
} |
|
122 |
|
|
123 |
for (def v : propvalues.keySet().sort()) { |
|
124 |
selectedUnits = propvalues[v] |
|
125 |
query = corpus.getID()+" "+limit_cql |
|
126 |
query = new CQLQuery(v) |
|
127 |
starts = new int[selectedUnits.size()]; |
|
128 |
ends = new int[selectedUnits.size()]; |
|
129 |
unitsinformations = [] |
|
130 |
n = 0; |
|
131 |
for (Unite unite : selectedUnits) { |
|
132 |
starts[n] = unite.getDeb() |
|
133 |
ends[n] = unite.getFin() |
|
134 |
unitsinformations << AnalecUtils.toString(CQI, word, unite); |
|
135 |
n++ |
|
136 |
} |
|
137 |
queryResult = new FakeQueryResult(corpus.getID(), corpus, query, starts, ends, null) |
|
138 |
queries << query |
|
139 |
queryResults << queryResult |
|
140 |
informations << unitsinformations |
|
141 |
} |
|
142 |
} |
|
143 |
} |
|
144 |
|
|
145 |
corpus = parent |
|
146 |
try { |
|
147 |
def struct = corpus.getStructuralUnit(struct_name) |
|
148 |
def struct_p = struct.getProperty(struct_prop) |
|
149 |
|
|
150 |
Progression progression = new Progression(corpus, queries, |
|
151 |
struct, struct_p, ".*", |
|
152 |
true, false, false, |
|
153 |
line_width, false, bande_width) |
|
154 |
|
|
155 |
progression.stepQueries(queryResults); // new |
|
156 |
|
|
157 |
if (!progression.stepStructuralUnits() || monitor.isCanceled()) return |
|
158 |
monitor.worked(20) |
|
159 |
if (!progression.stepFinalize() || monitor.isCanceled()) return |
|
160 |
monitor.worked(20) |
|
161 |
|
|
162 |
monitor.syncExec(new Runnable() { |
|
163 |
@Override |
|
164 |
public void run() { |
|
165 |
try { |
|
166 |
ChartEditor charteditorpart = SWTChartsComponentProvider.openEditor(Application.swtComponentProvider.createProgressionChartEditorPart(IImageKeys.getImage(IImageKeys.ACTION_PROGRESSION), progression, progression.isMonochrome(), progression.isMonostyle(), progression.isDoCumulative())) |
|
167 |
JFreeChart chart = charteditorpart.getChart() |
|
168 |
def plot = chart.getXYPlot() |
|
169 |
ProgressionItemSelectionRenderer renderer = plot.getRenderer(); |
|
170 |
renderer.setAdditionalLabelInformation(informations) |
|
171 |
} catch(Exception e) {e.printStackTrace()} |
|
172 |
} |
|
173 |
}) |
|
174 |
|
|
175 |
} catch(Exception e) { |
|
176 |
e.printStackTrace() |
|
177 |
return false |
|
178 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/SchemaTypesMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
MainCorpus corpus = corpusViewSelection |
|
20 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
21 |
|
|
22 |
def schemas = analecCorpus.getTousSchemas() |
|
23 |
def set = new HashMap() |
|
24 |
for (def s : schemas.collect { it.getType() }) { |
|
25 |
if (!set.containsKey(s)) set[s] = 0; |
|
26 |
set[s] = set[s] +1 |
|
27 |
} |
|
28 |
println "Schemas types: "+set.sort() { it.value } // a one-argument closure receives Map.Entry objects: order by the count |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/CreationRelationsMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// STANDARD DECLARATIONS |
|
5 |
package org.txm.macro.urs |
|
6 |
|
|
7 |
import org.kohsuke.args4j.* |
|
8 |
|
|
9 |
import groovy.transform.Field |
|
10 |
|
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
|
|
15 |
import visuAnalec.donnees.Structure; |
|
16 |
import visuAnalec.elements.Relation; |
|
17 |
import visuAnalec.elements.Schema |
|
18 |
import visuAnalec.elements.Unite; |
|
19 |
import visuAnalec.vue.Vue |
|
20 |
|
|
21 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
22 |
println "Corpora selection is not a Corpus" |
|
23 |
return; |
|
24 |
} |
|
25 |
|
|
26 |
// BEGINNING OF PARAMETERS |
|
27 |
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION") |
|
28 |
String unit_type |
|
29 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE") |
|
30 |
String schema_type |
|
31 |
if (!ParametersDialog.open(this)) return; |
|
32 |
|
|
33 |
int nCreated = 0 // count the number of created RELATION |
|
34 |
|
|
35 |
MainCorpus corpus = corpusViewSelection |
|
36 |
def analecCorpus = URSCorpora.getCorpus(corpus); // analec corpus has the same name as the TXM corpus |
|
37 |
Structure structure = analecCorpus.getStructure() |
|
38 |
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units |
|
39 |
println "Error: corpus structure does not contains unit with name=$unit_type" |
|
40 |
return |
|
41 |
} |
|
42 |
if (!structure.getSchemas().contains(schema_type)) { // check if the structure contains the schema_type schemas |
|
43 |
println "Error: corpus structure does not contains schema with name=$schema_type" |
|
44 |
return |
|
45 |
} |
|
46 |
if (!structure.getRelations().contains("ANAPHORE")) { // update the structure if needed |
|
47 |
println "Creating the 'ANAPHORE' relation in the structure" |
|
48 |
structure.ajouterType(Relation.class, "ANAPHORE") |
|
49 |
analecCorpus.ajouterProp(Relation.class, "ANAPHORE", "TYPE") |
|
50 |
analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "COREFERENTE") |
|
51 |
analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "ASSOCIATIVE") |
|
52 |
} |
|
53 |
if (analecCorpus.getRelations("ANAPHORE").size() > 0) { |
|
54 |
println "Error: This macro can't update existing Relations" |
|
55 |
return |
|
56 |
} |
|
57 |
|
|
58 |
for (Schema schema : analecCorpus.getSchemas(schema_type)) { // parse all CHAINE |
|
59 |
def units = [] |
|
60 |
for (Unite unit : schema.getUnitesSousjacentes()) { // keep only the 'unit_type' units |
|
61 |
if (unit.type.equals(unit_type)) units << unit |
|
62 |
} |
|
63 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } // sort them |
|
64 |
|
|
65 |
for (int i = 0 ; i < units.size() - 1 ; i++) { // build RELATIONS and don't process the last unit |
|
66 |
println "creating "+units[i+1]+", "+units[i] |
|
67 |
Relation relation = new Relation("ANAPHORE", units[i+1], units[i]) |
|
68 |
relation.getProps().put("TYPE", "COREFERENTE") |
|
69 |
analecCorpus.addRelationLue(relation) // add the new relation |
|
70 |
nCreated++; |
|
71 |
} |
|
72 |
} |
|
73 |
|
|
74 |
println "nCreated=$nCreated" |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/NombreDeChainesMacro.groovy (revision 1217) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
|
Formats disponibles : Unified diff