Révision 1217
| tmp/org.txm.core/buildJavadoc.xml (revision 1217) | ||
|---|---|---|
| 24 | 24 |
--> |
| 25 | 25 |
<groovydoc |
| 26 | 26 |
destdir="javadoc/" |
| 27 |
sourcepath="../org.txm.core/src/java:../org.txm.statsengine.r.core/src:../org.txm.core.tests/src:../org.txm.cql2lsa.rcp/src:../org.txm.oriflamms.rcp/src:../org.txm.dictionary.rcp/src:../org.txm.para.core/src:../org.txm.groovy.core/src:../org.txm.para.rcp/src:../org.txm.groovy.rcp/src:../org.txm.partition.core/src:../org.txm.statsengine.r.rcp/src:../org.txm.imports.metopes/src:../org.txm.partition.rcp/src:../org.txm.svn.rcp/src:../org.txm.analec.rcp/src:../org.txm.index.core/src:../org.txm.practically.rcp/src:../org.txm.annotation.core/src:../org.txm.index.rcp/src:../org.txm.progression.core/src:../org.txm.edition.rcp/src:../org.txm.annotation.rcp/src:../org.txm.internalview.core/src:../org.txm.progression.rcp/src:../org.txm.textsbalance.core/src:../org.txm.backtomedia.rcp/src:../org.txm.internalview.rcp/src:../org.txm.querycooccurrences.rcp/src:../org.txm.ca.core/src:../org.txm.jodconverter.core/src:../org.txm.queryindex.rcp/src:../org.txm.textsbalance.rcp/src:../org.txm.ahc.core/src:../org.txm.lexicaltable.core/src:../org.txm.r/src:../org.txm.tigersearch.rcp/src:../org.txm.cah.rcp/src:../org.txm.lexicaltable.rcp/src:../org.txm.rcp/src/main/java:../org.txm.tmp.rcp/src:../org.txm.ca.rcp/src:../org.txm.lexicon.core/src:../org.txm.rcp.about.fragment/src:../org.txm.treetaggerinstaller.rcp/src:../org.txm.chartsengine.core/src:../org.txm.lexicon.rcp/src:../org.txm.treetagger.rcp/src:../org.txm.chartsengine.jfreechart.core/src:../org.txm.rcp.p2.ui/src:../org.txm.utils/src:../org.txm.chartsengine.jfreechart.rcp/src:../org.txm.referencer.core/src:../org.txm.wordcloud.core/src:../org.txm.chartsengine.raster.rcp/src:../org.txm.referencer.rcp/src:../org.txm.chartsengine.r.core/src:../org.txm.searchengine.core/src:../org.txm.wordcloud.rcp/src:../org.txm.chartsengine.rcp/src:../org.txm.searchengine.cqp.core/src:../org.txm.xmleditor.rcp/src:../org.txm.chartsengine.r.rcp/src:../org.txm.chartsengine.svgbatik.rcp/src:../org.txm.searchengine.cqp.rcp/src:../org.txm.concordance.core/src
:../org.txm.setups.startdialog/src:../org.txm.concordance.rcp/src:../org.txm.specificities.core/src:../org.txm.cooccurrence.core/src:../org.txm.specificities.rcp/src:../org.txm.cooccurrence.rcp/src:../org.txm.statsengine.core"
|
|
| 27 |
sourcepath="../org.txm.core/src/java:../org.txm.statsengine.r.core/src:../org.txm.core.tests/src:../org.txm.cql2lsa.rcp/src:../org.txm.oriflamms.rcp/src:../org.txm.dictionary.rcp/src:../org.txm.para.core/src:../org.txm.groovy.core/src:../org.txm.para.rcp/src:../org.txm.groovy.rcp/src:../org.txm.partition.core/src:../org.txm.statsengine.r.rcp/src:../org.txm.imports.metopes/src:../org.txm.partition.rcp/src:../org.txm.svn.rcp/src:../org.txm.annotation.urs.rcp/src:../org.txm.index.core/src:../org.txm.practically.rcp/src:../org.txm.annotation.core/src:../org.txm.index.rcp/src:../org.txm.progression.core/src:../org.txm.edition.rcp/src:../org.txm.annotation.rcp/src:../org.txm.internalview.core/src:../org.txm.progression.rcp/src:../org.txm.textsbalance.core/src:../org.txm.backtomedia.rcp/src:../org.txm.internalview.rcp/src:../org.txm.querycooccurrences.rcp/src:../org.txm.ca.core/src:../org.txm.jodconverter.core/src:../org.txm.queryindex.rcp/src:../org.txm.textsbalance.rcp/src:../org.txm.ahc.core/src:../org.txm.lexicaltable.core/src:../org.txm.r/src:../org.txm.tigersearch.rcp/src:../org.txm.cah.rcp/src:../org.txm.lexicaltable.rcp/src:../org.txm.rcp/src/main/java:../org.txm.tmp.rcp/src:../org.txm.ca.rcp/src:../org.txm.lexicon.core/src:../org.txm.rcp.about.fragment/src:../org.txm.treetaggerinstaller.rcp/src:../org.txm.chartsengine.core/src:../org.txm.lexicon.rcp/src:../org.txm.treetagger.rcp/src:../org.txm.chartsengine.jfreechart.core/src:../org.txm.rcp.p2.ui/src:../org.txm.utils/src:../org.txm.chartsengine.jfreechart.rcp/src:../org.txm.referencer.core/src:../org.txm.wordcloud.core/src:../org.txm.chartsengine.raster.rcp/src:../org.txm.referencer.rcp/src:../org.txm.chartsengine.r.core/src:../org.txm.searchengine.core/src:../org.txm.wordcloud.rcp/src:../org.txm.chartsengine.rcp/src:../org.txm.searchengine.cqp.core/src:../org.txm.xmleditor.rcp/src:../org.txm.chartsengine.r.rcp/src:../org.txm.chartsengine.svgbatik.rcp/src:../org.txm.searchengine.cqp.rcp/src:../org.txm.concordance.
core/src:../org.txm.setups.startdialog/src:../org.txm.concordance.rcp/src:../org.txm.specificities.core/src:../org.txm.cooccurrence.core/src:../org.txm.specificities.rcp/src:../org.txm.cooccurrence.rcp/src:../org.txm.statsengine.core"
|
|
| 28 | 28 |
packagenames="org.txm.*" |
| 29 | 29 |
|
| 30 | 30 |
use="true" |
| tmp/org.txm.core/src/java/org/txm/importer/StaxIdentityParser.java (revision 1217) | ||
|---|---|---|
| 46 | 46 |
this.factory = XMLInputFactory.newInstance(); |
| 47 | 47 |
this.parser = factory.createXMLStreamReader(inputData); |
| 48 | 48 |
} |
| 49 |
|
|
| 50 |
/** |

| 51 |
* Scans the attributes exposed by {@code parser} and returns the value of the first one whose local name equals {@code name}. |

| 52 |
* |

| 53 |
* @param name the attribute local name to look for; {@code null} yields {@code null} |

| 54 |
* @return the value of the first matching attribute, or {@code null} if no attribute matches |

| 55 |
*/ |

| 56 |
public String getParserAttributeValue(String name) {
|

| 57 |
if (name == null) return null; |

| 58 |
|

| 59 |
int c = parser.getAttributeCount(); |

| 60 |
for (int i = 0 ; i < c ; i++) {
|

| 61 |
if (name.equals(parser.getAttributeLocalName(i))) {
|

| 62 |
return parser.getAttributeValue(i); |

| 63 |
} |

| 64 |
} |

| 65 |
|

| 66 |
return null; |

| 67 |
} |
|
| 49 | 68 |
|
| 50 | 69 |
protected void before() {
|
| 51 | 70 |
|
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationSyMoGIHWriter.java (revision 1217) | ||
|---|---|---|
| 184 | 184 |
} catch (XMLStreamException e) {
|
| 185 | 185 |
e.printStackTrace(); |
| 186 | 186 |
} |
| 187 |
|
|
| 188 | 187 |
} |
| 189 | 188 |
|
| 190 | 189 |
/** |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationWriter.java (revision 1217) | ||
|---|---|---|
| 3 | 3 |
import java.io.File; |
| 4 | 4 |
import java.io.IOException; |
| 5 | 5 |
import java.util.ArrayList; |
| 6 |
import java.util.Arrays; |
|
| 6 | 7 |
import java.util.HashMap; |
| 7 | 8 |
import java.util.List; |
| 8 | 9 |
import java.util.logging.Level; |
| ... | ... | |
| 88 | 89 |
*/ |
| 89 | 90 |
public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
|
| 90 | 91 |
|
| 91 |
List<String> textsIds = corpus.getProject().getTextsID();
|
|
| 92 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
| 92 | 93 |
System.out.println("Exporting annotations of "+StringUtils.join(textsIds, ", ")+".");
|
| 93 | 94 |
|
| 94 | 95 |
File resultDirectory = new File(Toolbox.getTxmHomePath(), "results/"+corpus.getID()+"_annotations"); |
| ... | ... | |
| 140 | 141 |
|
| 141 | 142 |
int[] end_limits = corpus.getTextEndLimits(); |
| 142 | 143 |
int[] start_limits = corpus.getTextStartLimits(); |
| 143 |
List<String> textsIds = corpus.getProject().getTextsID();
|
|
| 144 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
| 144 | 145 |
|
| 145 | 146 |
File inputDirectory = corpus.getProjectDirectory(); |
| 146 | 147 |
File txmDirectory = new File(inputDirectory, "txm/"+corpus.getID()); |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationInjector.java (revision 1217) | ||
|---|---|---|
| 8 | 8 |
import java.util.Comparator; |
| 9 | 9 |
import java.util.Date; |
| 10 | 10 |
import java.util.HashMap; |
| 11 |
import java.util.LinkedHashMap; |
|
| 11 | 12 |
import java.util.List; |
| 12 | 13 |
|
| 13 | 14 |
import javax.xml.stream.XMLInputFactory; |
| ... | ... | |
| 34 | 35 |
HashMap<Integer, List<Annotation>> annotationsToAddByEndPos; |
| 35 | 36 |
List<Annotation> currentStartAnnotations; |
| 36 | 37 |
List<Annotation> currentEndAnnotations; |
| 37 |
HashMap<String, Annotation> currentTokenAnnotations = new HashMap<String, Annotation>();
|
|
| 38 |
LinkedHashMap<String, Annotation> currentTokenAnnotations = new LinkedHashMap<String, Annotation>();
|
|
| 38 | 39 |
|
| 39 | 40 |
int n = 0; |
| 40 | 41 |
boolean debug = false; |
| tmp/org.txm.setups/shared/debian/DEBIAN/control (revision 1217) | ||
|---|---|---|
| 1 |
Package: txm |
|
| 1 |
Package: txm-TXMVERSION
|
|
| 2 | 2 |
Version: TXMVERSION |
| 3 | 3 |
Section: base |
| 4 | 4 |
Priority: optional |
| tmp/org.txm.setups/shared/debian/usr/share/lintian/overrides/txm (revision 1217) | ||
|---|---|---|
| 1 |
txm-TXMVERSION binary: arch-independent-package-contains-binary-or-object |
|
| 1 |
overtxm-TXMVERSION binary: arch-independent-package-contains-binary-or-object
|
|
| 2 | 2 |
txm-TXMVERSION binary: unstripped-binary-or-object |
| tmp/org.txm.rcp/src/main/java/org/txm/rcp/corpuswizard/ImportWizard.java (revision 1217) | ||
|---|---|---|
| 2 | 2 |
|
| 3 | 3 |
import java.io.File; |
| 4 | 4 |
import java.util.Date; |
| 5 |
import java.util.Locale; |
|
| 5 | 6 |
|
| 6 | 7 |
import org.eclipse.core.resources.IFolder; |
| 7 | 8 |
import org.eclipse.core.resources.IProject; |
| ... | ... | |
| 69 | 70 |
project = new Project(Toolbox.workspace, name); |
| 70 | 71 |
project.setSourceDirectory(path.getAbsolutePath()); |
| 71 | 72 |
project.setDescription(page1.getDescription()); |
| 73 |
// Use the locale's language code (e.g. "fr"); getCountry() yields the region code (e.g. "FR") or an empty string. NOTE(review): confirm setLang expects a language code.
project.setLang(Locale.getDefault().getLanguage()); |
|
| 72 | 74 |
} else {
|
| 73 | 75 |
System.out.println("Using parameters from already imported corpus: "+project.getCorpusBuild(project.getName()));
|
| 74 | 76 |
} |
| tmp/org.txm.rcp/src/main/java/org/txm/rcp/handlers/scripts/ExecuteGroovyScript.java (revision 1217) | ||
|---|---|---|
| 243 | 243 |
System.out.println(relativepath); |
| 244 | 244 |
gse.run(relativepath, binding); |
| 245 | 245 |
|
| 246 |
System.out.println(TXMUIMessages.ExecuteScript_12+(System.currentTimeMillis()-time)+TXMUIMessages.ExecuteGroovyScript_0);
|
|
| 246 |
System.out.println(TXMUIMessages.bind(TXMUIMessages.ExecuteScript_12, System.currentTimeMillis()-time));
|
|
| 247 | 247 |
} catch (ThreadDeath td) {
|
| 248 | 248 |
return Status.CANCEL_STATUS; |
| 249 | 249 |
} catch (UIParameterException e) {
|
| tmp/org.txm.rcp/src/main/java/org/txm/rcp/messages/messages_fr.properties (revision 1217) | ||
|---|---|---|
| 271 | 271 |
ExecuteRScript_13 = \ ms |
| 272 | 272 |
|
| 273 | 273 |
ExecuteScriptImport_0 = Début de l'importation |
| 274 |
ExecuteScriptImport_1 = ** Le fichier de script {0} n''existe pas
|
|
| 274 |
ExecuteScriptImport_1 = ** Le fichier de script {0} n''existe pas
|
|
| 275 | 275 |
ExecuteScriptImport_2 = ** Erreur: le dossier 'corpora' n'existe pas : |
| 276 | 276 |
ExecuteScriptImport_3 = Erreur: le dossier {0} n''a pu être supprimé et existe toujours. L''import est interrompu. Vous pouvez supprimer manuellement le dossier et recommencer l''import
|
| 277 | 277 |
ExecuteScriptImport_5 = ** Erreur de paramètres du Tokenizer : abandon de l'import. |
| ... | ... | |
| 280 | 280 |
ExecuteScriptImport_8 = ** Erreur lors du chargement du corpus |
| 281 | 281 |
|
| 282 | 282 |
ExecuteScript_0 = Exécution de {0} ...
|
| 283 |
ExecuteScript_1 = Sauver le fichier avant d'exécuter ? |
|
| 283 |
ExecuteScript_1 = Sauver le fichier avant d'exécuter ? |
|
| 284 |
ExecuteScript_11 = Exécution Groovy... |
|
| 285 |
ExecuteScript_12 = Terminé en {0} ms.
|
|
| 284 | 286 |
ExecuteScript_2 = Sauver et lancer |
| 285 | 287 |
ExecuteScript_3 = ** Aucun interpréteur trouvé pour l'extension du fichier de script |
| 286 | 288 |
ExecuteScript_4 = Annuler |
| tmp/org.txm.rcp/src/main/java/org/txm/rcp/messages/messages.properties (revision 1217) | ||
|---|---|---|
| 316 | 316 |
ExecuteScriptImport_8 = ** Error while loading corpus |
| 317 | 317 |
|
| 318 | 318 |
ExecuteScript_0 = Execution of {0}
|
| 319 |
ExecuteScript_1 = Save file before execution ? |
|
| 319 |
ExecuteScript_1 = Save file before execution ? |
|
| 320 |
ExecuteScript_11 = Executing Groovy script... |
|
| 321 |
ExecuteScript_12 = Done {0} ms.
|
|
| 320 | 322 |
ExecuteScript_2 = Save and run |
| 321 | 323 |
ExecuteScript_3 = ** No interpreter found for script file extension |
| 322 | 324 |
ExecuteScript_4 = Abort |
| tmp/org.txm.cql2lsa.rcp/src/org/txm/rcp/commands/function/ComputeExpII.java (revision 1217) | ||
|---|---|---|
| 45 | 45 |
import org.txm.rcp.swt.dialog.LastOpened; |
| 46 | 46 |
import org.txm.rcp.utils.JobHandler; |
| 47 | 47 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
| 48 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 48 | 49 |
import org.txm.utils.logger.Log; |
| 49 | 50 |
|
| 50 | 51 |
/** |
| ... | ... | |
| 70 | 71 |
Object s = selection.getFirstElement(); |
| 71 | 72 |
// The selection is cast to MainCorpus just below: a CQPCorpus that is not a MainCorpus would throw ClassCastException, so guard on MainCorpus (as ComputeExpI does).
if (!(s instanceof MainCorpus)) return null; |
| 72 | 73 |
|
| 73 |
final CQPCorpus corpus = (CQPCorpus) s;
|
|
| 74 |
final MainCorpus corpus = (MainCorpus) s;
|
|
| 74 | 75 |
System.out.println("Select lemma queries property file");
|
| 75 | 76 |
final File queriesFiles = getPropFile(event); |
| 76 | 77 |
if (queriesFiles == null) return Status.CANCEL_STATUS; |
| tmp/org.txm.cql2lsa.rcp/src/org/txm/rcp/commands/function/ComputeExpI.java (revision 1217) | ||
|---|---|---|
| 54 | 54 |
import org.txm.rcp.swt.dialog.LastOpened; |
| 55 | 55 |
import org.txm.rcp.utils.JobHandler; |
| 56 | 56 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
| 57 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 57 | 58 |
import org.txm.utils.logger.Log; |
| 58 | 59 |
// TODO: Auto-generated Javadoc |
| 59 | 60 |
/** |
| ... | ... | |
| 77 | 78 |
selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
| 78 | 79 |
|
| 79 | 80 |
Object s = selection.getFirstElement(); |
| 80 |
if (s instanceof CQPCorpus) {
|
|
| 81 |
CQPCorpus corpus = (CQPCorpus) s;
|
|
| 81 |
if (s instanceof MainCorpus) {
|
|
| 82 |
MainCorpus corpus = (MainCorpus) s;
|
|
| 82 | 83 |
try {
|
| 83 | 84 |
File propFile = null; |
| 84 | 85 |
if (LastOpened.getFile(ID) != null) |
| ... | ... | |
| 105 | 106 |
return null; |
| 106 | 107 |
} |
| 107 | 108 |
|
| 108 |
public static void compute(final CQPCorpus corpus, final File propFile) {
|
|
| 109 |
public static void compute(final MainCorpus corpus, final File propFile) {
|
|
| 109 | 110 |
final String title = "Compute QueryIndexOfText with "+corpus +" corpus properties file: "+propFile; |
| 110 | 111 |
JobHandler jobhandler = new JobHandler(title) {
|
| 111 | 112 |
@Override |
| tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpI.java (revision 1217) | ||
|---|---|---|
| 8 | 8 |
import java.io.InputStreamReader; |
| 9 | 9 |
import java.io.OutputStreamWriter; |
| 10 | 10 |
import java.util.ArrayList; |
| 11 |
import java.util.Arrays; |
|
| 11 | 12 |
import java.util.Collection; |
| 12 | 13 |
import java.util.Collections; |
| 13 | 14 |
import java.util.Comparator; |
| ... | ... | |
| 29 | 30 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
| 30 | 31 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
| 31 | 32 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
| 33 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 32 | 34 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
| 33 | 35 |
import org.txm.searchengine.cqp.corpus.query.Match; |
| 34 | 36 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
| ... | ... | |
| 46 | 48 |
/** The writer. */ |
| 47 | 49 |
private OutputStreamWriter writer; |
| 48 | 50 |
|
| 49 |
public ExpI(CQPCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
| 51 |
public ExpI(MainCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
| 50 | 52 |
super(corpus); |
| 51 | 53 |
this.corpus = corpus; |
| 52 | 54 |
//System.out.println("get text ids");
|
| 53 |
texts = corpus.getProject().getTextsID();
|
|
| 55 |
texts = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
| 54 | 56 |
//System.out.println( "init texts: "+texts); |
| 55 | 57 |
//System.out.println("get text limits: "+texts.size());
|
| 56 | 58 |
textBoundaries = corpus.getTextEndLimits(); |
| tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpII.java (revision 1217) | ||
|---|---|---|
| 10 | 10 |
import java.io.OutputStreamWriter; |
| 11 | 11 |
import java.io.PrintWriter; |
| 12 | 12 |
import java.util.ArrayList; |
| 13 |
import java.util.Arrays; |
|
| 13 | 14 |
import java.util.Collection; |
| 14 | 15 |
import java.util.Collections; |
| 15 | 16 |
import java.util.Comparator; |
| ... | ... | |
| 32 | 33 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
| 33 | 34 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
| 34 | 35 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
| 36 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 35 | 37 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
| 36 | 38 |
import org.txm.searchengine.cqp.corpus.query.Match; |
| 37 | 39 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
| ... | ... | |
| 49 | 51 |
/** The writer. */ |
| 50 | 52 |
private OutputStreamWriter writer; |
| 51 | 53 |
|
| 52 |
public ExpII(CQPCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
| 54 |
public ExpII(MainCorpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
|
|
| 53 | 55 |
super(corpus); |
| 54 | 56 |
this.corpus = corpus; |
| 55 |
texts = corpus.getProject().getTextsID();
|
|
| 57 |
texts = Arrays.asList(corpus.getCorpusTextIdsList());
|
|
| 56 | 58 |
textBoundaries = corpus.getTextEndLimits(); |
| 57 | 59 |
|
| 58 | 60 |
} |
| tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPCorpus.java (revision 1217) | ||
|---|---|---|
| 123 | 123 |
/** The language. */ |
| 124 | 124 |
private String language = "???"; //$NON-NLS-1$ |
| 125 | 125 |
|
| 126 |
|
|
| 127 | 126 |
/** |
| 128 | 127 |
* Gets the locale of the corpus. |
| 129 | 128 |
* |
| tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/MainCorpus.java (revision 1217) | ||
|---|---|---|
| 445 | 445 |
} |
| 446 | 446 |
return textids; |
| 447 | 447 |
} |
| 448 |
|
|
| 449 |
/**
 * Gets the "id" property value of the "text" structural unit for every text structure of this corpus.
 *
 * Builds the structure indexes 0 .. getNbTexts()-1 and resolves them in a single
 * struc2Str call on the CQi client, using the qualified name of the text "id" property.
 *
 * @return the strings returned by struc2Str, one per structure index requested
 * @throws CqiClientException declared by the signature; NOTE(review): presumably propagated from the CQi client calls
 * @throws IOException declared by the signature; NOTE(review): presumably propagated from the CQi client calls
 * @throws CqiServerError declared by the signature; NOTE(review): presumably propagated from the CQi client calls
 */
public String[] getCorpusTextIdsList() throws CqiClientException, IOException, CqiServerError {
|

| 450 |
|

| 451 |
int nbtext = getNbTexts(); |

| 452 |
int[] structs = new int[nbtext]; |

| 453 |
// fill structs with the structure indexes 0 .. nbtext-1
for(int i = 0 ; i < nbtext ; i++) |

| 454 |
structs[i] = i; |

| 448 | 455 |
|
| 456 |
StructuralUnit text_su = this.getStructuralUnit("text"); //$NON-NLS-1$
|

| 457 |
StructuralUnitProperty text_id_sup = text_su.getProperty("id"); //$NON-NLS-1$
|

| 458 |
|

| 459 |
return CorpusManager.getCorpusManager().getCqiClient().struc2Str(text_id_sup.getQualifiedName(), structs); |

| 460 |
} |
|
| 461 |
|
|
| 449 | 462 |
/** |
| 450 | 463 |
* Find text no. |
| 451 | 464 |
* |
| tmp/org.txm.analec.rcp/META-INF/MANIFEST.MF (revision 1217) | ||
|---|---|---|
| 165 | 165 |
org.txm.annotation.urs.toolbar, |
| 166 | 166 |
org.txm.annotation.urs.view, |
| 167 | 167 |
org.txm.annotation.urs.widgets, |
| 168 |
org.txm.macro.analec,
|
|
| 169 |
org.txm.macro.analec.edit,
|
|
| 170 |
org.txm.macro.analec.exploit,
|
|
| 171 |
org.txm.macro.analec.exploit.mesures1,
|
|
| 172 |
org.txm.macro.analec.export,
|
|
| 173 |
org.txm.macro.analec.misc,
|
|
| 168 |
org.txm.macro.urs,
|
|
| 169 |
org.txm.macro.urs.edit,
|
|
| 170 |
org.txm.macro.urs.exploit,
|
|
| 171 |
org.txm.macro.urs.exploit.mesures1,
|
|
| 172 |
org.txm.macro.urs.export,
|
|
| 173 |
org.txm.macro.urs.misc,
|
|
| 174 | 174 |
visuAnalec, |
| 175 | 175 |
visuAnalec.chaines, |
| 176 | 176 |
visuAnalec.donnees, |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/EmptyPropValuesMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.apache.commons.lang.StringUtils |
|
| 9 |
import org.kohsuke.args4j.* |
|
| 10 |
|
|
| 11 |
import groovy.transform.Field |
|
| 12 |
|
|
| 13 |
import org.txm.Toolbox |
|
| 14 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 15 |
import org.txm.annotation.urs.* |
|
| 16 |
import org.txm.macro.urs.AnalecUtils |
|
| 17 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
| 18 |
import org.txm.searchengine.cqp.corpus.* |
|
| 19 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
| 20 |
|
|
| 21 |
import visuAnalec.donnees.Structure |
|
| 22 |
import visuAnalec.elements.* |
|
| 23 |
|
|
| 24 |
def scriptName = this.class.getSimpleName() |
|
| 25 |
|
|
| 26 |
def selection = [] |
|
| 27 |
for (def s : corpusViewSelections) {
|
|
| 28 |
if (s instanceof CQPCorpus) selection << s |
|
| 29 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 30 |
} |
|
| 31 |
|
|
| 32 |
if (selection.size() == 0) {
|
|
| 33 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 34 |
return false |
|
| 35 |
} |
|
| 36 |
|
|
| 37 |
// BEGINNING OF PARAMETERS |
|
| 38 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
| 39 |
String schema_ursql |
|
| 40 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
| 41 |
int minimum_schema_size |
|
| 42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
| 43 |
String unit_ursql |
|
| 44 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
|
| 45 |
limit_cql |
|
| 46 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
| 47 |
boolean strict_inclusion |
|
| 48 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
| 49 |
int limit_distance |
|
| 50 |
@Field @Option(name="debug", usage="Show internal variable content", widget="Boolean", required=true, def="false") |
|
| 51 |
debug |
|
| 52 |
if (!ParametersDialog.open(this)) return |
|
| 53 |
|
|
| 54 |
def CQI = CQPSearchEngine.getCqiClient() |
|
| 55 |
|
|
| 56 |
//corpus = corpusViewSelection |
|
| 57 |
for (def corpus : selection) {
|
|
| 58 |
|
|
| 59 |
mainCorpus = corpus.getMainCorpus() |
|
| 60 |
|
|
| 61 |
def word = mainCorpus.getWordProperty() |
|
| 62 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName()) |
|
| 63 |
|
|
| 64 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE, |
|
| 65 |
unit_ursql, limit_cql, strict_inclusion, limit_distance); |
|
| 66 |
|
|
| 67 |
for (def unit : selectedUnits) {
|
|
| 68 |
def props = unit.getProps(); |
|
| 69 |
for (def k : props.keySet()) {
|
|
| 70 |
if (props[k] == null) {
|
|
| 71 |
println "$corpus\t"+unit.getDeb()+"->"+unit.getFin()+"\t"+k |
|
| 72 |
} |
|
| 73 |
} |
|
| 74 |
} |
|
| 75 |
} |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/SchemasListMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
|
|
| 6 |
// STANDARD DECLARATIONS |
|
| 7 |
package org.txm.macro.urs.misc |
|
| 8 |
|
|
| 9 |
import org.kohsuke.args4j.* |
|
| 10 |
import groovy.transform.Field |
|
| 11 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 12 |
import org.txm.annotation.urs.* |
|
| 13 |
import org.txm.searchengine.cqp.corpus.* |
|
| 14 |
import org.txm.Toolbox |
|
| 15 |
import org.txm.rcp.commands.* |
|
| 16 |
import org.apache.commons.lang.StringUtils |
|
| 17 |
|
|
| 18 |
// BEGINNING OF PARAMETERS |
|
| 19 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence") |
|
| 20 |
String schema_type |
|
| 21 |
|
|
| 22 |
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3") |
|
| 23 |
int minimum_schema_size |
|
| 24 |
|
|
| 25 |
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="") |
|
| 26 |
String schema_property_name |
|
| 27 |
|
|
| 28 |
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*") |
|
| 29 |
String schema_property_value |
|
| 30 |
|
|
| 31 |
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon") |
|
| 32 |
String unit_type |
|
| 33 |
|
|
| 34 |
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="") |
|
| 35 |
String unit_property_name |
|
| 36 |
|
|
| 37 |
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*") |
|
| 38 |
String unit_property_value |
|
| 39 |
|
|
| 40 |
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word lemma frlemma frolemma #forme# id", required=false, def="word") |
|
| 41 |
String word_property |
|
| 42 |
|
|
| 43 |
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ") |
|
| 44 |
String separator |
|
| 45 |
|
|
| 46 |
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false') |
|
| 47 |
def buildCQL |
|
| 48 |
|
|
| 49 |
|
|
| 50 |
if (!(corpusViewSelection instanceof MainCorpus)) {
|
|
| 51 |
println "Corpus view selection is not a Corpus" |
|
| 52 |
return; |
|
| 53 |
} |
|
| 54 |
|
|
| 55 |
if (!ParametersDialog.open(this)) return; |
|
| 56 |
// END OF PARAMETERS |
|
| 57 |
|
|
| 58 |
MainCorpus corpus = corpusViewSelection |
|
| 59 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 60 |
|
|
| 61 |
// check Schema parameters |
|
| 62 |
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
|
|
| 63 |
println "No schema with name=$schema_type" |
|
| 64 |
return; |
|
| 65 |
} else {
|
|
| 66 |
if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
|
|
| 67 |
// test property existence |
|
| 68 |
def props = analecCorpus.getStructure().getSchemaProperties(schema_type); |
|
| 69 |
if (!props.contains(schema_property_name)) {
|
|
| 70 |
println "Schema $schema_type has no property named $schema_property_name" |
|
| 71 |
return; |
|
| 72 |
} |
|
| 73 |
} |
|
| 74 |
} |
|
| 75 |
|
|
| 76 |
// check unit parameters |
|
| 77 |
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
|
|
| 78 |
println "No unit with name=$unit_type" |
|
| 79 |
return; |
|
| 80 |
} else {
|
|
| 81 |
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
|
|
| 82 |
// test property existence |
|
| 83 |
def props = analecCorpus.getStructure().getUniteProperties(unit_type); |
|
| 84 |
if (!props.contains(unit_property_name)) {
|
|
| 85 |
println "Unit $unit_type has no property named $unit_property_name" |
|
| 86 |
return; |
|
| 87 |
} |
|
| 88 |
} |
|
| 89 |
} |
|
| 90 |
|
|
| 91 |
def CQI = CQPSearchEngine.getCqiClient() |
|
| 92 |
|
|
| 93 |
if (buildCQL) {
|
|
| 94 |
word_prop = corpus.getProperty("id")
|
|
| 95 |
} else {
|
|
| 96 |
word_prop = corpus.getProperty(word_property) |
|
| 97 |
} |
|
| 98 |
|
|
| 99 |
def schemas = analecCorpus.getSchemas(schema_type) |
|
| 100 |
schemas.sort() {it.getProps()}
|
|
| 101 |
def nSchemas = 0 |
|
| 102 |
|
|
| 103 |
def lens = [:] |
|
| 104 |
for (def schema : schemas) {
|
|
| 105 |
|
|
| 106 |
if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
|
|
| 107 |
if (!schema.getProp(schema_property_name).matches(schema_property_value)) {
|
|
| 108 |
// ignoring this schema |
|
| 109 |
continue |
|
| 110 |
} |
|
| 111 |
} |
|
| 112 |
|
|
| 113 |
def nUnites = 0 |
|
| 114 |
for (def unit : schema.getUnitesSousjacentes()) {
|
|
| 115 |
if (unit_type.length() > 0) {
|
|
| 116 |
if (!unit.getType().equals(unit_type)) {
|
|
| 117 |
continue |
|
| 118 |
} |
|
| 119 |
} |
|
| 120 |
|
|
| 121 |
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
|
|
| 122 |
if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
|
|
| 123 |
// ignoring this unit |
|
| 124 |
continue |
|
| 125 |
} |
|
| 126 |
} |
|
| 127 |
|
|
| 128 |
nUnites++ |
|
| 129 |
} |
|
| 130 |
|
|
| 131 |
if (nUnites < minimum_schema_size) continue |
|
| 132 |
|
|
| 133 |
print schema.getProps().toString()+ ": " |
|
| 134 |
def first = true |
|
| 135 |
for (def unit : schema.getUnitesSousjacentes()) {
|
|
| 136 |
if (unit_type.length() > 0) {
|
|
| 137 |
if (!unit.getType().equals(unit_type)) {
|
|
| 138 |
continue |
|
| 139 |
} |
|
| 140 |
} |
|
| 141 |
|
|
| 142 |
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
|
|
| 143 |
if (!unit.getProp(unit_property_name).matches(unit_property_value)) {
|
|
| 144 |
// ignoring this unit |
|
| 145 |
continue |
|
| 146 |
} |
|
| 147 |
} |
|
| 148 |
|
|
| 149 |
String forme = null; |
|
| 150 |
|
|
| 151 |
if (buildCQL) {
|
|
| 152 |
int[] pos = null |
|
| 153 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
| 154 |
else pos = (unit.getDeb()..unit.getFin()) |
|
| 155 |
def first2= true |
|
| 156 |
q = "" |
|
| 157 |
pos.each {
|
|
| 158 |
if (first2) { first2 = false } else { q = q+" " }
|
|
| 159 |
int[] pos2 = [it] |
|
| 160 |
q = q+"["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), pos2)[0]+"\"]" |
|
| 161 |
} |
|
| 162 |
if (first) { first = false } else { print "|" }
|
|
| 163 |
print "("+q+")"
|
|
| 164 |
} else {
|
|
| 165 |
if (word_prop == null) { // word_property is the analec unit property to use
|
|
| 166 |
forme = unit.getProp(word_property) |
|
| 167 |
} else {
|
|
| 168 |
int[] pos = null |
|
| 169 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
| 170 |
else pos = (unit.getDeb()..unit.getFin()) |
|
| 171 |
|
|
| 172 |
forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough |
|
| 173 |
} |
|
| 174 |
|
|
| 175 |
if (first) { first = false } else { print separator }
|
|
| 176 |
print forme |
|
| 177 |
} |
|
| 178 |
} |
|
| 179 |
println "" |
|
| 180 |
|
|
| 181 |
nSchemas++ |
|
| 182 |
} |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitTypesMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.kohsuke.args4j.* |
|
| 9 |
import groovy.transform.Field |
|
| 10 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 11 |
import org.txm.annotation.urs.* |
|
| 12 |
import org.txm.searchengine.cqp.corpus.* |
|
| 13 |
|
|
| 14 |
// Counts, per unit type, the distinct units that belong to at least one schema
// of type `schema_type`, and reports every unit referenced by several schemas.

// The macro needs a main corpus as selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type

if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// Gather the units of every matching schema. Duplicates are kept on purpose so
// that units shared between schemas can be detected just below.
def unitesInSchema = []
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}

// A unit seen more than once is referenced by more than one schema.
unitesInSchema.countBy { it }.each { unit, occurrences ->
	if (occurrences > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+unit.getDeb()+", "+unit.getFin()+"]="+unit.getProps()+" in "+unit.getSchemas().collect() {it.getProps()}
}

// Each distinct unit contributes once to the tally of its type.
def map = new HashSet(unitesInSchema).countBy { it.getType() }

// A one-argument closure given to Map.sort receives Map.Entry objects, so the
// sort key must be read from the entry (looking the entry up in the map always
// yields null and leaves the map unsorted).
println "Unites types: "+map.sort { it.value }
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitsCorrelationMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.apache.commons.lang.StringUtils |
|
| 9 |
import org.txm.rcp.views.corpora.CorporaView |
|
| 10 |
import groovy.transform.Field |
|
| 11 |
|
|
| 12 |
import org.kohsuke.args4j.* |
|
| 13 |
import org.txm.Toolbox |
|
| 14 |
import org.txm.annotation.urs.* |
|
| 15 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl |
|
| 16 |
import org.txm.macro.analec.* |
|
| 17 |
import org.txm.rcp.commands.* |
|
| 18 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 19 |
import org.txm.searchengine.cqp.corpus.* |
|
| 20 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
|
| 21 |
|
|
| 22 |
import visuAnalec.donnees.* |
|
| 23 |
import visuAnalec.elements.* |
|
| 24 |
import cern.colt.matrix.DoubleFactory2D |
|
| 25 |
import cern.colt.matrix.DoubleMatrix2D |
|
| 26 |
|
|
| 27 |
// Cross-tabulates the values of two unit properties (unit_prop1 x unit_prop2)
// over the units selected by AnalecUtils.selectUnitsInSchema, prints the
// contingency table, pushes it to the R workspace and either plots its
// correlations to an SVG file or stores it as a lexical table.
// Returns the nested map value1 -> value2 -> count.

def scriptName = this.class.getSimpleName()
def parent
def selection = []
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	// Everything below dereferences the corpus: stop here instead of failing later.
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
String schema_ursql
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance_in_schema
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
limit_cql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance
@Field @Option(name="unit_prop1", usage="PROP1", widget="String", required=false, def="PROP1")
String unit_prop1
@Field @Option(name="unit_prop2", usage="PROP2", widget="String", required=false, def="PROP2")
String unit_prop2
@Field @Option(name="corr_method", usage="try them all", widget="StringArray", metaVar="pearson	spearman	kendall", required=false, def="pearson")
String corr_method
@Field @Option(name="corr_style", usage="try them all", widget="StringArray", metaVar="circle	square	ellipse	number	shade	color	pie", required=false, def="number")
String corr_style
@Field @Option(name="corr_layout", usage="try them all", widget="StringArray", metaVar="full	lower	upper", required=false, def="upper")
String corr_layout
@Field @Option(name="corr_order", usage="try them all", widget="StringArray", metaVar="AOE	FPC	hclust	alphabet", required=false, def="hclust")
String corr_order
@Field @Option(name="output_lexicaltable", usage="create or not a lexical table with the result", widget="Boolean", required=true, def="false")
output_lexicaltable
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Turn the textual debug choice into the numeric level handed to AnalecUtils.
if (debug == "OFF") debug = 0
else if (debug == "ON") debug = 1
else if (debug == "ALL") debug = 2
else if (debug == "REALLY ALL") debug = 3

def CQI = CQPSearchEngine.getCqiClient()

def correlations = [:]
def values1 = new HashSet()
def values2 = new HashSet()
def corpus = corpusViewSelection

mainCorpus = corpus.getMainCorpus()

def word = mainCorpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName())

def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
		unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);

// Build the sparse contingency table; missing and empty property values get
// explicit placeholder labels so they show up as rows/columns.
for (def unit : selectedUnits) {
	def value1 = unit.getProp(unit_prop1)
	if (value1 == null) value1 = "<null>"
	if (value1.length() == 0) value1 = "<empty>"
	def value2 = unit.getProp(unit_prop2)
	if (value2 == null) value2 = "<null>"
	if (value2.length() == 0) value2 = "<empty>"

	values1 << value1
	values2 << value2

	if (!correlations.containsKey(value1)) correlations[value1] = [:]
	def line = correlations[value1]
	if (!line.containsKey(value2)) line[value2] = 0
	line[value2] += 1
}

// Print the table (tab separated) while filling the dense matrix; absent
// combinations are stored as 0 in the nested map as well.
def matrix = new int[values1.size()][values2.size()];
println "\t"+values2.join("\t")
int i = 0;
for (def value1 : values1) {
	print value1
	int j = 0;
	for (def value2 : values2) {
		if (correlations[value1][value2] == null) correlations[value1][value2] = 0;
		print "\t"+correlations[value1][value2]

		matrix[i][j] = correlations[value1][value2]
		j++
	}
	println ""
	i++
}

// Expose the matrix and its row/column labels to R.
def r = RWorkspace.getRWorkspaceInstance()
r.addVectorToWorkspace("corrlines", values1 as String[])
r.addVectorToWorkspace("corrcols", values2 as String[])
r.addMatrixToWorkspace("corrmatrix", matrix)
r.eval("rownames(corrmatrix) = corrlines")
r.eval("colnames(corrmatrix) = corrcols")

def resultsDir = new File(Toolbox.getTxmHomePath(), "results")
resultsDir.mkdirs()
file = File.createTempFile("txm_corr_pairs_", ".svg", resultsDir)

def title = "${corpus.getMainCorpus()}.${corpus}\n${unit_ursql}"
if (limit_distance > 1) title += "[${limit_distance}]."
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) title += "\n(${limit_cql} limits)"
// Both property names are interpolated (the second one used to be printed literally).
title += "\t P1=$unit_prop1 P2=$unit_prop2"

def plotScript = """

r1 = cor(corrmatrix, use="complete.obs", method="$corr_method");
r2 = cov(corrmatrix, use="complete.obs") ;

library(corrplot)
corrplot(r1, type="$corr_layout", order="$corr_order", method="$corr_style")
"""

// execute R script
if (!output_lexicaltable) {
	r.plot(file, plotScript)
}
title = "$unit_prop1 $corr_method correlations"

// Optionally store the contingency table as a lexical table attached to the
// selection.
def lt = null;
if (output_lexicaltable) {
	mFactory = DoubleFactory2D.dense
	dmatrix = mFactory.make(values1.size(), values2.size())
	for (int ii = 0 ; ii < values1.size() ; ii++) {
		for (int jj = 0 ; jj < values2.size() ; jj++) {
			dmatrix.set(ii, jj, matrix[ii][jj])
		}
	}
	if (corpusViewSelection instanceof Partition) {
		lt = new LexicalTableImpl(dmatrix, corpusViewSelection, corpusViewSelection.getCorpus().getProperty("word"),
				values1 as String[], values2 as String[])
		lt.setCorpus(corpusViewSelection.getCorpus());
		corpusViewSelection.storeResult(lt)
	} else {
		lt = new LexicalTableImpl(dmatrix, corpus.getProperty("word"),
				values1 as String[], values2 as String[])
		lt.setCorpus(corpus);
		corpus.storeResult(lt)
	}
}

// The view refresh / SVG opening is handed to monitor.syncExec.
monitor.syncExec(new Runnable() {
	@Override
	public void run() { try {
			if (UnitsCorrelationMacro.this.output_lexicaltable) {
				CorporaView.refreshObject(corpus)
				CorporaView.expand(lt)
			} else {
				OpenSVGGraph.OpenSVGFile(UnitsCorrelationMacro.this.file.getAbsolutePath(), "Correlations Units")
			}
		} catch (e) { e.printStackTrace() }}
})

return correlations
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/SchemaTypesMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.kohsuke.args4j.* |
|
| 9 |
import groovy.transform.Field |
|
| 10 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 11 |
import org.txm.annotation.urs.* |
|
| 12 |
import org.txm.searchengine.cqp.corpus.* |
|
| 13 |
|
|
| 14 |
// Prints how many schemas of each type exist in the URS annotations of the
// selected main corpus.

// The macro needs a main corpus as selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// Tally the schemas per type.
def set = analecCorpus.getTousSchemas().countBy { it.getType() }

// A one-argument closure given to Map.sort receives Map.Entry objects, so the
// sort key must be read from the entry (looking the entry up in the map always
// yields null and leaves the map unsorted).
println "Schemas types: "+set.sort { it.value }
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/CompUnitPropertiesMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.apache.commons.lang.StringUtils; |
|
| 9 |
import org.kohsuke.args4j.* |
|
| 10 |
import groovy.transform.Field |
|
| 11 |
import org.txm.Toolbox; |
|
| 12 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 13 |
import org.txm.annotation.urs.* |
|
| 14 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
| 15 |
import org.txm.searchengine.cqp.corpus.* |
|
| 16 |
import visuAnalec.donnees.Structure; |
|
| 17 |
import visuAnalec.elements.Unite; |
|
| 18 |
|
|
| 19 |
// Compares two properties of every unit of type `unit_type` and reports the
// units whose values differ, followed by a summary line.

// The macro needs a main corpus as selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type

@Field @Option(name="print_diff",usage="", widget="Boolean", required=true, def="true")
boolean print_diff

@Field @Option(name="unit_property_name1", usage="", widget="String", required=false, def="CATEGORIE")
String unit_property_name1

@Field @Option(name="unit_property_name2", usage="", widget="String", required=false, def="CATEGORIE_ORIG")
String unit_property_name2

if (!ParametersDialog.open(this)) return;

int n = 0;      // number of units examined so far (also the 1-based rank printed)
int nDiff = 0;  // number of units whose two property values differ
MainCorpus corpus = corpusViewSelection
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus);

// Process the units ordered by start position, then by end position.
def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
for (Unite unit : units) {
	n++
	def props = unit.getProps()
	def v1 = props.get(unit_property_name1);
	def v2 = props.get(unit_property_name2);

	if (v1 != v2) {
		nDiff++
		if (print_diff) {
			// Word forms covered by the unit; a one-position unit yields a one-element range.
			int[] pos = (unit.getDeb()..unit.getFin())
			def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
			println "$n - ${unit.getDeb()} -> ${unit.getFin()} - $props : $form"
		}
	}
}

// n is now the exact number of units (it used to be one too high in the summary).
if (nDiff == 0) println "$unit_property_name1 and $unit_property_name2 have the same values."
else println "$unit_property_name1 and $unit_property_name2 have $nDiff/$n different values."
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/RelationsListMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.apache.commons.lang.StringUtils; |
|
| 9 |
import org.apache.tools.ant.types.resources.selectors.InstanceOf; |
|
| 10 |
import org.kohsuke.args4j.* |
|
| 11 |
|
|
| 12 |
import groovy.transform.Field |
|
| 13 |
|
|
| 14 |
import org.txm.Toolbox; |
|
| 15 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 16 |
import org.txm.annotation.urs.* |
|
| 17 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
| 18 |
import org.txm.searchengine.cqp.corpus.* |
|
| 19 |
|
|
| 20 |
import visuAnalec.donnees.Structure; |
|
| 21 |
import visuAnalec.elements.Relation |
|
| 22 |
import visuAnalec.elements.Unite; |
|
| 23 |
|
|
| 24 |
// Lists the relations of the selected corpus: all of them when `relation_type`
// is empty, otherwise only those of the requested type. For relations linking
// two units the word forms of both units are printed as well.

// The macro needs a main corpus as selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE")
String relation_type

if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
def word = corpus.getWordProperty()
visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(corpus);

// Honour the requested type: the previous loop gathered the relations of every
// declared type, which made the parameter useless.
// NOTE(review): the `?: []` guards against a null result for an unknown type — confirm what getRelations returns in that case.
def relations
if (relation_type.length() > 0) {
	relations = analecCorpus.getRelations(relation_type) ?: []
} else {
	relations = analecCorpus.getToutesRelations()
}

// Joins the word forms covered by a unit; a one-position unit yields a one-element range.
def formOf = { Unite unit ->
	int[] pos = (unit.getDeb()..unit.getFin())
	return StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
}

int n = 1;
for (Relation relation : relations) {
	def unit1 = relation.getElt1();
	def unit2 = relation.getElt2();
	def props = relation.getProps()
	if (unit1 instanceof Unite && unit2 instanceof Unite) {
		def form1 = formOf(unit1)
		def form2 = formOf(unit2)
		println "$n - $props : $form1 -> $form2"
	} else {
		// At least one end is not a unit: only the properties are shown.
		println "$n - $props"
	}
	n++
}
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitTypesInSchemaMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.kohsuke.args4j.* |
|
| 9 |
import groovy.transform.Field |
|
| 10 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 11 |
import org.txm.annotation.urs.* |
|
| 12 |
import org.txm.searchengine.cqp.corpus.* |
|
| 13 |
|
|
| 14 |
// Counts, per unit type, the distinct units that belong to at least one schema
// of type `schema_type`, and reports every unit referenced by several schemas.

// The macro needs a main corpus as selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type

if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// Gather the units of every matching schema. Duplicates are kept on purpose so
// that units shared between schemas can be detected just below.
def unitesInSchema = []
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}

// A unit seen more than once is referenced by more than one schema.
unitesInSchema.countBy { it }.each { unit, occurrences ->
	if (occurrences > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+unit.getDeb()+", "+unit.getFin()+"]="+unit.getProps()+" in "+unit.getSchemas().collect() {it.getProps()}
}

// Each distinct unit contributes once to the tally of its type.
def map = new HashSet(unitesInSchema).countBy { it.getType() }

// A one-argument closure given to Map.sort receives Map.Entry objects, so the
// sort key must be read from the entry (looking the entry up in the map always
// yields null and leaves the map unsorted).
println "Unites types: "+map.sort { it.value }
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitTypesNotInSchemaMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import org.kohsuke.args4j.* |
|
| 9 |
import groovy.transform.Field |
|
| 10 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 11 |
import org.txm.annotation.urs.* |
|
| 12 |
import org.txm.searchengine.cqp.corpus.* |
|
| 13 |
|
|
| 14 |
// Counts, per unit type, the units of the corpus that do not belong to any
// schema of type `schema_type`.

// The macro needs a main corpus as selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
String schema_type
if (!ParametersDialog.open(this)) return;

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// Set of the units referenced by at least one matching schema.
def unitesInSchema = new HashSet()
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}
def allUnits = analecCorpus.getToutesUnites()
println "unites: "+allUnits.size()
println "unites in schema: "+unitesInSchema.size()

// Tally, per type, the units that are outside every matching schema.
def set = allUnits.findAll { !unitesInSchema.contains(it) }.countBy { it.getType() }

// A one-argument closure given to Map.sort receives Map.Entry objects, so the
// sort key must be read from the entry (looking the entry up in the map always
// yields null and leaves the map unsorted).
println "unites not in schema: "+set.sort { it.value }
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/misc/UnitsProgressionMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs.misc |
|
| 7 |
|
|
| 8 |
import java.util.ArrayList; |
|
| 9 |
import java.util.List; |
|
| 10 |
|
|
| 11 |
import org.apache.commons.lang.StringUtils |
|
| 12 |
import org.jfree.chart.JFreeChart |
|
| 13 |
import org.jfree.chart.plot.XYPlot |
|
| 14 |
import org.kohsuke.args4j.* |
|
| 15 |
|
|
| 16 |
import groovy.transform.Field |
|
| 17 |
|
|
| 18 |
import org.txm.Toolbox |
|
| 19 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
| 20 |
import org.txm.progression.core.functions.Progression |
|
| 21 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 22 |
import org.txm.annotation.urs.* |
|
| 23 |
import org.txm.chartsengine.rcp.editors.ChartEditor |
|
| 24 |
import org.txm.macro.urs.AnalecUtils |
|
| 25 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
| 26 |
import org.txm.searchengine.cqp.corpus.* |
|
| 27 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
| 28 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
| 29 |
import org.txm.rcp.Application |
|
| 30 |
import org.txm.rcp.IImageKeys |
|
| 31 |
|
|
| 32 |
import visuAnalec.donnees.Structure |
|
| 33 |
import visuAnalec.elements.* |
|
| 34 |
|
|
| 35 |
// Builds a Progression chart from URS units: one series for all the units
// selected by AnalecUtils.selectUnitsInSchema, plus one series per value of
// `unit_property_display` when that parameter is set. Unit positions are fed
// to the Progression through FakeQueryResult objects.

def scriptName = this.class.getSimpleName()
def parent
def selection = []
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	// Everything below dereferences the corpus: stop here instead of failing later.
	return
}
selection << corpusViewSelection
parent = corpusViewSelection

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
String schema_ursql
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance_in_schema
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
limit_cql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE")
String unit_property_display
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div")
String struct_name
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n")
String struct_prop
@Field @Option(name="line_width", usage="line width", widget="Integer", required=true, def="1")
int line_width = 2
@Field @Option(name="bande_width", usage="bande width", widget="Float", required=true, def="1.0f")
float bande_width = 1.0f
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Turn the textual debug choice into the numeric level handed to AnalecUtils.
if (debug == "OFF") debug = 0
else if (debug == "ON") debug = 1
else if (debug == "ALL") debug = 2
else if (debug == "REALLY ALL") debug = 3

def CQI = CQPSearchEngine.getCqiClient()

// Parallel lists: the i-th query, query result and label list describe the same series.
def queries = []
def queryResults = []
def informations = []
for (def corpus : selection) {

	mainCorpus = corpus.getMainCorpus()

	def word = mainCorpus.getWordProperty()
	def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName())

	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
			unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);

	// The query text is assembled from the selection parameters and wrapped in a CQLQuery.
	def query = ""
	if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) query += limit_cql
	if (schema_ursql != null && schema_ursql.length() > 0) { if (query.length() > 0) query += " & "; query += ""+schema_ursql+ " >"}
	if (unit_ursql != null && unit_ursql.length() > 0) query += " "+unit_ursql
	query = new CQLQuery(query)

	// First series: every selected unit.
	int[] starts = new int[selectedUnits.size()];
	int[] ends = new int[selectedUnits.size()];
	def unitsinformations = []
	int n = 0;
	for (Unite unite : selectedUnits) {
		starts[n] = unite.getDeb()
		ends[n] = unite.getFin()
		unitsinformations << AnalecUtils.toString(CQI, word, unite);
		n++
	}
	def queryResult = new FakeQueryResult(corpus.getID(), corpus, query, starts, ends, null)
	queries << query
	queryResults << queryResult
	informations << unitsinformations

	if (unit_property_display != null && unit_property_display.length() > 0) {
		// Group the units by property value; missing/empty values get placeholder labels.
		def propvalues = [:]
		for (def unit : selectedUnits) {
			def v = unit.getProp(unit_property_display)
			if (v == null) v = "<null>"
			else if (v.length() == 0) v = "<empty>"

			if (!propvalues.containsKey(v))propvalues[v] = []
			propvalues[v] << unit
		}

		// One additional series per property value, in sorted value order.
		for (def v : propvalues.keySet().sort()) {
			selectedUnits = propvalues[v]
			// The series query is built from the property value alone.
			query = new CQLQuery(v)
			starts = new int[selectedUnits.size()];
			ends = new int[selectedUnits.size()];
			unitsinformations = []
			n = 0;
			for (Unite unite : selectedUnits) {
				starts[n] = unite.getDeb()
				ends[n] = unite.getFin()
				unitsinformations << AnalecUtils.toString(CQI, word, unite);
				n++
			}
			queryResult = new FakeQueryResult(corpus.getID(), corpus, query, starts, ends, null)
			queries << query
			queryResults << queryResult
			informations << unitsinformations
		}
	}
}

corpus = parent
try {
	def struct = corpus.getStructuralUnit(struct_name)
	def struct_p = struct.getProperty(struct_prop)

	Progression progression = new Progression(corpus, queries,
			struct, struct_p, ".*",
			true, false, false,
			line_width, false, bande_width)

	// Hand over the prepared results instead of letting the Progression run the queries.
	progression.stepQueries(queryResults); // new

	if (!progression.stepStructuralUnits() || monitor.isCanceled()) return
	monitor.worked(20)
	if (!progression.stepFinalize() || monitor.isCanceled()) return
	monitor.worked(20)

	// The chart editor is opened through monitor.syncExec.
	monitor.syncExec(new Runnable() {
		@Override
		public void run() {
			try {
				ChartEditor charteditorpart = SWTChartsComponentProvider.openEditor(Application.swtComponentProvider.createProgressionChartEditorPart(IImageKeys.getImage(IImageKeys.ACTION_PROGRESSION), progression, progression.isMonochrome(), progression.isMonostyle(), progression.isDoCumulative()))
				JFreeChart chart = charteditorpart.getChart()
				def plot = chart.getXYPlot()
				ProgressionItemSelectionRenderer renderer = plot.getRenderer();
				// Attach the per-unit labels collected above to the chart items.
				renderer.setAdditionalLabelInformation(informations)
			} catch(Exception e) {e.printStackTrace()}
		}
	})

} catch(Exception e) {
	e.printStackTrace()
	return false
}
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/SchemaTypesMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs |
|
| 7 |
|
|
| 8 |
import org.kohsuke.args4j.* |
|
| 9 |
import groovy.transform.Field |
|
| 10 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 11 |
import org.txm.annotation.urs.* |
|
| 12 |
import org.txm.searchengine.cqp.corpus.* |
|
| 13 |
|
|
| 14 |
if (!(corpusViewSelection instanceof MainCorpus)) {
|
|
| 15 |
println "Corpora selection is not a Corpus" |
|
| 16 |
return; |
|
| 17 |
} |
|
| 18 |
|
|
| 19 |
MainCorpus corpus = corpusViewSelection |
|
| 20 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
| 21 |
|
|
| 22 |
def schemas = analecCorpus.getTousSchemas() |
|
| 23 |
def set = new HashMap() |
|
| 24 |
for (def s : schemas.collect { it.getType() }) {
|
|
| 25 |
if (!set.containsKey(s)) set[s] = 0; |
|
| 26 |
set[s] = set[s] +1 |
|
| 27 |
} |
|
| 28 |
println "Schemas types: "+set.sort() { it.value } // Map.sort(Closure) hands each Map.Entry to the closure: order the types by their occurrence count
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/CreationRelationsMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// STANDARD DECLARATIONS |
|
| 5 |
package org.txm.macro.urs |
|
| 6 |
|
|
| 7 |
import org.kohsuke.args4j.* |
|
| 8 |
|
|
| 9 |
import groovy.transform.Field |
|
| 10 |
|
|
| 11 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 12 |
import org.txm.annotation.urs.* |
|
| 13 |
import org.txm.searchengine.cqp.corpus.* |
|
| 14 |
|
|
| 15 |
import visuAnalec.donnees.Structure; |
|
| 16 |
import visuAnalec.elements.Relation; |
|
| 17 |
import visuAnalec.elements.Schema |
|
| 18 |
import visuAnalec.elements.Unite; |
|
| 19 |
import visuAnalec.vue.Vue |
|
| 20 |
|
|
| 21 |
if (!(corpusViewSelection instanceof MainCorpus)) {
|
|
| 22 |
println "Corpora selection is not a Corpus" |
|
| 23 |
return; |
|
| 24 |
} |
|
| 25 |
|
|
| 26 |
// BEGINNING OF PARAMETERS |
|
| 27 |
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION") |
|
| 28 |
String unit_type |
|
| 29 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE") |
|
| 30 |
String schema_type |
|
| 31 |
if (!ParametersDialog.open(this)) return; |
|
| 32 |
|
|
| 33 |
int nCreated = 0 // count the number of created RELATION |
|
| 34 |
|
|
| 35 |
MainCorpus corpus = corpusViewSelection |
|
| 36 |
def analecCorpus = URSCorpora.getCorpus(corpus); // analec corpus has the same name as the TXM corpus |
|
| 37 |
Structure structure = analecCorpus.getStructure() |
|
| 38 |
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
|
|
| 39 |
println "Error: corpus structure does not contains unit with name=$unit_type" |
|
| 40 |
return |
|
| 41 |
} |
|
| 42 |
if (!structure.getSchemas().contains(schema_type)) { // check if the structure contains the schema_type schemas
|
|
| 43 |
println "Error: corpus structure does not contains schema with name=$schema_type" |
|
| 44 |
return |
|
| 45 |
} |
|
| 46 |
if (!structure.getRelations().contains("ANAPHORE")) { // update the structure if needed
|
|
| 47 |
println "Creating the 'ANAPHORE' relation in the structure" |
|
| 48 |
structure.ajouterType(Relation.class, "ANAPHORE") |
|
| 49 |
analecCorpus.ajouterProp(Relation.class, "ANAPHORE", "TYPE") |
|
| 50 |
analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "COREFERENTE") |
|
| 51 |
analecCorpus.ajouterVal(Relation.class, "ANAPHORE", "TYPE", "ASSOCIATIVE") |
|
| 52 |
} |
|
| 53 |
if (analecCorpus.getRelations("ANAPHORE").size() > 0) {
|
|
| 54 |
println "Error: This macro can't update existing Relations" |
|
| 55 |
return |
|
| 56 |
} |
|
| 57 |
|
|
| 58 |
for (Schema schema : analecCorpus.getSchemas(schema_type)) { // iterate over all schemas of type schema_type
|
|
| 59 |
def units = [] |
|
| 60 |
for (Unite unit : schema.getUnitesSousjacentes()) { // keep only the 'unit_type' units
|
|
| 61 |
if (unit.type.equals(unit_type)) units << unit |
|
| 62 |
} |
|
| 63 |
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() } // sort them
|
|
| 64 |
|
|
| 65 |
for (int i = 0 ; i < units.size() - 1 ; i++) { // build RELATIONS and don't process the last unit
|
|
| 66 |
println "creating "+units[i+1]+", "+units[i] |
|
| 67 |
Relation relation = new Relation("ANAPHORE", units[i+1], units[i])
|
|
| 68 |
relation.getProps().put("TYPE", "COREFERENTE")
|
|
| 69 |
analecCorpus.addRelationLue(relation) // add the new relation |
|
| 70 |
nCreated++; |
|
| 71 |
} |
|
| 72 |
} |
|
| 73 |
|
|
| 74 |
println "nCreated=$nCreated" |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/NombreDeChainesMacro.groovy (revision 1217) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// @author sheiden |
|
| 5 |
// STANDARD DECLARATIONS |
|
| 6 |
package org.txm.macro.urs |
|
| 7 |
|
|
| 8 |
import org.kohsuke.args4j.* |
|
| 9 |
import groovy.transform.Field |
|
| 10 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 11 |
import org.txm.annotation.urs.* |
|
| 12 |
import org.txm.searchengine.cqp.corpus.* |
|
| 13 |
|
|
| 14 |
|
|
Formats disponibles : Unified diff