/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

Révision 468

     <?xml version="1.0" encoding="UTF-8"?>
     <?eclipse version="3.4"?>
     <plugin>
      <extension
              point="org.eclipse.ui.commands">
               <command
                     categoryId="org.txm.rcpapplication.category.txm"
                     defaultHandler="org.txm.rcpapplication.commands.function.ComputeExpI"
                     id="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts"
                     name="QueryIndexOfTexts">
               </command>
               <command
                     categoryId="org.txm.rcpapplication.category.txm"
                     defaultHandler="org.txm.rcpapplication.commands.function.ComputeValuesOfQuery"
                     id="org.txm.rcpapplication.commands.function.ComputeValuesOfQuery"
                     name="ValuesOfQuery">
               </command>
               <command
                     categoryId="org.txm.rcpapplication.category.txm"
                     defaultHandler="org.txm.rcpapplication.commands.function.ComputeExpII"
                     id="org.txm.rcpapplication.commands.function.ComputeExpIIA"
                     name="ExpIIA">
               </command>
               <command
                     categoryId="org.txm.rcpapplication.category.txm"
                     defaultHandler="org.txm.rcpapplication.commands.function.CreateContextSubcorpus"
                     id="org.txm.rcpapplication.commands.function.CreateQueriesContextSubcorpus"
                     name="ContextSubcorpus">
               </command>
             </extension>
       <extension
              point="org.eclipse.ui.menus">
           <menuContribution
                 allPopups="false"
                 locationURI="toolbar:org.txm.rcpapplication.toolbartools?before=org.txm.rcpapplication.separator1">
              <command
                    commandId="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts"
                    label="Exp I"
                    style="push">
                 <visibleWhen
                       checkEnabled="false">
                    <or>
                       <reference
                             definitionId="OneCorpusSelected">
                       </reference>
                    </or>
                 </visibleWhen>
              </command>
              <command
                    commandId="org.txm.rcpapplication.commands.function.ComputeValuesOfQuery"
                    label="ValuesOfQuery"
                    style="push">
                 <visibleWhen
                       checkEnabled="false">
                    <or>
                       <reference
                             definitionId="OneCorpusSelected">
                       </reference>
                    </or>
                 </visibleWhen>
              </command>
              <command
                    commandId="org.txm.rcpapplication.commands.function.CreateQueriesContextSubcorpus"
                    label="ContextSubcorpus"
                    style="push">
                 <visibleWhen
                       checkEnabled="false">
                    <or>
                       <reference
                             definitionId="OneCorpusSelected">
                       </reference>
                    </or>
                 </visibleWhen>
              </command>
              <command
                    commandId="org.txm.rcpapplication.commands.function.ComputeExpIIA"
                    label="Exp II"
                    style="push">
                 <visibleWhen
                       checkEnabled="false">
                    <or>
                       <reference
                             definitionId="OneCorpusSelected">
                       </reference>
                    </or>
                 </visibleWhen>
              </command>
           </menuContribution>
           <menuContribution
                 allPopups="false"
                 locationURI="popup:org.txm.rcpapplication.views.CorporaView">
              <command
                    commandId="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts"
                    icon="icons/functions/QueryIndexOfTexts.png"
                    style="push">
                 <visibleWhen
                       checkEnabled="false">
                    <or>
                       <reference
                             definitionId="OneCorpusSelected">
                       </reference>
                    </or>
                 </visibleWhen>
              </command>
           </menuContribution>
           <menuContribution
                 locationURI="menu:menu.tools">
              <command
                    commandId="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts"
                    icon="icons/functions/QueryIndexOfTexts.png"
                    style="push">
                 <visibleWhen
                       checkEnabled="false">
                    <or>
                       <reference
                             definitionId="OneCorpusSelected">
                       </reference>
                    </or>
                 </visibleWhen>
              </command>
           </menuContribution>
           <menuContribution
                 locationURI="menu:menu.help.plugins">
              <command
                    commandId="org.txm.rcpapplication.commands.OpenBrowser"
                    label="CQL2LSA"
                    style="push">
                 <parameter
                       name="org.txm.rcpapplication.commands.commandParameter2"
                       value="https://groupes.renater.fr/wiki/txm-users/public/extensions#CQL2LSA">
                 </parameter>
              </command>
           </menuContribution>
        </extension>
     </plugin>

     <?xml version="1.0" encoding="UTF-8"?>
     <classpath>
     	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
     	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
     		<accessrules>
     			<accessrule kind="accessible" pattern="**"/>
     		</accessrules>
     	</classpathentry>
     	<classpathentry kind="src" path="src"/>
     	<classpathentry kind="output" path="bin"/>
     </classpath>

     Manifest-Version: 1.0
     Bundle-ManifestVersion: 2
     Bundle-Name: CQL2LSA
     Bundle-SymbolicName: CQL2LSA;singleton:=true
     Bundle-Version: 1.0.0.qualifier
     Bundle-Activator: cql2lsarcp.Activator
     Require-Bundle: org.txm.core;bundle-version="0.7.0",
      org.txm.rcp;bundle-version="0.7.8",
      org.eclipse.ui,
      org.eclipse.core.runtime,
      org.eclipse.ui.editors;bundle-version="3.8.100",
      org.eclipse.core.expressions;bundle-version="3.4.500",
      org.txm.index.core,
      org.txm.lexicaltable.core,
      org.txm.statsengine.r.core
     Bundle-RequiredExecutionEnvironment: JavaSE-1.6
     Bundle-ActivationPolicy: lazy
     Export-Package: cql2lsarcp,
      org.txm.functions.cql2lsa,
      org.txm.rcpapplication.commands.function
     Bundle-Vendor: Textometrie.org

     <?xml version="1.0" encoding="UTF-8"?>
     <projectDescription>
     	<name>CQL2LSARCP</name>
     	<comment></comment>
     	<projects>
     	</projects>
     	<buildSpec>
     		<buildCommand>
     			<name>org.eclipse.jdt.core.javabuilder</name>
     			<arguments>
     			</arguments>
     		</buildCommand>
     		<buildCommand>
     			<name>org.eclipse.pde.ManifestBuilder</name>
     			<arguments>
     			</arguments>
     		</buildCommand>
     		<buildCommand>
     			<name>org.eclipse.pde.SchemaBuilder</name>
     			<arguments>
     			</arguments>
     		</buildCommand>
     	</buildSpec>
     	<natures>
     		<nature>org.eclipse.pde.PluginNature</nature>
     		<nature>org.eclipse.jdt.core.javanature</nature>
     	</natures>
     </projectDescription>

     package cql2lsarcp;
     import org.eclipse.jface.resource.ImageDescriptor;
     import org.eclipse.ui.plugin.AbstractUIPlugin;
     import org.osgi.framework.BundleContext;
     /**
      * The activator class controls the plug-in life cycle
      */
     public class Activator extends AbstractUIPlugin {
     	// The plug-in ID
     	public static final String PLUGIN_ID = "CQL2LSA"; //$NON-NLS-1$
     	// The shared instance
     	private static Activator plugin;
     	/**
     	 * The constructor
     	 */
     	public Activator() {
+    	}
     	/*
     	 * (non-Javadoc)
     	 * @see org.eclipse.ui.plugin.AbstractUIPlugin#start(org.osgi.framework.BundleContext)
     	 */
     	public void start(BundleContext context) throws Exception {
     		super.start(context);
     		plugin = this;
+    	}
     	/*
     	 * (non-Javadoc)
     	 * @see org.eclipse.ui.plugin.AbstractUIPlugin#stop(org.osgi.framework.BundleContext)
     	 */
     	public void stop(BundleContext context) throws Exception {
     		plugin = null;
     		super.stop(context);
+    	}
     	/**
     	 * Returns the shared instance
+    	 *
     	 * @return the shared instance
     	 */
     	public static Activator getDefault() {
     		return plugin;
+    	}
     	/**
     	 * Returns an image descriptor for the image file at the given
     	 * plug-in relative path
+    	 *
     	 * @param path the path
     	 * @return the image descriptor
     	 */
     	public static ImageDescriptor getImageDescriptor(String path) {
     		return imageDescriptorFromPlugin(PLUGIN_ID, path);
+    	}
+    }

     package org.txm.functions.cql2lsa;
     import java.io.BufferedReader;
     import java.io.File;
     import java.io.FileInputStream;
     import java.io.IOException;
     import java.io.InputStreamReader;
     import java.util.ArrayList;
     import java.util.HashMap;
     import org.txm.core.results.TXMResult;
     import org.txm.functions.ProgressWatcher;
     import org.txm.functions.TXMCommand;
     import org.txm.searchengine.cqp.CQPEngine;
     import org.txm.searchengine.cqp.MemCqiClient;
     import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
     import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
     import org.txm.searchengine.cqp.corpus.Corpus;
     import org.txm.searchengine.cqp.corpus.QueryResult;
     import org.txm.searchengine.cqp.corpus.Subcorpus;
     import org.txm.searchengine.cqp.corpus.query.Query;
     import org.txm.searchengine.cqp.serverException.CqiServerError;
     public class ContextSubcorpus extends TXMCommand {
     	protected ArrayList<QueryResult> results = new ArrayList<QueryResult>();
     	protected HashMap<String, String> keywordQueriestoName = new HashMap<String, String>();
     	protected Corpus corpus;
     	protected File keywordsFile;
     	protected  int contextSize;
     	public ContextSubcorpus(Corpus corpus, File keywordsFile, int contextSize) {
     		this.corpus = corpus;
     		this.keywordsFile = keywordsFile;
     		this.contextSize = contextSize;
+    	}
     	@Override
     	public boolean toTxt(File outfile, String encoding, String colseparator,
     			String txtseparator) throws Exception {
     		return false;
+    	}
     	@Override
     	public void clean() {
+    	}
     	@Override
     	public boolean delete() {
     		return false;
+    	}
     	@Override
     	public TXMResult getParent() {
     		return corpus;
+    	}
     	public Subcorpus getSubcorpus() throws InvalidCqpIdException {
     		if (results.size() > 0) {
     			String name = keywordsFile.getName();
     			int idx = name.indexOf(".");
     			if (idx > 0) name = name.substring(0, idx);
     			Subcorpus sub = corpus.createSubcorpus(name+"_contexts", results.get(0));
     			corpus.addResult(sub);
     			return sub;
+    		}
     		return null;
+    	}
     	@Override
     	public boolean compute(ProgressWatcher monitor) throws IOException, CqiClientException, CqiServerError, InvalidCqpIdException {
     if (!(CQPEngine.getCqiClient() instanceof MemCqiClient));
     		MemCqiClient CQI = (MemCqiClient) CQPEngine.getCqiClient();
     		BufferedReader keywordFileReader = new BufferedReader(new InputStreamReader(
     				new FileInputStream(keywordsFile), "UTF-8")); //$NON-NLS-1$
     		ArrayList<String> keywordLines = new ArrayList<String>();
     		String l = keywordFileReader.readLine();
     		while (l != null) {
     			keywordLines.add(l);
     			l = keywordFileReader.readLine();
+    		}
     		keywordFileReader.close();
     		System.out.println("Number of keywords lines: "+keywordLines.size());
     		System.out.println("context left and right size is: "+contextSize);
     		monitor.beginTask("Querying keywords...", keywordLines.size());
     		int nkeyword = 0;
     		for (String line : keywordLines) {
     			String[] split = line.split("=", 2); //$NON-NLS-1$
     			if (split.length == 2) {
     				Query q = new Query(split[1]+" expand to "+contextSize);
     				keywordQueriestoName.put(split[1], split[0]);
     				results.add(corpus.query(q, "K"+nkeyword++, false));
+    			}
+    		}
     		keywordFileReader.close();
     		// Loop over QueryResult to Merge them into one subcorpus
     		int n = 0;
     		monitor.beginTask("Creating subcorpus...", results.size());
     		while(results.size() > 1) {
     			QueryResult q1 = results.get(0);
     			QueryResult q2 = results.get(1);
     			//System.out.println("Mergin... "+q1+" "+q2);
     			//System.out.println("match sizes "+q1.getNMatch()+" "+q2.getNMatch());
     			String merge_name = "Merge"+(n++);
     			CQI.query(merge_name+"= union "+q1.getQualifiedCqpId()+" "+q2.getQualifiedCqpId()+";");
     			results.remove(0);
     			results.remove(0);
     			results.add(new QueryResult(merge_name, merge_name, corpus, null));
     			monitor.worked(1);
+    		}
     		System.out.println("Done.");
     		return true;
+    	}
     	@Override
     	public String getName() {
     		// TODO Auto-generated method stub
     		return null;
+    	}
     	@Override
     	public String getSimpleName() {
     		// TODO Auto-generated method stub
     		return null;
+    	}
     	@Override
     	public String getDetails() {
     		// TODO Auto-generated method stub
     		return null;
+    	}
+    }

     package org.txm.functions.cql2lsa;
     import java.io.BufferedReader;
     import java.io.BufferedWriter;
     import java.io.File;
     import java.io.FileInputStream;
     import java.io.FileOutputStream;
     import java.io.IOException;
     import java.io.InputStreamReader;
     import java.io.OutputStreamWriter;
     import java.util.Arrays;
     import org.txm.core.results.TXMResult;
     import org.txm.functions.ProgressWatcher;
     import org.txm.functions.TXMCommand;
     import org.txm.searchengine.cqp.CQPEngine;
     import org.txm.searchengine.cqp.MemCqiClient;
     import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
     import org.txm.searchengine.cqp.corpus.Corpus;
     import org.txm.searchengine.cqp.corpus.Property;
     import org.txm.searchengine.cqp.corpus.QueryResult;
     import org.txm.searchengine.cqp.corpus.StructuralUnitProperty;
     import org.txm.searchengine.cqp.corpus.query.Match;
     import org.txm.searchengine.cqp.corpus.query.Query;
     import org.txm.searchengine.cqp.serverException.CqiServerError;
     /**
      * Simple index with no multiple properties
+     *
      * @author mdecorde
+     *
      */
     public class ValuesOfQuery extends TXMCommand {
     	Corpus corpus;
     	Query query;
     	Property prop;
     	int fmin;
     	public ValuesOfQuery(Corpus corpus, Query query, Property prop, int fmin)  {
     		this.corpus = corpus;
     		this.query = query;
     		this.prop = prop;
     		this.fmin = fmin;
+    	}
     	public boolean compute(File outputFile) throws CqiClientException, IOException, CqiServerError {
     		if (!(CQPEngine.getCqiClient() instanceof MemCqiClient)) return false;
     		monitor.beginTask("Start querying...", 100);
     		File file = File.createTempFile("query", ".txt");
     		//ExecTimer t = new ExecTimer();
     		//System.out.println("run query "+query);t.start();
     		QueryResult result = corpus.query(query, "ValuesOf", false);
     		//System.out.println("query done"+t.stop());t.start();
     		//System.out.println("group query "+query+" and save in "+file.getAbsolutePath());t.start();
     		MemCqiClient cli = (MemCqiClient)CQPEngine.getCqiClient();
     		cli.query("group "+result.getQualifiedCqpId()+" match "+prop.getName()+" > \""+file+"\";");
     		//System.out.println("query done"+t.stop());t.start();
     		monitor.worked(50);
     		if (!file.exists()) return false;
     		BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
     		String line = reader.readLine(); // "#---------------------------------"
     		line = reader.readLine();        // "(none)          word     \t   freq"
     		BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"));
     		while (line != null) {
     			line = line.substring(30);
     //			int len = line.length();
     //
     			int i = line.indexOf('\t');
     //			while (line.charAt(i) == ' ') i++;
     //			int i_start = i;
     //			while (line.charAt(i) != ' ') i++;
     //			String w = line.substring(i_start, i);
     //
     //			while (line.charAt(i) == ' ') i++;
     //			i_start = i;
     //			while (line.charAt(i) != ' ' && i < len) i++;
     //			String f = line.substring(i_start, i);
     //
     //			System.out.println("'"+w+"'\t'"+f+"'");
     			int f = Integer.parseInt(line.substring(i).trim());
     			String s;
     			if (f > fmin) {
     				//System.out.println(line.substring(0, i).trim() + "\t"+f);
     				s = line.substring(0, i).trim();
     				writer.write(s+"=["+prop.getName()+"=\""+s+"\"]\n");
     			} else {
     				break;
+    			}
     			line = reader.readLine();
     			monitor.worked(1);
+    		}
     		reader.close();
     		writer.close();
     		System.out.println("Done printing queries in "+outputFile.getAbsolutePath());
     		return true;
+    	}
     	/**
     	 * Test purpose function.
     	 * Does not manage big corpus
+    	 *
     	 * @throws CqiClientException
     	 * @throws IOException
     	 * @throws CqiServerError
     	 */
     	public void test() throws CqiClientException, IOException, CqiServerError {
     		QueryResult result = corpus.query(query, "ValuesOf", false);
     		int[] positions = new int[result.getNMatch()];
     		int i = 0;
     		for (Match m : result.getMatches()) {
     			positions[i++] = m.getStart();
+    		}
     		String[] values = null;
     		if (prop instanceof StructuralUnitProperty) {
     			int[] structs = CQPEngine.getCqiClient().cpos2Struc(prop.getQualifiedName(), positions);
     			positions = null;
     			structs = uniquify(structs);
     			values = CQPEngine.getCqiClient().struc2Str(prop.getQualifiedName(), structs);
     			structs = null;
     		} else {
     			int[] indexes = CQPEngine.getCqiClient().cpos2Id(prop.getQualifiedName(), positions);
     			positions = null;
     			indexes =  uniquify(indexes);
     			values = CQPEngine.getCqiClient().id2Str(prop.getQualifiedName(), indexes);
     			indexes = null;
+    		}
     		System.out.println("Values: ");
     		for (String v : values) {
     			System.out.println(v);
+    		}
+    	}
     	/**
+    	 *
     	 * @param idx
     	 * @return the uniq valuesof the idx array
     	 */
     	public static int[] uniquify(int[] idx) {
     		int[] result = new int[idx.length];
     		int n = 0;
     		Arrays.sort(idx);
     		int previous = -1;
     		for (int i : idx) {
     			if (previous != i) {
     				result[n++] = i;
     				previous = i;
+    			}
+    		}
     		int[] final_result = new int[n];
     		System.arraycopy(result, 0, final_result, 0, n);
     		return final_result;
+    	}
     	@Override
     	public boolean toTxt(File outfile, String encoding, String colseparator,
     			String txtseparator) throws Exception {
     		// TODO Auto-generated method stub
     		return false;
+    	}
     	@Override
     	public void clean() {
     		// TODO Auto-generated method stub
+    	}
     	@Override
     	public boolean delete() {
     		// TODO Auto-generated method stub
     		return false;
+    	}
     	@Override
     	public TXMResult getParent() {
     		return corpus;
+    	}
     	@Override
     	public boolean compute(ProgressWatcher watcher) throws Exception {
     		// TODO Auto-generated method stub
     		return false;
+    	}
     	@Override
     	public String getName() {
     		return "ValuesOfQuery";
+    	}
     	@Override
     	public String getSimpleName() {
     		return "ValuesOfQuery";
+    	}
     	@Override
     	public String getDetails() {
     		return 	this.corpus.getName()+" "+this.query+" "+this.prop+" "+this.fmin;
+    	}
+    }

     package org.txm.functions.cql2lsa;
     import java.io.BufferedReader;
     import java.io.BufferedWriter;
     import java.io.File;
     import java.io.FileInputStream;
     import java.io.FileOutputStream;
     import java.io.IOException;
     import java.io.InputStreamReader;
     import java.io.OutputStreamWriter;
     import java.io.PrintWriter;
     import java.util.ArrayList;
     import java.util.Collection;
     import java.util.Collections;
     import java.util.Comparator;
     import java.util.HashMap;
     import java.util.LinkedHashMap;
     import java.util.List;
     import java.util.Map;
     import org.eclipse.core.runtime.IAdaptable;
     import org.eclipse.jface.resource.ImageDescriptor;
     import org.eclipse.ui.model.IWorkbenchAdapter;
     import org.txm.core.messages.TXMCoreMessages;
     import org.txm.core.results.TXMResult;
     import org.txm.functions.ProgressWatcher;
     import org.txm.functions.TXMCommand;
     import org.txm.index.core.functions.LineComparator.SortMode;
     import org.txm.index.core.messages.IndexCoreMessages;
     import org.txm.lexicaltable.core.functions.LexicalTable;
     import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl;
     import org.txm.rcp.Messages;
     import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
     import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
     import org.txm.searchengine.cqp.corpus.Corpus;
     import org.txm.searchengine.cqp.corpus.QueryResult;
     import org.txm.searchengine.cqp.corpus.query.Match;
     import org.txm.searchengine.cqp.corpus.query.Query;
     import org.txm.searchengine.cqp.serverException.CqiServerError;
     import org.txm.statsengine.r.core.exceptions.RWorkspaceException;
     import org.txm.utils.logger.Log;
     public class ExpII extends TXMCommand implements IAdaptable {
     	Corpus corpus;
     	List<String> texts;
     	int[] textBoundaries;
     	LinkedHashMap<String, QueryIndexLine> lines = new LinkedHashMap<String, QueryIndexLine>();
     	/** The writer. */
     	private OutputStreamWriter writer;
     	/**
     	 * Stores the corpus and reads its text identifiers and text end limits.
     	 *
     	 * @param corpus the corpus the queries will be run on
     	 */
     	public ExpII(Corpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
     		this.corpus = corpus;
     		this.texts = this.corpus.getTextsID();
     		this.textBoundaries = this.corpus.getTextEndLimits();
     	}
     	public int getT() {
     		int t = 0;
     		for (QueryIndexLine line : lines.values()) {
     			t += line.getFrequency();
+    		}
     		return t;
+    	}
     	public int getFmin() {
     		int t = 999999999;
     		for (QueryIndexLine line : lines.values()) {
     			int f = line.getFrequency();
     			if (f < t) t = f;
+    		}
     		return t;
+    	}
     	public int getFmax() {
     		int t = 0;
     		for (QueryIndexLine line : lines.values()) {
     			int f = line.getFrequency();
     			if (f > t) t = f;
+    		}
     		return t;
+    	}
     	public int getV() {
     		return lines.values().size();
+    	}
     	public String getName() {
     		return corpus.getName();
+    	}
     	public List<String> getTextNames() {
     		return texts;
+    	}
     	public TXMResult getParent() {
     		return corpus;
+    	}
     	public Corpus getCorpus() {
     		return corpus;
+    	}
     	public Collection<QueryIndexLine> getLines() {
     		return lines.values();
+    	}
     	public LinkedHashMap<String, QueryIndexLine> getLinesHash() {
     		return lines;
+    	}
     	int multi = 1;
     	public void sortLines(SortMode mode, boolean revert) {
     		multi = 1;
     		if (revert) multi = -1;
     		List<Map.Entry<String, QueryIndexLine>> entries =
     				new ArrayList<Map.Entry<String, QueryIndexLine>>(lines.entrySet());
     		if (mode == SortMode.FREQUNIT) {
     			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
     				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
     					int ret = multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
     					if (ret == 0) {
     						return multi * a.getValue().getName().compareTo(b.getValue().getName());
+    					}
     					return ret;
+    				}
     			});
     		} else if (mode == SortMode.FREQ) {
     			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
     				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
     					return multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
+    				}
     			});
     		} else if (mode == SortMode.UNIT) {
     			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
     				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
     					return multi * a.getValue().getName().compareTo(b.getValue().getName());
+    				}
     			});
     		} else if (mode == SortMode.UNITFREQ) {
     			Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() {
     				public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){
     					int ret = multi * a.getValue().getName().compareTo(b.getValue().getName());
     					if (ret == 0) {
     						return multi * (a.getValue().getFrequency() - b.getValue().getFrequency());
+    					}
     					return ret;
+    				}
     			});
+    		}
     		LinkedHashMap<String, QueryIndexLine> sortedMap = new LinkedHashMap<String, QueryIndexLine>();
     		for (Map.Entry<String, QueryIndexLine> entry : entries) {
     			sortedMap.put(entry.getKey(), entry.getValue());
+    		}
     		lines = sortedMap;
+    	}
     	HashMap<String, int[]> keywordStartPositions;
     	HashMap<String, int[]> keywordEndPositions;
     	public void compute(File queriesFile, File keywordFile, File outputDir) throws CqiClientException, IOException, CqiServerError {
     		System.out.println("Starting ExpII");
     		File outputFile = new File(outputDir, "doc_word_freq_keyword_2.txt");
     		File lexiconFile = new File(outputDir, "lexicon_2.txt");
     		PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8")));
     		keywordStartPositions = new HashMap<String, int[]>();
     		keywordEndPositions = new HashMap<String, int[]>();
     		HashMap<String, String> keywordQueriestoName = new HashMap<String, String>();
     		int idx_lexicon_counter = 0;
     		// assos query et idx
     		// si queries est dans keywords alors, pas la peine de calculer keyword, parce que la query est un keyword
     		LinkedHashMap<String, Integer> idxLexicon = new LinkedHashMap<String, Integer>();
     		LinkedHashMap<String, int[]> keywordsMaxCountsLexicon = new LinkedHashMap<String, int[]>();
     		BufferedReader keywordFileReader = new BufferedReader(new InputStreamReader(
     				new FileInputStream(keywordFile), "UTF-8")); //$NON-NLS-1$
     		ArrayList<String> keywordLines = new ArrayList<String>();
     		String l = keywordFileReader.readLine();
     		while (l != null) {
     			keywordLines.add(l);
     			l = keywordFileReader.readLine();
+    		}
     		keywordFileReader.close();
     		System.out.println("Number of keywords lines: "+keywordLines.size());
     		monitor.beginTask("Querying keywords...", keywordLines.size());
     		int nkeyword = 0;
     		for (String line : keywordLines) {
     			String[] split = line.split("=", 2); //$NON-NLS-1$
     			if (split.length == 2) {
     				Query q = new Query(split[1]);
     				keywordQueriestoName.put(split[1], split[0]);
     				QueryResult result = corpus.query(q, "K"+nkeyword++, false); //$NON-NLS-1$
     				keywordStartPositions.put(split[1], result.getStarts());
     				keywordEndPositions.put(split[1], result.getEnds());
     				keywordsMaxCountsLexicon.put(split[1], new int[textBoundaries.length]);
     				idxLexicon.put(split[1], idx_lexicon_counter++);
     				monitor.worked(1);
+    			}
+    		}
     		keywordFileReader.close();
     		BufferedReader queriesFileReader = new BufferedReader(new InputStreamReader(new FileInputStream(queriesFile), "UTF-8")); //$NON-NLS-1$
     		ArrayList<String> lines = new ArrayList<String>();
     		l = queriesFileReader.readLine();
     		while (l != null) {
     			lines.add(l);
     			l = queriesFileReader.readLine();
+    		}
     		queriesFileReader.close();
     		System.out.println("Number of lemma lines: "+lines.size());
     		monitor.beginTask("Querying...", lines.size());
     		int nquery = 0;
     		for (String line : lines) {
     			String[] split = line.split("=", 2); //$NON-NLS-1$
     			if (split.length == 2) {
     				if (hasLine(split[0])) {
     					System.out.println(TXMCoreMessages.QueryIndex_2+line);
     				} else {
     					QueryResult result = corpus.query(new Query(split[1]), "Q"+nquery++, false); //$NON-NLS-1$
     					int[] starts = result.getStarts();
     					int[] ends =  result.getEnds();
     					int[] counts = new int[textBoundaries.length];
     					int count = 0;
     					int noText = 0;
     					int endOfCurrentText = textBoundaries[noText];
     					for (int i : starts) {
     						while (i >= endOfCurrentText) {
     							if (noText >= textBoundaries.length) break;
     							counts[noText] = count;
     							noText++;
     							if (noText >= textBoundaries.length) break;
     							endOfCurrentText = textBoundaries[noText];
     							counts[noText] = 0;
     							count = 0;
+    						}
     						count++;
+    					}
     					counts[noText] = count;
     					if (idxLexicon.containsKey(split[1])) { // the query is already computed !!
     						for (noText = 0 ; noText < textBoundaries.length ; noText++) {
     							if (counts[noText] > 0) {
     								//System.out.println("MCL-"+split[1]+"\t"+noText+"\t"+idxLexicon.get(split[1])+"\t"+counts[noText]+"\t"+idxLexicon.get(split[1]));
     								writer.println(noText+"\t"+idxLexicon.get(split[1])+"\t"+counts[noText]+"\t"+idxLexicon.get(split[1]));
+    							}
+    						}
     					} else { // test if match is covered by keywords
     						idxLexicon.put(split[1], idx_lexicon_counter++); // put Lemma query
     						//int max_sum_f = 0;
     						int[] max_f = new int[textBoundaries.length];
     						String[] max_key = new String[textBoundaries.length];
     						for (String key : keywordEndPositions.keySet()) {
     							int[] keyMaxValues = keywordsMaxCountsLexicon.get(key);
     							int[] f = covered(starts, ends, keywordStartPositions.get(key), keywordEndPositions.get(key));
     							for (noText = 0 ; noText < textBoundaries.length ; noText++) {
     								if (f[noText] > max_f[noText]) {
     									max_f[noText] = f[noText];
     									max_key[noText] = key;
+    								}
     								if (f[noText] > keyMaxValues[noText]) {
     									keyMaxValues[noText] = f[noText];
+    								}
+    							}
+    						}
     //						System.out.println("max_f="+Arrays.toString(counts));
     //						System.out.println("max_f="+Arrays.toString(max_f));
     						for (noText = 0 ; noText < textBoundaries.length ; noText++) {
     							int F = counts[noText] - max_f[noText];
     							if (F > 0) {
     								//System.out.println("L-"+split[1]+"\t"+noText+"\t"+idxLexicon.get(split[1])+"\t"+F+"\t0");
     								writer.println(noText+"\t"+idxLexicon.get(split[1])+"\t"+F+"\t0");
+    							}
+    						}
+    					}
+    				}
+    			}
     			monitor.worked(1);
+    		}
     		monitor.setMessage("Finalizing doc_word_freq_2.txt file...");
     		for (String key : keywordsMaxCountsLexicon.keySet()) {
     			int[] keyMaxValues = keywordsMaxCountsLexicon.get(key);
     			for (int noText = 0 ; noText < textBoundaries.length ; noText++) {
     				int F = keyMaxValues[noText];
     				if (F > 0) {
     					//System.out.println("MC-"+key+"\t"+noText+"\t"+idxLexicon.get(key)+"\t"+F+"\t"+idxLexicon.get(key));
     					writer.println(noText+"\t"+idxLexicon.get(key)+"\t"+F+"\t"+idxLexicon.get(key));
+    				}
+    			}
+    		}
     		queriesFileReader.close();
     		writer.close();
     		monitor.setMessage("Writing lexicon file...");
     		writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lexiconFile), "UTF-8")));
     		for (String query : idxLexicon.keySet()) {
     			String name = keywordQueriestoName.get(query);
     			if (name != null) {
     				writer.println(keywordQueriestoName.get(query));
     			} else {
     				writer.println(query);
+    			}
+    		}
     		writer.close();
     		System.out.println("Done, result saved in: \n - "+outputFile.getAbsolutePath()+"\n - "+lexiconFile.getAbsolutePath());
+    	}
     	/**
     	 * Write all the lines on a writer.
+    	 *
     	 * @param outfile the outfile
     	 * @param encoding the encoding
     	 * @param colseparator the colseparator
     	 * @param txtseparator the txtseparator
     	 * @return true, if successful
     	 */
     	public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) {
     		try {
     			toTxt(outfile, 0, lines.size(), encoding, colseparator, txtseparator);
     		} catch (Exception e) {
     			System.err.println(IndexCoreMessages.Index_7 + Log.toString(e));
     			return false;
+    		}
     		return true;
+    	}
     	/**
     	 * Write the lines between from and to on a writer.
+    	 *
     	 * @param outfile the outfile
     	 * @param from The first line to be written
     	 * @param to The last line to be writen
     	 * @param encoding the encoding
     	 * @param colseparator the colseparator
     	 * @param txtseparator the txtseparator
     	 * @throws CqiClientException the cqi client exception
     	 * @throws IOException Signals that an I/O exception has occurred.
     	 */
     	public void toTxt(File outfile, int from, int to, String encoding, String colseparator, String txtseparator)
     			throws CqiClientException, IOException {
     		// NK: writer declared as class attribute to perform a clean if the operation is interrupted
     		this.writer = new OutputStreamWriter(new FileOutputStream(outfile),
     				encoding);
     		String header = "Queries"; //$NON-NLS-1$
     		header = txtseparator+ header.substring(0, header.length() - 1) +txtseparator;
     		header += colseparator+ txtseparator+ "F" + txtseparator; //$NON-NLS-1$
     		for (String t : texts)
     			header += colseparator + txtseparator+ t.replace(txtseparator, txtseparator+txtseparator)+txtseparator;
     		header += "\n"; //$NON-NLS-1$
     		writer.write(header);
     		// for(Line ligne: lines)
     		for (String name : lines.keySet()) {
     			QueryIndexLine ligne = lines.get(name);
     			writer.write(txtseparator+ ligne.getName().replace(txtseparator, txtseparator+txtseparator)+ txtseparator + colseparator + ligne.getFrequency());
     			for (int j = 0; j < texts.size(); j++)
     				writer.write(colseparator + ligne.getFrequency(j));
     			writer.write("\n"); //$NON-NLS-1$
+    		}
     		writer.flush();
     		writer.close();
+    	}
     	/**
     	 * Only one query result and uses texts boundaries to count frequencies for each text
     	 * @param name
     	 * @param query
     	 * @return
     	 * @throws CqiClientException
     	 */
     	public QueryIndexLine addLine(String name, Query query) throws CqiClientException {
     		if (lines.containsKey(name)) return null;
     		QueryResult qresult = corpus.query(query, "tmp", true); //$NON-NLS-1$
     		//System.out.println(query.toString()+" "+qresult.getNMatch());
     		int[] counts = new int[textBoundaries.length];
     		int count = 0;
     		int noText = 0;
     		int endOfCurrentText = textBoundaries[noText];
     		for (Match m : qresult.getMatches()) {
     			while (m.getStart() >= endOfCurrentText) {
     				//System.out.println(m.getStart() +">="+endOfCurrentText);
     				if (noText >= textBoundaries.length) break;
     				//System.out.println("Text: "+texts.get(noText)+" count="+count+" notext="+noText);
     				counts[noText] = count;
     				noText++;
     				if (noText >= textBoundaries.length) break;
     				endOfCurrentText = textBoundaries[noText];
     				counts[noText] = 0;
     				count = 0;
+    			}
     			count++;
+    		}
     		//System.out.println(noText +"<?"+textBoundaries.length+" count="+count);
     		if (noText < textBoundaries.length) // last text
     			counts[noText] = count;
     		qresult.drop();
     		QueryIndexLine line = new QueryIndexLine(name, query, null);
     		line.setFrequencies(counts);
     		lines.put(name, line);
     		return line;
+    	}
     	public LexicalTable toLexicalTable() {
     		int npart = texts.size();
     		int[][] freqs = new int[lines.size()][npart];
     		String[] rownames = new String[lines.size()];
     		String[] colnames = new String[npart];
     		for (int i = 0 ; i < lines.size() ; i++) {
     			QueryIndexLine line = lines.get(i);
     			int[] linefreqs = line.getFreqs();
     			rownames[i] = line.getName();
     			for (int j = 0 ; j < npart ; j++) {
     				freqs[i][j] = linefreqs[j];
+    			}
+    		}
     		int j = 0;
     		for (String t : texts) {
     			colnames[j] = t;
     			j++;
+    		}
     		try {
     			LexicalTable lt = new LexicalTable(corpus, corpus.getProperty("word"), new LexicalTableImpl(freqs, rownames, colnames));
     			return lt;
     		} catch (RWorkspaceException e) {
     			org.txm.utils.logger.Log.printStackTrace(e);
     		} catch (CqiClientException e) {
     			org.txm.utils.logger.Log.printStackTrace(e);
+    		}
     		return null;
+    	}
     	public boolean removeLine(String name) {
     		if (lines.containsKey(name)) {
     			this.acquireSemaphore();
     			lines.remove(name);
     			this.releaseSemaphore();
     			return true;
     		} else {
     			return false;
+    		}
+    	}
     	public boolean hasLine(String name) {
     		return lines.containsKey(name);
+    	}
     	public ArrayList<QueryIndexLine> getLines(int from, int to) {
     		if (lines.size() == 0) return new ArrayList<QueryIndexLine>();
     		if (from < 0) from = 0;
     		if (to < 0) to = 0;
     		if (to > lines.size()) to = lines.size();
     		if (from > to) from = to - 1;
     		ArrayList<QueryIndexLine> tmp = new ArrayList<QueryIndexLine>();
     		int i = 0;
     		for (QueryIndexLine line : lines.values()) {
     			if (i >= from && i < to) {
     				tmp.add(line);
+    			}
     			i++;
+    		}
     		return tmp;
+    	}
     	@Override
     	public boolean delete() {
     		if (corpus != null) return corpus.removeResult(this);
     		return false;
+    	}
     	@Override
     	public String[] getExportTXTExtensions() {
     		return new String[]{"*.csv"};
+    	}
     	@Override
     	public void clean() {
     		try {
     			this.writer.flush();
     			this.writer.close();
     		} catch (IOException e) {
     			// TODO Auto-generated catch block
     			org.txm.utils.logger.Log.printStackTrace(e);
+    		}
+    	}
     	@Override
     	public Object getAdapter(Class adapterType) {
     		if (adapterType == IWorkbenchAdapter.class)
     			return ExpIIAAdapter;
     		return null;
+    	}
     	/** The WordCloud adapter. */
     	private static IWorkbenchAdapter ExpIIAAdapter = new IWorkbenchAdapter() {
     		@Override
     		public Object[] getChildren(Object o) {
     			return new Object[0];
+    		}
     		@Override
     		public ImageDescriptor getImageDescriptor(Object object) {
     			return null;
+    		}
     		@Override
     		public String getLabel(Object o) {
     			return ((ExpII) o).getName();
+    		}
     		@Override
     		public Object getParent(Object o) {
     			return ((ExpII) o).getCorpus();
+    		}
     	};
     	private int[] covered(int[] starts, int[] ends, int[] keywordStarts, int[] keywordEnds) {
     		int[] counts = new int[textBoundaries.length];
     		int noText = 0;
     		int endOfCurrentText = textBoundaries[noText];
     		int f = 0;
     		//		printMatchs(starts, ends, keywordStarts, keywordEnds);
     		int i_keyword = 0;
     		//System.out.println("len i="+starts.length+ " len i_keyword="+keywordStarts.length);
     		for (int i = 0 ; i < starts.length ; ) {
     			//System.out.println("i="+i+ " i_keyword="+i_keyword);
     			if (i_keyword >= keywordStarts.length) break; // no more keyword positions
     			//System.out.println(""+starts[i]+"->"+ends[i]+" : "+keywordStarts[i_keyword]+"->"+keywordEnds[i_keyword]);
     			while (starts[i] >= endOfCurrentText) {
     				if (noText >= textBoundaries.length) break;
     				counts[noText] = f;
     				noText++;
     				if (noText >= textBoundaries.length) break;
     				endOfCurrentText = textBoundaries[noText];
     				counts[noText] = 0;
     				f = 0;
+    			}
     			if (starts[i] < keywordStarts[i_keyword]) {
     				//System.out.println(" match start is not covered");
     				i++;
     			} else if (starts[i] > keywordEnds[i_keyword]) {
     				//System.out.println(" next keyword");
     				i_keyword++; // see next keyword match
     			} else if (ends[i] <= keywordEnds[i_keyword]) {
     				//System.out.println(" next match");
     				//System.out.println(">>>> "+starts[i]+"->"+ends[i]+" : "+keywordStarts[i_keyword]+"->"+keywordEnds[i_keyword]);
     				i++; // OK, test next match
     				f++;
     			} else {
     				//System.out.println(" match end is not covered");
     				i++; // OK, test next match
+    			}
+    		}
     		counts[noText] = f;
     		return counts;
+    	}
     	private static void printMatchs(int[] starts, int[] ends, int[] keywordStarts, int[] keywordEnds) {
     		int min = 999999999;
     		int max = 0;
     		for (int i : starts) if (i < min) min = i;
     		for (int i : keywordStarts) if (i < min) min = i;
     		for (int i : ends) if (i > max) max = i;
     		for (int i : keywordEnds) if (i > max) max = i;
     		for (int i = min ; i <= max ; i++) System.out.print(""+i+"\t");
     		System.out.println();
     		int j = 0;
     		int k = 0;
     		boolean inout = false;
     		for (int i = min ; i <= max ; i++) {
     			if (j < starts.length && starts[j] == i) {
     				if (k < ends.length && ends[k] == i) {
     					System.out.print(""+starts[j++]+"><\t");
     					k++;
     				} else {
     					System.out.print(""+starts[j++]+">\t");
     					inout = true;
+    				}
     			} else if (k < ends.length && ends[k] == i) {
     				System.out.print("<"+ends[k++]+"\t");
     				inout = false;
     			} else {
     				if (inout) {
     					System.out.print("-\t");
     				} else {
     					System.out.print("\t");
+    				}
+    			}
+    		}
     		System.out.println();
     		j = 0;
     		k = 0;
     		inout = false;
     		for (int i = min ; i <= max ; i++) {
     			if (j < keywordStarts.length && keywordStarts[j] == i) {
     				if (k < keywordEnds.length && keywordEnds[k] == i) {
     					System.out.print(""+keywordStarts[j++]+"><\t");
     					k++;
     				} else {
     					System.out.print(""+keywordStarts[j++]+">\t");
     					inout = true;
+    				}
     			} else if (k < keywordEnds.length && keywordEnds[k] == i) {
     				System.out.print("<"+keywordEnds[k++]+"\t");
     				inout = false;
     			} else {
     				if (inout) {
     					System.out.print("-\t");
     				} else {
     					System.out.print("\t");
+    				}
+    			}
+    		}
     		System.out.println();
+    	}
     	public static void main(String[] args) {
     		//		int starts[] = {1, 10, 16, 24};
     		//		int ends[] = {2, 12, 16, 30};
     		//		int kstarts[] = {5, 9, 16, 22};
     		//		int kends[] = {8, 12, 16, 35};
     		//		System.out.println(covered(starts, ends, kstarts, kends));
     		//		int starts[] = {10, 15 ,24, 50};
     		//		int ends[] = {12, 16 ,30, 60};
     		//		int kstarts[] = {5, 9, 14, 20};
     		//		int kends[] = {8, 12, 18, 40};
     		//		System.out.println(covered(starts, ends, kstarts, kends));
     		//		int starts[] = {1, 10, 15 ,24};
     		//		int ends[] = {2, 12, 16 ,30};
     		//		int kstarts[] = {5, 9, 14, 20};
     		//		int kends[] = {8, 12, 18, 40};
     		//		System.out.println(covered(starts, ends, kstarts, kends));
     		//		int starts[] = {4, 10, 16};
     		//		int ends[] = {5, 11, 17};
     		//		int kstarts[] = {3, 11, 15};
     		//		int kends[] = {8, 13, 20};
     		//		System.out.println(covered(starts, ends, kstarts, kends));
     		int starts[] = {4, 10, 16, 20};
     		int ends[] = {5, 11, 17, 22};
     		int kstarts[] = {3, 11, 15};
     		int kends[] = {8, 13, 20};
     		//System.out.println(covered(starts, ends, kstarts, kends));
+    	}
     	@Override
     	public boolean compute(ProgressWatcher watcher) throws Exception {
     		System.out.println("ExpII.compute(monitor): not implemented.");
     		return false;
+    	}
     	@Override
     	public String getSimpleName() {
     		return "ExpII";
+    	}
     	@Override
     	public String getDetails() {
     		return texts.toString();
+    	}
+    }

tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/package.html (revision 468)
	1	<html>
	2	<body>
	3	<p>Prototype of Query index.</p>
	4	</body>
	5	</html>
0	6

     package org.txm.functions.cql2lsa;
     import java.io.BufferedReader;
     import java.io.File;
     import java.io.FileInputStream;
     import java.io.FileOutputStream;
     import java.io.IOException;
     import java.io.InputStreamReader;
     import java.io.OutputStreamWriter;
     import java.util.ArrayList;
     import java.util.Collection;
     import java.util.Collections;
     import java.util.Comparator;
     import java.util.LinkedHashMap;
     import java.util.List;
     import java.util.Map;
     import org.eclipse.core.runtime.IAdaptable;
     import org.eclipse.jface.resource.ImageDescriptor;
     import org.eclipse.ui.model.IWorkbenchAdapter;
     import org.txm.core.messages.TXMCoreMessages;
     import org.txm.functions.ProgressWatcher;
     import org.txm.functions.TXMCommand;
     import org.txm.index.core.functions.LineComparator.SortMode;
     import org.txm.index.core.messages.IndexCoreMessages;
     import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl;
     import org.txm.rcp.IImageKeys;
     import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
     import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
     import org.txm.searchengine.cqp.corpus.Corpus;
     import org.txm.searchengine.cqp.corpus.QueryResult;
     import org.txm.searchengine.cqp.corpus.query.Match;
     import org.txm.searchengine.cqp.corpus.query.Query;
     import org.txm.searchengine.cqp.serverException.CqiServerError;
     import org.txm.statsengine.r.core.exceptions.RWorkspaceException;
     import org.txm.utils.logger.Log;
     public class ExpI extends TXMCommand implements IAdaptable {
     	Corpus corpus;
     	List<String> texts;
     	int[] textBoundaries;
     	LinkedHashMap<String, QueryIndexLine> lines = new LinkedHashMap<String, QueryIndexLine>();
     	/** The writer. */
     	private OutputStreamWriter writer;
     	public ExpI(Corpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {

... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff

Laboratoire ICAR » Plateforme TXM

Révision 468