/ - Diff - Plateforme TXM - Forge du Centre Blaise Pascal

     package org.txm.macro.pdf
     // STANDARD DECLARATIONS
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.rcp.swt.widget.parameters.*
     import org.apache.pdfbox.Loader
     import org.apache.pdfbox.rendering.PDFRenderer
     import org.apache.pdfbox.tools.imageio.ImageIOUtil
     import org.apache.pdfbox.rendering.ImageType
     // BEGINNING OF PARAMETERS
     @Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
     def input_file
     @Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
     def input_dir
     // Open the parameters input dialog box
     if (!ParametersDialog.open(this)) return
     // END OF PARAMETERS

     // Macro purpose: render every page of a PDF file as a 300 DPI RGB PNG image.
     // When input_dir exists, every PDF found directly in it is processed;
     // otherwise input_file is processed. Images are written next to the PDF and
     // named "<pdf name without extension>-<page number>.png" (pages start at 1).

     // Returns the file name without its last extension ("a.b.pdf" -> "a.b").
     def baseNameOf = { File file ->
     	def fileName = file.getName()
     	int dot = fileName.lastIndexOf(".")
     	return (dot > 0) ? fileName.substring(0, dot) : fileName
     }

     // Renders all the pages of one PDF. Returns true when every page was written,
     // false (after printing the stack trace) when loading or rendering failed.
     def renderPages = { File pdf ->
     	def doc = null
     	try {
     		doc = Loader.loadPDF(pdf)
     		def renderer = new PDFRenderer(doc)
     		def prefix = baseNameOf(pdf)
     		for (int page = 0; page < doc.getNumberOfPages(); ++page) {
     			def image = renderer.renderImageWithDPI(page, 300, ImageType.RGB)
     			def pngFile = new File(pdf.getParentFile(), prefix + "-" + (page + 1) + ".png")
     			// one stream per page: withOutputStream closes it even if writeImage throws
     			pngFile.withOutputStream { out ->
     				ImageIOUtil.writeImage(image, "png", out, 300)
     			}
     		}
     		return true
     	} catch (Exception e) {
     		e.printStackTrace()
     		return false
     	} finally {
     		// always release the document, including after a rendering failure
     		try {
     			if (doc != null) doc.close()
     		} catch (IOException e) {
     			e.printStackTrace()
     		}
     	}
     }

     if (input_dir != null && input_dir.exists()) {
     	int nFiles = 0
     	// escaped dot + case-insensitive: "x.PDF" is accepted, "xpdf" is not
     	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
     		if (!f.isFile()) return // eachFileMatch also yields matching sub-directories
     		println "Processing "+baseNameOf(f)+"..."
     		if (renderPages(f)) nFiles++
     	}
     	println "Processed "+nFiles+" files."
     } else if (input_file != null && input_file.getName().toUpperCase().endsWith(".PDF")) {
     	renderPages(input_file)
     } else {
     	// neither parameter is usable: say so instead of failing on a null file
     	println "Nothing to do: select an existing directory or a .pdf file."
     }

     package org.txm.macro.pdf
     // STANDARD DECLARATIONS
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.rcp.swt.widget.parameters.*
     import org.apache.pdfbox.Loader
     import org.apache.pdfbox.text.PDFTextStripper
     // BEGINNING OF PARAMETERS
     @Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
     def input_file
     @Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
     def input_dir
     // Open the parameters input dialog box
     if (!ParametersDialog.open(this)) return
     // END OF PARAMETERS

     // Macro purpose: print the document information (author, title, subject,
     // creator, producer, creation/modification dates, keywords) and the page
     // count of PDF files. When input_dir exists, one tab-separated row is printed
     // per PDF found directly in it; otherwise input_file is described as a
     // "Label: value" list.

     // Missing text properties are printed as an empty string.
     def textOf = { value -> (value == null) ? "" : value }
     // Missing dates are printed as an empty string, others as an Instant.
     def instantOf = { date -> (date == null) ? "" : date.toZonedDateTime().toInstant() }
     // Wraps a text cell in double quotes, as the tabular output expects.
     def quoted = { value -> "\"" + textOf(value) + "\"" }

     // Closes a document, reporting (not propagating) a failure to do so.
     def closeQuietly = { doc ->
     	try {
     		if (doc != null) doc.close()
     	} catch (IOException e) {
     		System.err.println("Could not close document: " + e)
     	}
     }

     if (input_dir != null && input_dir.exists()) {
     	int nFiles = 0
     	println "File\tAuthor\tTitle\tSubject\tCreator\tProducer\tCreation date\tModification date\tKeywords\tPages"
     	// escaped dot + case-insensitive: "x.PDF" is accepted, "xpdf" is not
     	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
     		if (!f.isFile()) return // eachFileMatch also yields matching sub-directories
     		def doc = null
     		try {
     			doc = Loader.loadPDF(f)
     			def pdd = doc.getDocumentInformation()
     			// build the whole row first so a failure never leaves a truncated line
     			def row = [
     				quoted(f.getName()), // the "File" column announced by the header
     				quoted(pdd.getAuthor()),
     				quoted(pdd.getTitle()),
     				quoted(pdd.getSubject()),
     				quoted(pdd.getCreator()),
     				quoted(pdd.getProducer()),
     				instantOf(pdd.getCreationDate()),
     				instantOf(pdd.getModificationDate()),
     				quoted(pdd.getKeywords()),
     				doc.getNumberOfPages()
     			]
     			println row.join("\t")
     			nFiles++
     		} catch (Exception e) {
     			// keep the table clean on stdout, but do not hide the failure
     			System.err.println("Could not read " + f + ": " + e)
     		} finally {
     			closeQuietly(doc)
     		}
     	}
     	println "Processed "+nFiles+" files."
     } else if (input_file != null && input_file.getName().toUpperCase().endsWith(".PDF")) {
     	def doc = null
     	try {
     		doc = Loader.loadPDF(input_file)
     		def pdd = doc.getDocumentInformation()
     		println "Author: "+textOf(pdd.getAuthor())
     		println "Title: "+textOf(pdd.getTitle())
     		println "Subject: "+textOf(pdd.getSubject())
     		println "Creator: "+textOf(pdd.getCreator())
     		println "Producer: "+textOf(pdd.getProducer())
     		println "Creation date: "+instantOf(pdd.getCreationDate())
     		println "Modification date: "+instantOf(pdd.getModificationDate())
     		println "Keywords: "+textOf(pdd.getKeywords())
     		println "Pages: "+doc.getNumberOfPages()
     	} catch (Exception e) {
     		System.err.println("Could not read " + input_file + ": " + e)
     	} finally {
     		closeQuietly(doc)
     	}
     } else {
     	// neither parameter is usable: say so instead of failing on a null file
     	println "Nothing to do: select an existing directory or a .pdf file."
     }

     import org.apache.pdfbox.Loader
     import org.apache.pdfbox.text.PDFTextStripper
     // WARNING: this macro requires the PDFBox Java library (https://pdfbox.apache.org). Install the standalone jar: https://dlcdn.apache.org/pdfbox/3.0.0-alpha3/pdfbox-app-3.0.0-alpha3.jar
     // BEGINNING OF PARAMETERS
     @Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
     def input_file
     @Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
     def input_dir
     // Open the parameters input dialog box
     if (!ParametersDialog.open(this)) return
     // END OF PARAMETERS

     // Macro purpose: extract the text of a PDF file into a UTF-8 ".txt" file
     // written next to it, with the same name minus the extension. When input_dir
     // exists, every PDF found directly in it is converted; otherwise input_file
     // is converted and the stripper's tuning values are printed first.
     // NOTE(review): this region originally interleaved two revisions of the
     // script; it is resolved here to the variant that sorts text by position and
     // marks page starts with "<pb/>" -- confirm against the repository.

     // Returns the file name without its last extension ("a.b.pdf" -> "a.b").
     def baseNameOf = { File file ->
     	def fileName = file.getName()
     	int dot = fileName.lastIndexOf(".")
     	return (dot > 0) ? fileName.substring(0, dot) : fileName
     }

     // Extracts the text of one PDF into its sibling .txt file. When verbose is
     // true the stripper's current tolerance/threshold values are printed.
     // Returns true on success, false (after printing the stack trace) on failure.
     def extractText = { File pdf, boolean verbose ->
     	def doc = null
     	try {
     		doc = Loader.loadPDF(pdf)
     		def strip = new PDFTextStripper()
     		if (verbose) {
     			// space-width based tolerance used to decide where spaces are added;
     			// a larger value reduces the number of spaces added
     			println "SpacingTolerance = "+strip.getSpacingTolerance()
     			// character-width based tolerance used to decide where spaces are added;
     			// a larger value reduces the number of spaces added
     			println "AverageCharTolerance = "+strip.getAverageCharTolerance()
     			// indentation (in whitespace widths) beyond which a line starts a paragraph
     			println "IndentThreshold = "+strip.getIndentThreshold()
     			// vertical gap (in character heights) beyond which a line starts a paragraph
     			println "DropThreshold = "+strip.getDropThreshold()
     		}
     		strip.setSortByPosition(true)
     		strip.setPageStart("<pb/>")
     		// optional paragraph markup, disabled:
     		// strip.setParagraphStart("\n<p>")
     		// strip.setParagraphEnd("</p>")
     		// extract first, write afterwards: a failing PDF leaves no empty .txt behind
     		def text = strip.getText(doc)
     		new File(pdf.getParentFile(), baseNameOf(pdf) + ".txt").write(text, "UTF-8")
     		return true
     	} catch (Exception e) {
     		e.printStackTrace()
     		return false
     	} finally {
     		// always release the document, including after an extraction failure
     		try {
     			if (doc != null) doc.close()
     		} catch (IOException e) {
     			e.printStackTrace()
     		}
     	}
     }

     if (input_dir != null && input_dir.exists()) {
     	int nFiles = 0
     	// escaped dot + case-insensitive: "x.PDF" is accepted, "xpdf" is not
     	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
     		if (!f.isFile()) return // eachFileMatch also yields matching sub-directories
     		println "Processing "+f.getName()+"..."
     		if (extractText(f, false)) nFiles++
     	}
     	println "Processed "+nFiles+" files."
     } else if (input_file != null && input_file.getName().toUpperCase().endsWith(".PDF")) {
     	extractText(input_file, true)
     } else {
     	// neither parameter is usable: say so instead of failing on a null file
     	println "Nothing to do: select an existing directory or a .pdf file."
     }

     // Copyright © 2022 MY_INSTITUTION
     // Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html)
     // @author mdecorde
     // STANDARD DECLARATIONS
     package org.txm.macro
     import org.kohsuke.args4j.*
     import groovy.transform.Field
     import org.txm.rcp.swt.widget.parameters.*
     import org.txm.Toolbox;
     import org.txm.utils.io.FileCopy
     import org.txm.rcp.views.fileexplorer.MacroExplorer
     // BEGINNING OF PARAMETERS
     @Field @Option(name="files", usage="an example file", widget="FilesOpen", metaVar="*.groovy;*.jar", required=true, def="")
     def files
     // Open the parameters input dialog box
     if (!ParametersDialog.open(this)) return
     // END OF PARAMETERS

     // Macro purpose: install the selected files into the TXM Groovy scripts
     // directory. ".jar" files go to "lib/"; ".groovy" files go to
     // "user/<package path>/", where the package is read from the script's own
     // "package org.txm.macro..." declaration (default: org.txm.macro). Other
     // files are ignored. The macros view is refreshed at the end.
     File rootDir = new File(Toolbox.getTxmHomePath(), "scripts/groovy/")

     // Replaces (or creates) target with a copy of source, announcing which case
     // applies. The target directory is created when missing, for jars as well as
     // for macros.
     def install = { source, target, updateMessage, newMessage ->
     	if (target.exists()) {
     		println updateMessage
     		if (!target.delete()) println "Warning: could not delete $target"
     	} else {
     		println newMessage
     	}
     	target.getParentFile().mkdirs()
     	FileCopy.copy(source, target)
     }

     for (File f : files) {
     	if (!f.isFile()) continue;
     	if (f.getName().endsWith(".jar")) {
     		File f2 = new File(rootDir, "lib/"+f.getName())
     		install(f, f2, "Updating library $f2", "New library: $f2")
     	} else if (f.getName().endsWith(".groovy")) {
     		String ppackage = "org.txm.macro"
     		for (def line : f.readLines("UTF-8")) {
     			// capture only the package name: tolerates leading indentation and a
     			// trailing ';', whitespace or comment, which would corrupt the path
     			def declaration = (line =~ /^\s*package\s+(org\.txm\.macro[\w.]*)/)
     			if (declaration.find()) {
     				ppackage = declaration.group(1)
     				break
     			}
     		}
     		File f2 = new File(rootDir, "user/"+ppackage.replace(".", "/")+ "/"+ f.getName())
     		install(f, f2, "Update macro: $f2", "New macro: $f2")
     	} else {
     		println "Ignoring $f"
     	}
     }
     // update macros view
     monitor.syncExec(new Runnable() {
     		public void run() {
     			MacroExplorer.refresh();
     		}
     	});

TXM/trunk/org.txm.groovy.core/src/java/org/txm/groovy/core/GSERunner.java (revision 3536)
8	8	import java.util.HashSet;
9	9	import java.util.Map;
10	10
11		import org.apache.commons.lang.StringUtils;
12	11	import org.codehaus.groovy.control.CompilerConfiguration;
13	12	import org.codehaus.groovy.control.customizers.ImportCustomizer;
14	13	import org.eclipse.core.internal.runtime.InternalPlatform;

Laboratoire ICAR » Plateforme TXM

Révision 3536