Révision 4026

TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDF2ImagesMacro.groovy (revision 4026)
1
package org.txm.macro.pdf
2
// STANDARD DECLARATIONS
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.apache.pdfbox.Loader
8
import org.apache.pdfbox.rendering.PDFRenderer
9
import org.apache.pdfbox.tools.imageio.ImageIOUtil
10
import org.apache.pdfbox.rendering.ImageType
11

  
12
// BEGINNING OF PARAMETERS
13

  
14
// Single PDF to convert. Only looked at when no usable input_dir is given,
// and only processed when its path ends with ".pdf" (case-insensitive).
@Field @Option(name="input_file", usage=".pdf input file", widget="File", required=false, def="")
15
def input_file
16

  
17
// Directory whose PDF files are all converted. Takes precedence over input_file.
@Field @Option(name="input_dir", usage="The directory containing the .pdf files to read", widget="Folder", required=false, def="")
18
def input_dir
19

  
20
// Resolution handed as the DPI value both to the page renderer and to the image writer.
@Field @Option(name="image_density", usage="image density (e.g. 300 for printer or 75 for screen)", widget="Integer", required=true, def="300")
21
def image_density
22

  
23
// Output image format: used as the file extension of each page image and as
// the format name given to the image writer.
@Field @Option(name="image_format", usage="image format (jpg, gif, tiff or png)", widget="StringArray", required=true, metaVar="gif	jpg	png	tiff", def="png")
24
def image_format
25

  
26
// Open the parameters input dialog box.
// The macro stops here when the dialog reports false (presumably when the
// user cancels it -- confirm against ParametersDialog).
27
if (!ParametersDialog.open(this)) return
28

  
29
// END OF PARAMETERS
30

  
31
// Entry point of the macro: convert every PDF of input_dir when it designates
// a directory, otherwise fall back to the single input_file.
if (input_dir != null && input_dir.isDirectory()) {
	// Declared locally so the counter does not leak into the script binding.
	def nFiles = 0
	// (?i) makes the filter case-insensitive so that "SCAN.PDF" is visited too;
	// the dot is escaped so that only a real ".pdf" extension matches.
	input_dir.eachFileMatch(~/(?i).*\.pdf/) { f ->
		// eachFileMatch also reports sub-directories: skip a directory named "x.pdf".
		if (f.isFile()) {
			processFile(f)
			nFiles++
		}
	}
	println "Processed "+nFiles+" files."
} else if (input_file != null && input_file.getAbsolutePath().toUpperCase().endsWith(".PDF")) {
	processFile(input_file)
} else {
	// Neither parameter is usable: tell the user instead of ending silently.
	println "Nothing to process: select an existing input_dir or a .pdf input_file."
}
45

  
46
/**
 * Renders each page of a PDF file to an image file written next to the PDF.
 *
 * Output files are named "<pdf name without extension>-<page number>.<image_format>",
 * the page number being 1-based and zero-padded to the number of digits of the
 * page count. The resolution and the format come from the image_density and
 * image_format script parameters.
 *
 * Files whose path does not end with ".pdf" (case-insensitive) are ignored.
 * Any exception raised while loading, rendering or writing is printed and
 * not propagated, so a batch run continues with the next file.
 *
 * @param input_file the PDF file to convert (a java.io.File)
 */
def processFile(input_file) {

	// All variables are declared with 'def' so nothing leaks into the script
	// binding and survives from one processed file to the next.
	def name = input_file.getName()
	// Resolve against the absolute path: getParentFile() is null for a bare relative name.
	def dir = input_file.getAbsoluteFile().getParentFile()
	def idx = name.lastIndexOf(".")
	if (idx > 0) name = name.substring(0, idx)

	def pdfFile = input_file.getAbsolutePath()
	if (!pdfFile.toUpperCase().endsWith(".PDF")) return

	println "Processing "+name+"..."

	def doc = null
	try {
		doc = Loader.loadPDF(input_file)
		def pdfRenderer = new PDFRenderer(doc)
		def nPages = doc.getNumberOfPages()
		// Number of digits of the page count, used to zero-pad page numbers.
		// Computed on the decimal string so a 0-page document stays well-defined.
		def widthNPages = String.valueOf(nPages).length()
		for (int page = 0; page < nPages; ++page) {
			def bim = pdfRenderer.renderImageWithDPI(page, image_density, ImageType.RGB)
			def fileName = sprintf("%s-%0"+widthNPages+"d.%s", name, page+1, image_format)
			// withOutputStream closes the stream of every page, even when writing fails.
			def written = new File(dir, fileName).withOutputStream { out ->
				ImageIOUtil.writeImage(bim, image_format, out, image_density)
			}
			if (!written) {
				// writeImage reports false when no image could be produced for the format.
				System.err.println "Could not write "+fileName+" in format "+image_format
			}
			print "."
		}
		println " Done."
	} catch (Exception e) {
		e.printStackTrace()
	} finally {
		// Always release the document, including when rendering or writing failed.
		if (doc != null) {
			try {
				doc.close()
			} catch (IOException e) {
				e.printStackTrace()
			}
		}
	}
}
78

  
79

  
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/pdf/PDF2TXTMacro.groovy (revision 4026)
21 21
A PDF writer could choose to write each character in a different order. By default PDFBox does not sort the text tokens before processing them due to performance reasons.""", widget="Boolean", required=true, def="false")
22 22
def set_sort_by_position
23 23

  
24
@Field @Option(name="page_break_symbol", usage="page break symbol at beginning of page (e.g. %%PB%% or <pb/>)", widget="String", required=true, def="%%PB%%")
25
def page_break_symbol
26

  
27

  
24 28
// Open the parameters input dialog box
25 29
if (!ParametersDialog.open(this)) return
26 30

  
......
86 90
*/
87 91

  
88 92
			strip.setSortByPosition(set_sort_by_position)
89
			strip.setPageStart("<pb/>")
93
			strip.setPageStart(page_break_symbol)
90 94
			// strip.setParagraphStart("\n<p>")
91 95
			// strip.setParagraphEnd("</p>")
92 96
			writer.print strip.getText(doc)

Formats disponibles : Unified diff