<?xml version="1.0"?>
<!-- Source: root/tmp/org.txm.core/res/org/txm/xml/xsl/txm-filter-teibrown-xmlw.xsl @ repository revision 187 (4.9 kB) -->
<xsl:stylesheet
  xmlns:xd="http://www.pnp-software.com/XSLTdoc"
  xmlns:edate="http://exslt.org/dates-and-times"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:tei="http://www.tei-c.org/ns/1.0"
  exclude-result-prefixes="tei edate xd" version="2.0">

  <xd:doc type="stylesheet">
    <xd:short>
      This stylesheet prepares XML-TEI P5 files of the Brown corpus
      for TXM import with xml/w+csv module.
      Specify the location of this file on your system in the "Front
      XSLT" import option.
    </xd:short>
    <xd:detail>
      This stylesheet is free software; you can redistribute it and/or
      modify it under the terms of the GNU Lesser General Public
      License as published by the Free Software Foundation; either
      version 3 of the License, or (at your option) any later version.

      This stylesheet is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      Lesser General Public License for more details.

      You should have received a copy of the GNU Lesser General Public
      License with this stylesheet. If not, see
      http://www.gnu.org/licenses/lgpl.html
    </xd:detail>
    <xd:author>Alexei Lavrentiev alexei.lavrentev@ens-lyon.fr</xd:author>
    <xd:copyright>2012, CNRS / ICAR (ICAR3 LinCoBaTO)</xd:copyright>
  </xd:doc>

  <xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no"/>

  <!-- Disabled root template, kept for reference:
  <xsl:template match="/">
      <xsl:apply-templates/>
  </xsl:template>
  -->

  <!-- Default element rule: rebuild every element under its local name in
       the TEI namespace, then process its attributes and child nodes. -->
  <xsl:template match="*">
    <xsl:element namespace="http://www.tei-c.org/ns/1.0" name="{local-name(.)}">
      <xsl:apply-templates select="*|@*|processing-instruction()|comment()|text()"/>
    </xsl:element>
  </xsl:template>

  <!-- Attributes (other than those matched by a more specific rule below)
       and comments are copied unchanged. -->
  <xsl:template match="@*|comment()">
    <xsl:copy/>
  </xsl:template>

  <!-- Text rewriting. The regex splits each text node on an ampersand, a
       less-than sign, a greater-than sign, a double hyphen, or an asterisk
       followed by word characters:
         ampersand          becomes  <expan>and</expan>
         less-than sign     becomes  [
         greater-than sign  becomes  ]
         double hyphen      becomes  " - "
         asterisk + word    is emitted unchanged (xsl:otherwise branch)
       Everything else is passed through as is.
       NOTE(review): the branches test with matches(), i.e. a substring
       search, not equality. Since \w in XPath regular expressions also
       covers symbol characters, an asterisk token that contains a less-than
       or greater-than sign would take the bracket branch and be replaced as
       a whole. Confirm whether that is intended. -->
  <xsl:template match="text()">
    <xsl:analyze-string select="." regex="&amp;|&lt;|&gt;|--|\*(\w+)">
      <xsl:matching-substring>
        <xsl:choose>
          <xsl:when test="matches(.,'&amp;')">
            <expan xmlns="http://www.tei-c.org/ns/1.0">and</expan>
          </xsl:when>
          <xsl:when test="matches(.,'&lt;')">
            <xsl:text>[</xsl:text>
          </xsl:when>
          <xsl:when test="matches(.,'&gt;')">
            <xsl:text>]</xsl:text>
          </xsl:when>
          <xsl:when test="matches(.,'--')">
            <!-- a double dash provokes malformed XML in the notes placed in XML comments by the tokenizer -->
            <xsl:text> - </xsl:text>
          </xsl:when>
          <xsl:otherwise>
            <xsl:value-of select="."/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:matching-substring>
      <xsl:non-matching-substring>
        <xsl:value-of select="."/>
      </xsl:non-matching-substring>
    </xsl:analyze-string>
  </xsl:template>

  <!-- Processing instructions are dropped from the output. -->
  <xsl:template match="processing-instruction()"/>

  <!-- teiHeader deleted for the xml/w import module -->
  <xsl:template match="tei:teiHeader">
    <!--<xsl:copy-of select="."/>-->
  </xsl:template>

  <!-- tei:text: copy the element, process its attributes, then write a
       type attribute holding the category label selected by the @decls
       code (this replaces any type attribute produced from the source).
       A @decls value not listed below yields an empty type attribute. -->
  <xsl:template match="tei:text">
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <xsl:attribute name="type">
        <xsl:choose>
          <xsl:when test="@decls='A'">PRESS: REPORTAGE</xsl:when>
          <xsl:when test="@decls='B'">PRESS: EDITORIAL</xsl:when>
          <xsl:when test="@decls='C'">PRESS: REVIEWS</xsl:when>
          <xsl:when test="@decls='D'">RELIGION</xsl:when>
          <xsl:when test="@decls='E'">SKILL AND HOBBIES</xsl:when>
          <xsl:when test="@decls='F'">POPULAR LORE</xsl:when>
          <xsl:when test="@decls='G'">BELLES-LETTRES</xsl:when>
          <xsl:when test="@decls='H'">MISCELLANEOUS: GOVERNMENT AND HOUSE ORGANS</xsl:when>
          <xsl:when test="@decls='J'">LEARNED</xsl:when>
          <xsl:when test="@decls='K'">FICTION: GENERAL</xsl:when>
          <xsl:when test="@decls='L'">FICTION: MYSTERY</xsl:when>
          <xsl:when test="@decls='M'">FICTION: SCIENCE</xsl:when>
          <xsl:when test="@decls='N'">FICTION: ADVENTURE</xsl:when>
          <xsl:when test="@decls='P'">FICTION: ROMANCE</xsl:when>
          <xsl:when test="@decls='R'">HUMOR</xsl:when>
        </xsl:choose>
      </xsl:attribute>
      <xsl:apply-templates/>
    </xsl:copy>
  </xsl:template>

  <!-- Both @type and @pos are written out as an attribute named "type".
       translate() turns each space into an underscore and removes
       asterisks from the value. -->
  <xsl:template match="@type|@pos">
    <xsl:attribute name="type">
      <xsl:value-of select="translate(.,' *','_')"/>
    </xsl:attribute>
  </xsl:template>

  <!-- tei:mw elements are output as TEI w elements, keeping their
       (processed) attributes and content. -->
  <xsl:template match="tei:mw">
    <w xmlns="http://www.tei-c.org/ns/1.0">
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates/>
    </w>
  </xsl:template>

  <!-- tei:c elements of type "pct" are likewise output as TEI w elements. -->
  <xsl:template match="tei:c[@type='pct']">
    <w xmlns="http://www.tei-c.org/ns/1.0">
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates/>
    </w>
  </xsl:template>

</xsl:stylesheet>