1 #
2 # Pyrex wrapper for the expat API
3 #
4 import __builtin__
5 import sys
6
# Declarations for the subset of the expat C API used by this module.
cdef extern from "expat/expat.h":
    # The parser struct is declared without a body; this module only ever
    # handles it through the XML_Parser pointer type.
    cdef struct XML_ParserStruct
    ctypedef XML_ParserStruct *XML_Parser
    ctypedef char XML_Char
    cdef int XML_Parse(XML_Parser p, char *data, int length, int isFinal)

    # NOTE(review): no XML_ParserFree is declared here, so nothing in the
    # visible source can release a parser created by XML_ParserCreate.
    cdef XML_Parser XML_ParserCreate(XML_Char *encodingName)
    cdef int XML_GetErrorCode(XML_Parser p)
    cdef int XML_GetErrorLineNumber(XML_Parser p)
    cdef int XML_GetErrorColumnNumber(XML_Parser p)
    cdef char *XML_ErrorString(int code)
    # The handler arguments are declared as untyped void *; callers in this
    # file cast their cdef callback functions to void * when registering.
    cdef void XML_SetStartElementHandler(XML_Parser, void *)
    cdef void XML_SetEndElementHandler(XML_Parser, void *)
    # The user-data pointer is handed back as the first callback argument.
    cdef void XML_SetUserData(XML_Parser, void *)
    # XML_GetBuffer / XML_ParseBuffer are used together: obtain a buffer,
    # fill it, then tell expat how many bytes of it to parse.
    cdef void *XML_GetBuffer(XML_Parser, int bufsize)
    cdef int XML_ParseBuffer(XML_Parser, int bytes, int final)
23
# C library helpers used for reading from a FILE * and measuring C strings.
cdef extern from "stdio.h":
    cdef struct FILE
    # NOTE(review): standard C declares strlen in <string.h>, not <stdio.h>;
    # presumably this works because another header pulls it in -- confirm.
    cdef int strlen(char *string)
    # NOTE(review): the result is declared as int here, but standard C's
    # fread returns an unsigned size_t, so it cannot signal errors with a
    # negative value -- confirm how callers should detect read errors.
    cdef int fread(void *buf, int size, int length, FILE *)
28
# CPython C API functions called directly by this module.
cdef extern from "Python.h":
    # The third argument is the error-handling mode; callers in this file
    # pass "strict".
    cdef object PyUnicode_DecodeUTF8(char *str, int l, char *strict)
    # Declared as returning void * (not object) so the caller can compare
    # the result against NULL before casting it to an object.
    cdef void *PyDict_GetItem(object intern_dict, object val)
    # Used to obtain the C FILE * behind an object that passed
    # isinstance(f, file).
    cdef FILE *PyFile_AsFile(object file)
33
class ExpatError(Exception):
    """Exception raised when expat reports a parse error.

    Attributes:
        message -- the formatted error text
        code    -- the numeric error code passed by the caller
        offset  -- the offset value passed by the caller
        lineno  -- the line number passed by the caller
    """

    def __init__(self, char *msg, int code, int offset, int lineno):
        # Initialise the base exception with the text, then expose each
        # piece of error information as a plain instance attribute.
        Exception.__init__(self, msg)
        self.lineno = lineno
        self.offset = offset
        self.code = code
        self.message = msg
41
cdef class ExpatParser:
    """Thin object wrapper around an expat XML_Parser.

    Handlers are installed by assigning to the attributes named in the
    module-level `handlerInfo` table (e.g. `p.StartElementHandler = f`);
    assignment is routed through `sethandler`.  Exceptions raised inside
    handler callbacks are captured in `exception_occurred` (as a
    sys.exc_info() triple) and re-raised once control is back in Python.
    """
    cdef XML_Parser _parser
    # Pending sys.exc_info() triple stored by a callback, or None.
    cdef object exception_occurred
    cdef char *encoding
    # Cache used by string_intern to share decoded name strings.
    cdef object intern_dict
    cdef object StartElementHandler
    cdef object EndElementHandler

    def __init__(self, char *encoding):
        """Create the underlying expat parser for `encoding`.

        Raises MemoryError if expat cannot allocate the parser.
        """
        self._parser = XML_ParserCreate(encoding)
        if not self._parser:
            # Without this check every later call would dereference NULL.
            raise MemoryError
        # Callbacks receive this object back as their userData argument.
        XML_SetUserData(self._parser, <void *>self)
        # NOTE(review): this keeps a raw pointer into the caller's string
        # without holding a reference to it; the field is not read anywhere
        # in the visible source.
        self.encoding = encoding
        self.intern_dict = {}

    def set_error(self, int code):
        """Raise ExpatError for expat error `code` at the current position.

        The message has the form "<expat text>: line L, column C".
        """
        cdef int lineno, column
        lineno = XML_GetErrorLineNumber(self._parser)
        column = XML_GetErrorColumnNumber(self._parser)
        message = "%s: line %d, column %d" % (
            XML_ErrorString(code), lineno, column)
        # ExpatError's signature is (msg, code, offset, lineno): the column
        # goes in the offset slot and the line number last.
        raise ExpatError(message, code, column, lineno)

    cdef _raise_pending(self):
        # Re-raise an exception captured by a handler callback, if any.
        # The stored triple is cleared first so it is raised only once and
        # the parser does not keep the traceback alive.
        if self.exception_occurred is not None:
            exc = self.exception_occurred
            self.exception_occurred = None
            raise exc[0], exc[1], exc[2]

    cdef ParseFP(self, FILE *fp):
        # Feed the contents of a C FILE * to expat in BUF_SIZE chunks,
        # reading straight into expat's own buffer.  Returns the last
        # XML_ParseBuffer status (0 means a parse error).  Stops early when
        # a handler callback has recorded an exception.
        cdef int BUF_SIZE, rv, bytes_read
        cdef void *buf

        BUF_SIZE = 2048

        rv = bytes_read = 1
        while(rv and bytes_read and
              (self.exception_occurred is None)):
            buf = XML_GetBuffer(self._parser, BUF_SIZE)
            if not buf:
                raise MemoryError

            bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp)
            # NOTE(review): C's fread never returns a negative count, so
            # this guard is unlikely to fire; detecting read errors would
            # need ferror(), which this file does not declare.
            if bytes_read < 0:
                raise IOError

            # A zero-byte read marks end of input, so it is passed as the
            # final chunk.
            rv = XML_ParseBuffer(self._parser, bytes_read, bytes_read == 0)
        return rv

    cdef ParseFileLike(self, readmethod):
        # Feed data obtained from `readmethod(size)` to the parser until it
        # returns an empty result, which is passed on as the final chunk.
        # Returns the last Parse() status; Parse() itself raises on errors.
        cdef int BUF_SIZE, rv
        BUF_SIZE = 2048

        rv = 1
        while rv and (self.exception_occurred is None):
            data = readmethod(BUF_SIZE)
            if data:
                rv = self.Parse(data, 0)
            else:
                # Empty read: tell expat the document is complete and stop.
                rv = self.Parse(data, 1)
                break

        return rv

    def ParseFile(self, object f):
        """Parse a whole document from `f`.

        `f` may be a built-in file object (read directly through its
        FILE *) or any object with a `read(size)` method.

        Returns the non-zero expat status on success.
        Raises TypeError if `f` has no usable `read` attribute, ExpatError
        on a parse error, and re-raises any exception raised by a handler.
        """
        cdef int rv
        cdef FILE *fp

        if isinstance(f, file):
            fp = PyFile_AsFile(f)
            rv = self.ParseFP(fp)
        else:
            readmethod = getattr(f, "read", None)
            if not readmethod:
                raise TypeError, "Argument should have 'read' attribute"
            # Keep the status: rv is a C int and would otherwise be read
            # uninitialised below.
            rv = self.ParseFileLike(readmethod)

        # A handler failure takes precedence over expat's own status.
        self._raise_pending()

        if not rv:
            self.set_error(XML_GetErrorCode(self._parser))
        return rv

    def Parse(self, char *data, int isFinal=0):
        """Parse(data[, isfinal])
        Parse XML data.  `isfinal` should be true at end of input.

        Returns the non-zero expat status on success.  Raises ExpatError
        on a parse error and re-raises any exception raised by a handler
        while this chunk was being parsed."""
        cdef int result
        # data is a NUL-terminated C string, so its length is strlen(data).
        result = XML_Parse(self._parser, data, strlen(data), isFinal)
        # Handler exceptions are captured inside the callbacks; surface
        # them here instead of silently dropping them.
        self._raise_pending()
        if not result:
            self.set_error(XML_GetErrorCode(self._parser))
        return result

    def sethandler(self, name, function):
        """Install `function` as the handler called `name`.

        Raises AttributeError if `name` is not a key of `handlerInfo`.
        """
        try:
            handler = handlerInfo[name]
        except KeyError:
            raise AttributeError, "No such attribute %s" % name

        handler(self, function)

    def __setattr__(self, name, value):
        # Attribute assignment from Python is how handlers are installed.
        self.sethandler(name, value)
148
cdef object string_intern(intern_dict, char *string):
    # Return the unicode object for the C string `string`, sharing one
    # object per distinct string through `intern_dict` (raw string ->
    # unicode).  If `intern_dict` is None no caching is done.
    #
    # The text is decoded as UTF-8, the same way attribute values are
    # decoded elsewhere in this file; unicode(string) would use the default
    # codec and fail on any non-ASCII name.
    cdef void *value

    if intern_dict is None:
        return PyUnicode_DecodeUTF8(string, strlen(string), "strict")

    # Build the Python key once instead of converting the char * for both
    # the lookup and the store.
    key = string

    # PyDict_GetItem returns NULL (without raising) when the key is absent.
    value = PyDict_GetItem(intern_dict, key)
    if value != NULL:
        return <object>value

    result = PyUnicode_DecodeUTF8(string, strlen(string), "strict")
    intern_dict[key] = result
    return result
162
cdef void do_StartElement(void *userData, XML_Char *name, XML_Char *attrs[]):
    # C callback registered with expat for element start tags.
    #
    # userData -- the ExpatParser that owns the expat parser
    # name     -- element name
    # attrs    -- NULL-terminated array laid out as
    #             name0, value0, name1, value1, ...
    #
    # Calls parser.StartElementHandler(name, [(attr_name, attr_value), ...]).
    # Nothing may propagate out of this function into expat's C code, so
    # any exception is stored on the parser for the Python-level caller to
    # re-raise.
    cdef int i
    cdef ExpatParser parser

    parser = <ExpatParser>userData

    try:
        pyattrs = []
        i = 0
        # Walk the array two slots at a time: attrs[i] is the attribute
        # name and attrs[i+1] its value.
        while attrs[i]:
            attr_name = string_intern(parser.intern_dict, attrs[i])
            attr_value = PyUnicode_DecodeUTF8(attrs[i+1],
                                              strlen(attrs[i+1]),
                                              "strict")
            pyattrs.append((attr_name, attr_value))
            i = i + 2

        # NOTE(review): `name` is passed as a raw byte string here while
        # do_EndElement passes an interned unicode name -- confirm which
        # form handlers expect before unifying.
        parser.StartElementHandler(name, pyattrs)
    except:
        # Bare except is deliberate: this is a C boundary.  Keep only the
        # first failure; expat may invoke more callbacks before returning.
        if parser.exception_occurred is None:
            parser.exception_occurred = sys.exc_info()
188
def setStartElementHandler(ExpatParser parser, object handler):
    """Register `handler` as the start-element handler of `parser`.

    The C-level callback do_StartElement is installed on the underlying
    expat parser, and `handler` is stored on the ExpatParser for that
    callback to invoke.
    """
    cdef void *callback
    callback = <void *>do_StartElement
    XML_SetStartElementHandler(parser._parser, callback)
    parser.StartElementHandler = handler
192
cdef void do_EndElement(void *userData, XML_Char *name):
    # C callback registered with expat for element end tags.
    #
    # userData -- the ExpatParser that owns the expat parser
    # name     -- element name
    #
    # Calls parser.EndElementHandler(name) with the interned unicode name.
    # As in do_StartElement, nothing may propagate into expat's C code, so
    # any exception is stored on the parser for the Python-level caller to
    # re-raise.
    cdef ExpatParser parser

    parser = <ExpatParser>userData
    try:
        parser.EndElementHandler(string_intern(parser.intern_dict, name))
    except:
        # Bare except is deliberate: this is a C boundary.  Keep only the
        # first failure recorded during a parse call.
        if parser.exception_occurred is None:
            parser.exception_occurred = sys.exc_info()
197
def setEndElementHandler(ExpatParser parser, object handler):
    """Register `handler` as the end-element handler of `parser`.

    The C-level callback do_EndElement is installed on the underlying
    expat parser, and `handler` is stored on the ExpatParser for that
    callback to invoke.
    """
    cdef void *callback
    callback = <void *>do_EndElement
    XML_SetEndElementHandler(parser._parser, callback)
    parser.EndElementHandler = handler
201
# Maps each assignable handler attribute name to the function that installs
# it; ExpatParser.sethandler looks names up here.
handlerInfo = {
    "StartElementHandler": setStartElementHandler,
    "EndElementHandler": setEndElementHandler,
}
204
205
def CreateParser(char *encodingName):
    """Create and return a new ExpatParser for `encodingName`."""
    new_parser = ExpatParser(encodingName)
    return new_parser
208
209 #def find(f):
210 # find_cheeses(callback, <void*>f)
211
212 #cdef void callback(char *name, void *f):
213 # (<object>f)(name)
214 # just a test
215 cdef int spam() except? -1: