1 #
    2 #   Pyrex wrapper for the expat API
    3 #
    4 import __builtin__
    5 import sys
    6 
    7 cdef extern from "expat/expat.h":
    8         cdef struct XML_ParserStruct
    9         ctypedef XML_ParserStruct *XML_Parser
   10         ctypedef char XML_Char
   11         cdef int XML_Parse(XML_Parser p, char *data, int length, int isFinal)
   12 
   13         cdef XML_Parser XML_ParserCreate(XML_Char *encodingName)
   14         cdef int XML_GetErrorCode(XML_Parser p)
   15         cdef int XML_GetErrorLineNumber(XML_Parser p)
   16         cdef int XML_GetErrorColumnNumber(XML_Parser p)
   17         cdef char *XML_ErrorString(int code)
   18         cdef void XML_SetStartElementHandler(XML_Parser, void *)
   19         cdef void XML_SetEndElementHandler(XML_Parser, void *)
   20         cdef void XML_SetUserData(XML_Parser, void *)
   21         cdef void *XML_GetBuffer(XML_Parser, int bufsize)
   22         cdef int XML_ParseBuffer(XML_Parser, int bytes, int final)
   23 
   24 cdef extern from "stdio.h":
   25         cdef struct FILE
   26         cdef int strlen(char *string)
   27         cdef int fread(void *buf, int size, int length, FILE *)
   28 
   29 cdef extern from "Python.h":
   30         cdef object PyUnicode_DecodeUTF8(char *str, int l, char *strict)
   31         cdef void *PyDict_GetItem(object intern_dict, object val)
   32         cdef FILE *PyFile_AsFile(object file)
   33 
   34 class ExpatError(Exception):
   35         def __init__(self, char *msg, int code, int offset, int lineno):
   36                 Exception.__init__(self, msg)
   37                 self.message = msg
   38                 self.code = code
   39                 self.offset = offset
   40                 self.lineno = lineno
   41 
   42 cdef class ExpatParser:
   43         cdef XML_Parser _parser
   44         cdef object exception_occurred
   45         cdef char *encoding
   46         cdef object intern_dict
   47         cdef object StartElementHandler
   48         cdef object EndElementHandler
   49 
   50         def __init__(self, char *encoding):
   51                 self._parser = XML_ParserCreate(encoding)
   52                 XML_SetUserData(self._parser, <void *>self)
   53                 self.encoding = encoding
   54                 self.intern_dict = {}
   55 
   56         def set_error(self, int code):
   57                 cdef int lineno, column
   58                 lineno = XML_GetErrorLineNumber(self._parser)
   59                 column = XML_GetErrorColumnNumber(self._parser)
   60                 message = "%s: line %d, column %d" % (
   61                         XML_ErrorString(code), lineno, column)
   62                 raise ExpatError(message, code, lineno, column)
   63 
   64         cdef ParseFP(self, FILE *fp):
   65                 cdef int BUF_SIZE, rv, bytes_read
   66                 cdef void *buf
   67 
   68                 BUF_SIZE = 2048
   69 
   70                 rv = bytes_read = 1
   71                 while(rv and bytes_read and
   72                         (self.exception_occurred is None)):
   73                         buf = XML_GetBuffer(self._parser, BUF_SIZE)
   74                         if not buf:
   75                                 raise MemoryError
   76 
   77                         bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp)
   78                         if bytes_read < 0:
   79                                 raise IOError
   80 
   81                         rv = XML_ParseBuffer(self._parser, bytes_read, bytes_read == 0)
   82                 return rv
   83 
   84         cdef ParseFileLike(self, readmethod):
   85                 cdef int BUF_SIZE, rv, bytes_read
   86                 BUF_SIZE = 2048
   87 
   88                 rv = bytes_read = 1
   89                 while(rv and data and
   90                         (self.exception_occurred is None)):
   91                         data = readmethod(BUF_SIZE)
   92                         if data:
   93                                 rv = self.Parse(data, 0)
   94                         else:
   95                                 rv = self.Parse(data, 1)
   96 
   97                 return rv
   98 
   99         def ParseFile(self, object f):
  100                 cdef XML_Parser parser
  101                 cdef int rv
  102                 cdef FILE *fp
  103 
  104                 parser = self._parser
  105 
  106                 if isinstance(f, file):
  107                         fp = PyFile_AsFile(f)
  108                         rv = self.ParseFP(fp)
  109                 else:
  110                         # this branch hasn't been tested
  111                         readmethod = getattr(f, "read", None)
  112                         if not readmethod:
  113                                 raise TypeError, \
  115                                   "Argument should have 'read' attribute"
  116                         self.ParseFileLike(readmethod)
  117 
  118                 if self.exception_occurred is not None:
  119                         raise self.exception_occurred[0], \
  121                                 self.exception_occurred[1], \
  123                                 self.exception_occurred[2]
  124 
  125                 if not rv:
  126                         self.set_error(XML_GetErrorCode(parser))
  127                 else:
  128                         return rv
  129 
  130         def Parse(self, char *data, int isFinal):
  131                 """Parse(data[, isfinal])
  132                 Parse XML data. `isfinal` should be true at end of input."""
  133                 result = XML_Parse(self._parser, data, len(data), isFinal)
  134                 if not result:
  135                         self.set_error(XML_GetErrorCode(self._parser))
  136                 return result
  137 
  138         def sethandler(self, name, function):
  139                 try:
  140                         handler = handlerInfo[name]
  141                 except KeyError:
  142                         raise AttributeError, "No such attribute %s" % name
  143 
  144                 handler(self, function)
  145 
  146         def __setattr__(self, name, value):
  147                 self.sethandler(name, value)
  148 
  149 cdef object string_intern(intern_dict, char *string):
  150         if intern_dict is None:
  151                 return unicode(string)
  152 
  153         cdef void *value
  154         value = PyDict_GetItem(intern_dict, string)
  155 
  156         if value != NULL:
  157                 return <object>value
  158 
  159         result = unicode(string)
  160         intern_dict[string] = result
  161         return result
  162 
  163 cdef do_StartElement( void *userData, XML_Char *name, XML_Char *attrs[]):
  164         cdef int i, length, max
  165         cdef ExpatParser parser
  166 
  167         parser = <ExpatParser>userData
  168         i = length = 0
  169         while attrs[length]:
  170                 length = length + 2
  171 
  172         max = length/2
  173 
  174         pyattrs = []
  175         for i from 0 <= i < max:
  176                 attr_name = string_intern(parser.intern_dict, attrs[i] )
  177                 attr_value = PyUnicode_DecodeUTF8(attrs[i+1],
  178                                         strlen(attrs[i+1]),
  179                                         "strict")
  180 
  181                 pyattrs.append((attr_name, attr_value))
  182 
  183         try:
  184                 parser.StartElementHandler(name, pyattrs)
  185         except:
  186                 print "EXCEPTION!!!"
  187                 parser.exception_occurred = sys.exc_info()
  188 
  189 def setStartElementHandler(ExpatParser parser, object handler):
  190         XML_SetStartElementHandler(parser._parser, <void *>do_StartElement)
  191         parser.StartElementHandler = handler
  192 
  193 cdef do_EndElement( void *userData, XML_Char *name):
  194         cdef ExpatParser parser
  195         parser = <ExpatParser> userData
  196         parser.EndElementHandler(string_intern( parser.intern_dict, name))
  197 
  198 def setEndElementHandler(ExpatParser parser, object handler):
  199         XML_SetEndElementHandler(parser._parser, <void *>do_EndElement)
  200         parser.EndElementHandler = handler
  201 
  202 handlerInfo = {"StartElementHandler": setStartElementHandler,
  203                 "EndElementHandler" : setEndElementHandler }
  204 
  205 
  206 def CreateParser(char *encodingName):
  207         return ExpatParser(encodingName)
  208 
  209 #def find(f):
  210 #       find_cheeses(callback, <void*>f)
  211 
  212 #cdef void callback(char *name, void *f):
  213 #       (<object>f)(name)
  214 # just a test
  215 cdef int spam() except? -1: