# File: C:\Python22\stripper.py
######################################################################
# Python source stripper / cleaner ;)
######################################################################
import os
import sys
import token
import keyword
import StringIO
import tokenize
import traceback
# Contributors, one name per line; the string starts and ends with a newline.
__credits__ = (
    '\n'
    'Jürgen Hermann \n'
    'M.E.Farmer\n'
    'Jean Brouwers\n'
)
__version__ = '.87'
__author__ = 'M.E.Farmer'
# Comma-separated dates, newest first (presumably the release history).
# Adjacent string literals are joined by the compiler into a single string.
__date__ = ('Apr 16, 2005,'
            'Jan 15 2005,'
            'Oct 24 2004')
######################################################################

class Stripper:
    """Python source stripper / cleaner.

    Re-emits Python source text token by token, optionally dropping
    comments and docstrings, collapsing runs of blank lines, expanding
    tabs and normalising line endings.

    The instance is its own token handler: format() feeds every token
    produced by the tokenize module to __call__().
    """

    def __init__(self, raw):
        """Remember the source text to process.

        raw -- the complete Python source as a single string.
        """
        self.raw = raw

    def format(self, out=sys.stdout, comments=0, docstrings=0,
                spaces=1, untabify=1, eol='unix'):
        """Clean the source and write the result to *out*.

        out        -- object with a writelines() or a write() method.
        comments   -- 0 = keep comments, 1 = strip them.
        docstrings -- 0 = keep docstrings;
                      8 = strip every docstring that is not written
                          with triple double quotes;
                      'pep8' = convert triple single quoted docstrings
                          to triple double quotes and strip the ones
                          that are not triple quoted at all;
                      any other true value = strip all docstrings.
        spaces     -- -1 = keep every blank line; otherwise the maximum
                      number of consecutive blank lines to keep
                      (0 removes them all).  The counter starts at 1,
                      so one blank line fewer is kept at the very top.
        untabify   -- true = expand tabs with str.expandtabs().
        eol        -- 'mac' writes '\\r', 'win' writes '\\r\\n', anything
                      else writes '\\n'.

        self.raw is replaced by the normalised text (tabs expanded if
        requested, trailing whitespace removed, '\\n' line endings).
        A tokenize.TokenError is reported with traceback.print_exc()
        and whatever was produced up to that point is still written.
        """
        # Imported here so the class works with either the Python 2
        # StringIO module or the Python 3 io module.
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO

        self.out = out
        # 0  = no change, 1 = strip 'em
        self.comments = comments
        # 0  = no change, 1 = strip 'em, 8 = strip all but """'s,
        # 'pep8' = convert ''''s into """ then strip all but """'s
        self.docstrings = docstrings
        self.spaces = spaces
        self.temp = StringIO()
        # Starting at 1 strips the first blank line of the file.
        self.nlcount = 1
        self.removenextnl = 0
        self.continuedline = 0
        self.pos = 0

        if untabify:
            self.raw = self.raw.expandtabs()
        # The last line must end with exactly one newline.
        self.raw = self.raw.rstrip() + '\n'
        # Scripts with mixed line endings are nasty; make them all '\n'
        # so the offsets below and the tokenizer agree.
        self.raw = self.raw.replace('\r\n', '\n').replace('\r', '\n')

        # self.lines[row] is the offset in self.raw of 1-based line
        # *row*; index 0 is padding and the final entry covers the
        # end-of-file tokens reported one row past the last line.
        self.lines = [0, 0]
        pos = 0
        while 1:
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # Line ending used for everything written out.
        if eol == 'mac':
            self.lineend = '\r'
        elif eol == 'win':
            self.lineend = '\r\n'
        else:
            self.lineend = '\n'

        # Parse the source, handing each token to __call__().
        text = StringIO(self.raw)
        try:
            for tok in tokenize.generate_tokens(text.readline):
                self(*tok)
        except tokenize.TokenError:
            traceback.print_exc()

        # Copy the cleaned text to the destination, falling back to
        # write() for objects that do not offer writelines().
        self.temp.seek(0)
        writelines = getattr(self.out, 'writelines', None)
        if writelines is not None:
            writelines(self.temp)
        else:
            for line in self.temp:
                self.out.write(line)

    def _emit(self, text):
        """Write *text* to the temp buffer using the requested EOL."""
        # Multi-line strings and backslash continuations carry '\n'
        # from self.raw; convert them so the output is not mixed.
        if self.lineend != '\n':
            text = text.replace('\n', self.lineend)
        self.temp.write(text)

    def _pep8_quotes(self, toktext):
        """Return *toktext* with ''' delimiters changed to triple double quotes.

        Only the delimiters are touched and any r/u prefix is kept.
        The token is returned unchanged when it is not delimited by
        ''' or when the conversion would alter or break the literal
        (text ending in a double quote or containing three of them).
        """
        body = toktext.lstrip('rRuU')
        if not body.startswith("'''"):
            return toktext
        prefix = toktext[:len(toktext) - len(body)]
        inner = body[3:-3]
        if inner.endswith('"') or inner.find('"""') != -1:
            return toktext
        return prefix + '"""' + inner + '"""'

    def __call__(self, toktype, toktext, start, end, line):
        """Token handler: decide what to write for one token.

        toktype -- token type constant from token / tokenize.
        toktext -- the token's text.
        start   -- (row, column) where the token begins.
        end     -- (row, column) where the token ends (unused).
        line    -- the source line(s) the token was found on.
        """
        srow, scol = start

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # kill comments
        if self.comments and toktype == tokenize.COMMENT:
            return

        # kill doc strings
        if self.docstrings and toktype == tokenize.STRING:
            # Assume that a string with nothing but indentation (and
            # an optional r/u prefix) to its left is a docstring.
            if line.lstrip(' \t\frRuU')[:1] in ("'", '"'):
                if not self.continuedline:
                    body = toktext.lstrip('rRuU')
                    if self.docstrings == 8:
                        # pep8 frowns on triple single quotes
                        if not body.startswith('"""'):
                            self.removenextnl = 1
                            return
                    elif self.docstrings == 'pep8':
                        if body[:3] not in ('"""', "'''"):
                            self.removenextnl = 1
                            return
                        toktext = self._pep8_quotes(toktext)
                    else:
                        # Docstrings do not have an attached EOL,
                        # so the newline that follows must go too.
                        self.removenextnl = 1
                        return
            # continued strings that are assignments are troublesome
            elif line.rstrip().endswith('\\'):
                self.continuedline = 1

        # handle newlines and strip extra blank lines if desired
        if toktype in (token.NEWLINE, tokenize.NL):
            self.continuedline = 0
            if line.isspace():
                if self.spaces == -1:
                    self.temp.write(self.lineend)
                else:
                    self.nlcount += 1
                    if self.spaces and self.nlcount <= self.spaces:
                        self.temp.write(self.lineend)
            elif self.removenextnl:
                # swallow the newline left after docstring removal
                self.removenextnl = 0
            else:
                self.temp.write(self.lineend)
            return
        self.nlcount = 0

        # send the original whitespace
        if newpos > oldpos:
            self._emit(self.raw[oldpos:newpos])

        # Indenting tokens have no text of their own to write; rewind
        # so the indentation is emitted as whitespace before the next
        # token that is actually written.
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # send text to the temp file
        self._emit(toktext)
######################################################################

def Main():
    "test single quoted docstring removal here"
    # The docstring above is single-quoted on purpose: when this file
    # is run on itself with docstrings=8 it is one of the docstrings
    # that gets stripped.
    #
    # Clean the file named by the first command line argument, or this
    # script itself when no argument is given, and print the result.
    try:
        filename = sys.argv[1]
    except IndexError:
        filename = sys.argv[0]
    # Close the file explicitly instead of relying on garbage
    # collection; try/finally keeps this usable on old interpreters.
    source = open(filename)
    try:
        filein = source.read()
    finally:
        source.close()
    Stripper(filein).format(out=sys.stdout,
           comments=0, docstrings=8, spaces=1, untabify=1, eol='win')

######################################################################
# Call Main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    Main()