# In file: C:\Python22\stripper.py
######################################################################
# Python source stripper / cleaner ;)
######################################################################
import os
import sys
import token
import keyword
import StringIO
import tokenize
import traceback
# People who contributed to this recipe, one name per line.
__credits__ = """
Jürgen Hermann
M.E.Farmer
Jean Brouwers
"""
__version__ = '.87'
__author__ = 'M.E.Farmer'
# Revision dates, newest first; adjacent literals are joined as written,
# so the only separators are the commas inside the strings themselves.
__date__ = ('Apr 16, 2005,'
            'Jan 15 2005,'
            'Oct 24 2004')
######################################################################
class Stripper:
    """Python source stripper / cleaner.

    Wraps a string of Python source and re-emits it token by token,
    optionally dropping comments and docstrings, squeezing blank lines,
    expanding tabs and converting the line endings.

    Typical use::

        Stripper(source_text).format(out=some_file, comments=1)
    """

    def __init__(self, raw):
        """Remember the source text to clean.

        raw -- the complete Python source as a single string.
        """
        self.raw = raw

    def format(self, out=sys.stdout, comments=0, docstrings=0,
               spaces=1, untabify=1, eol='unix'):
        """Clean the stored source and write the result to *out*.

        out        -- object with ``writelines`` or at least ``write``.
        comments   -- 0 = keep comments, 1 = strip them.
        docstrings -- 0 = keep docstrings, 1 = strip them all,
                      8 = strip all but triple-double-quoted ones,
                      'pep8' = strip the ones that are not triple
                      quoted and convert ''' delimiters to the
                      triple-double-quote form.
        spaces     -- -1 = keep every blank line, 0 = drop every blank
                      line, n = keep at most n consecutive blank lines.
                      The blank-line counter starts at 1, so with
                      spaces=1 a leading blank line is dropped as well.
        untabify   -- true = expand tabs to spaces before processing.
        eol        -- 'mac' (CR), 'win' (CR LF); anything else means
                      unix (LF).

        A tokenize.TokenError is not propagated: its traceback is
        printed and whatever was produced up to that point is still
        written to *out*.
        """
        # Store line offsets in self.lines.  Token rows are 1-based, so
        # index 0 is padding and index 1 is the start of the first line.
        self.lines = [0, 0]
        pos = 0
        self.temp = StringIO.StringIO()
        # Strips the first blank line if 1
        self.nlcount = 1
        self.removenextnl = 0
        self.continuedline = 0
        self.spaces = spaces
        # 0 = no change, 1 = strip 'em
        self.comments = comments
        # 0 = no change, 1 = strip 'em, 8 = strip all but """'s,
        # 'pep8' = convert ''''s into """ then strip all but """'s
        self.docstrings = docstrings
        if untabify:
            self.raw = self.raw.expandtabs()
        # last line should have a newline ending
        self.raw = self.raw.rstrip() + '\n'
        self.out = out
        # Have you ever had a multiple line ending script?
        # They can be nasty so let's get them all the same.
        self.raw = self.raw.replace('\r\n', '\n')
        self.raw = self.raw.replace('\r', '\n')
        self.lineend = '\n'
        # Gather the offset of the start of every line.
        while 1:
            pos = self.raw.find(self.lineend, pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        # From here on self.lineend is the *output* line ending.
        if eol == 'mac':
            # Mac CR
            self.lineend = '\r'
        elif eol == 'win':
            # Windows CR LF
            self.lineend = '\r\n'
        else:
            # Unix LF
            self.lineend = '\n'
        self.lines.append(len(self.raw))
        self.pos = 0
        # Wrap text in a filelike object
        text = StringIO.StringIO(self.raw)
        # Parse the source, handing every token to __call__.  This is
        # the same loop tokenize.tokenize(readline, tokeneater) runs,
        # spelled out so it does not depend on the callback-style API.
        try:
            for tok in tokenize.generate_tokens(text.readline):
                self(*tok)
        except tokenize.TokenError:
            traceback.print_exc()
        # Now copy the cleaned text to the requested destination.
        self.temp.seek(0)
        # Fall back to write() in case the object has no writelines().
        try:
            self.out.writelines(self.temp)
        except AttributeError:
            for line in self.temp:
                self.out.write(line)

    def _emit(self, text):
        """Write *text* to the temp buffer using the output line ending."""
        # self.raw was normalised to '\n', so any newline inside a token
        # or a whitespace gap (multi-line strings, backslash
        # continuations) is a physical line end that must be converted
        # too; otherwise the output has mixed line endings.
        if self.lineend != '\n':
            text = text.replace('\n', self.lineend)
        self.temp.write(text)

    def _pep8_quotes(self, toktext):
        """Return *toktext* with ''' delimiters turned into double quotes.

        Only the delimiters are changed, never the body.  The literal is
        returned untouched when it is not delimited by ''' or when the
        conversion would change its meaning (the body contains three
        double quotes in a row or ends with a double quote).
        """
        t = toktext.lstrip('rRuU')
        if not t.startswith("'''"):
            return toktext
        body = t[3:-3]
        if '"""' in body or body.endswith('"'):
            return toktext
        prefix = toktext[:len(toktext) - len(t)]
        return prefix + '"""' + body + '"""'

    def __call__(self, toktype, toktext, start, end, line):
        """Token handler, called once for every token of the source.

        toktype -- token type constant from token / tokenize.
        toktext -- the token's text.
        start   -- (row, column) where the token starts.
        end     -- (row, column) where the token ends (unused).
        line    -- the source line(s) the token was found on.
        """
        srow, scol = start
        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)
        # kill comments
        if self.comments and toktype == tokenize.COMMENT:
            return
        # kill doc strings
        if self.docstrings and toktype == tokenize.STRING:
            # Assume if there is nothing on the left side (apart from
            # whitespace, tabs included, and a string prefix) it must
            # be a docstring.
            if line.lstrip().lstrip('rRuU')[:1] in ("'", '"'):
                if not self.continuedline:
                    t = toktext.lstrip('rRuU')
                    if self.docstrings == 8:
                        # pep8 frowns on anything but """ docstrings
                        if not t.startswith('"""'):
                            self.removenextnl = 1
                            return
                    elif self.docstrings == 'pep8':
                        # Test the prefix-less text so r'''...''' and
                        # u"""...""" count as triple quoted.
                        if t[:3] not in ('"""', "'''"):
                            self.removenextnl = 1
                            return
                        # convert docstrings to pep8 style
                        toktext = self._pep8_quotes(toktext)
                    else:
                        # Docstrings do not have an attached EOL like
                        # comments, so drop the following newline too.
                        self.removenextnl = 1
                        return
            # continued strings that are assignments are troublesome:
            # remember the backslash so the string on the next line is
            # not mistaken for a docstring.
            elif line.rstrip().endswith('\\'):
                self.continuedline = 1
        # handle newlines and strip extra blank lines if desired
        if toktype in (token.NEWLINE, tokenize.NL):
            self.continuedline = 0
            if line.isspace():
                if self.spaces == -1:
                    self.temp.write(self.lineend)
                else:
                    self.nlcount += 1
                    if self.nlcount <= self.spaces and self.spaces:
                        self.temp.write(self.lineend)
            else:
                # remove the newline left after docstring removal
                if self.removenextnl:
                    self.removenextnl = 0
                else:
                    self.temp.write(self.lineend)
            return
        self.nlcount = 0
        # send the original whitespace
        if newpos > oldpos:
            self._emit(self.raw[oldpos:newpos])
        # Skip indenting tokens; rewinding self.pos makes the next real
        # token emit the indentation as ordinary leading whitespace.
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return
        # send text to the temp file
        self._emit(toktext)
        return
######################################################################
def Main():
    "test single quoted docstring removal here"
    # The docstring above is kept single-quoted on purpose: by default
    # this function strips its own source file with docstrings=8, which
    # removes every docstring that is not triple-double-quoted.
    #
    # Reads the file named by the first command line argument (or this
    # script itself when none is given) and writes the cleaned source
    # to stdout with Windows line endings.
    import sys
    try:
        filename = sys.argv[1]
    except IndexError:
        filename = sys.argv[0]
    # Close the file explicitly instead of leaving it to the garbage
    # collector.
    source_file = open(filename)
    try:
        filein = source_file.read()
    finally:
        source_file.close()
    Stripper(filein).format(out=sys.stdout,
        comments=0, docstrings=8, spaces=1, untabify=1, eol='win')
######################################################################
# Run the self-test / command line entry point only when executed as a
# script, not when imported as a module.
if __name__ == '__main__':
    Main()