doxyfilter-python.py Source File

BASIS version 1.2.3 (revision 2104)
00001 ##############################################################################
00002 # @file  doxyfilter-python.py
00003 # @brief Doxygen filter for Python scripts.
00004 #
00005 # This is the pythfilter.py implementation of Matthias Baas.
00006 #
00007 # @sa http://i31www.ira.uka.de/~baas/pydoxy/
00008 #
00009 # @ingroup Tools
00010 ##############################################################################
00011 
00012 ##############################################################################
00013 # pythfilter.py v1.5.5, written by Matthias Baas (baas@ira.uka.de)
00014 #
00015 # Doxygen filter which can be used to document Python source code.
00016 # Classes (incl. methods) and functions can be documented.
00017 # Every comment that begins with ## is literally turned into an
00018 # Doxygen comment. Consecutive comment lines are turned into
00019 # comment blocks (-> /** ... */).
00020 # All the stuff is put inside a namespace with the same name as
00021 # the source file.
00022 
00023 # Conversions:
00024 # ============
00025 # ##-blocks                  ->  /** ... */
00026 # "class name(base): ..."    ->  "class name : public base {...}"
00027 # "def name(params): ..."    ->  "name(params) {...}"
00028 
00029 # Changelog:
00030 # 21.01.2003: Raw (r"") or unicode (u"") doc string will now be properly
00031 #             handled. (thanks to Richard Laager for the patch)
00032 # 22.12.2003: Fixed a bug where no function names would be output for "def"
00033 #             blocks that were not in a class.
00034 #             (thanks to Richard Laager for the patch)
00035 # 12.12.2003: Implemented code to handle static and class methods with
00036 #             this logic: Methods with "self" as the first argument are
00037 #             non-static. Methods with "cls" are Python class methods,
00038 #             which translate into static methods for Doxygen. Other
00039 #             methods are assumed to be static methods. As should be
00040 #             obvious, this logic doesn't take into account if the method
00041 #             is actually setup as a classmethod() or a staticmethod(),
00042 #             just if it follows the normal conventions.
00043 #             (thanks to Richard Laager for the patch)
00044 # 11.12.2003: Corrected #includes to use os.path.sep instead of ".". Corrected
00045 #             namespace code to use "::" instead of ".".
00046 #             (thanks to Richard Laager for the patch)
00047 # 11.12.2003: Methods beginning with two underscores that end with
00048 #             something other than two underscores are considered private
00049 #             and are handled accordingly.
00050 #             (thanks to Richard Laager for the patch)
00051 # 03.12.2003: The first parameter of class methods (self) is removed from
00052 #             the documentation.
00053 # 03.11.2003: The module docstring will be used as namespace documentation
00054 #             (thanks to Joe Bronkema for the patch)
00055 # 08.07.2003: Namespaces get a default documentation so that the namespace
00056 #             and its contents will show up in the generated documentation.
00057 # 05.02.2003: Directories will be deleted during synchronization.
00058 # 31.01.2003: -f option & filtering entire directory trees.
00059 # 10.08.2002: In base classes the '.' will be replaced by '::'
00060 # 18.07.2002: * and ** will be translated into arguments
00061 # 18.07.2002: Argument lists may contain default values using constructors.
00062 # 18.06.2002: Support for ## public:
00063 # 21.01.2002: from ... import will be translated to "using namespace ...;"
00064 #             TODO: "from ... import *" vs "from ... import names"
00065 #             TODO: Using normal imports: name.name -> name::name
00066 # 20.01.2002: #includes will be placed in front of the namespace
00067 #
00068 ##############################################################################
00069 #
00070 # The program is written as a state machine with the following states:
00071 #
00072 # - OUTSIDE               The current position is outside any comment,
00073 #                         class definition or function.
00074 #
00075 # - BUILD_COMMENT         Begins with first "##".
00076 #                         Ends with the first token that is no "##"
00077 #                         at the same column as before.
00078 #
00079 # - BUILD_CLASS_DECL      Begins with "class".
00080 #                         Ends with ":"
00081 # - BUILD_CLASS_BODY      Begins just after BUILD_CLASS_DECL.
00082 #                         The first following token (which is no comment)
00083 #                         determines indentation depth.
00084 #                         Ends with a token that has a smaller indentation.
00085 #
00086 # - BUILD_DEF_DECL        Begins with "def".
00087 #                         Ends with ":".
00088 # - BUILD_DEF_BODY        Begins just after BUILD_DEF_DECL.
00089 #                         The first following token (which is no comment)
00090 #                         determines indentation depth.
00091 #                         Ends with a token that has a smaller indentation.
00092 
00093 import getopt
00094 import glob
00095 import os.path
00096 import shutil
00097 import string
00098 import sys
00099 import token
00100 import tokenize
00101 
00102 from stat import *
00103 
# States of the state machine driven by tok_eater(). The state on top
# of stateStack selects the branch taken for each incoming token.
OUTSIDE          = 0
# NOTE(review): described in the file header, but not referenced by any
# code visible in this file.
BUILD_COMMENT    = 1
BUILD_CLASS_DECL = 2
BUILD_CLASS_BODY = 3
BUILD_DEF_DECL   = 4
BUILD_DEF_BODY   = 5
# Entered on an "import"/"from" token; waits for a module name.
IMPORT           = 6
# After a module name; "." continues the dotted name, "," starts the
# next module, any other token ends the import statement.
IMPORT_OP        = 7
# After a "."; waits for the next component of the dotted module name.
IMPORT_APPEND    = 8

# Output file stream (replaced by filterFile(); written by output() for
# immediate output and by filter() when the buffer is flushed)
outfile = sys.stdout

# Output buffer (list of already padded strings appended by output())
outbuffer = []

# Output position maintained by output(): number of newline-terminated
# strings emitted so far, and the column following the last string.
out_row = 0
out_col = 0

# Variables used by rec_name_n_param()
name         = ""
param        = ""
doc_string   = ""
# 0 means "not recording"; see rec_name_n_param() for the other states.
record_state = 0
# Nesting depth of "(" while the parameter list is being recorded.
bracket_counter = 0

# Tuple: (row,column)
# Start positions of the most recent "class", "def" and "import"/"from"
# tokens as recorded by tok_eater().
class_spos  = (0,0)
def_spos    = (0,0)
import_spos = (0,0)

# Which import was used? ("import" or "from")
import_token = ""

# Comment block buffer (lines collected by gather_comment() and flushed
# by print_comment())
comment_block = []
# Set to 1 once a non-comment token has been seen after the buffered
# comment lines.
comment_finished = 0

# Imported modules (dotted names collected for the current import
# statement)
modules = []

# Program state (stack; the last element is the current state)
stateStack = [OUTSIDE]

# Keep track of whether module has a docstring
module_has_docstring = False

# Keep track of member protection
protection_level = "public"
# True while tok_eater() has temporarily switched to "private:" for a
# method whose name starts, but does not end, with two underscores.
private_member = False

# Keep track of the module namespace (read by tok_eater() when the
# module docstring is turned into "\namespace" documentation)
namespace = ""
00157 
######################################################################
# Output string s. '\n' may only be at the end of the string (not
# somewhere in the middle).
#
# The string is left-padded with blanks so that it starts at column
# spos[1], taking into account what has already been emitted on the
# current row (out_col). If the current column is already past
# spos[1] the string is emitted without padding.
#
# In: s         - String
#     spos      - Startpos as (row, column); only the column is used
#     immediate - If true, write straight to outfile instead of
#                 appending to outbuffer
######################################################################
def output(s, spos, immediate=0):
    global outbuffer, out_row, out_col, outfile

    # str.rjust() exists on Python 2 and 3 (string.rjust() was removed
    # in Python 3); a width below len(s) returns s unchanged.
    padded = s.rjust(spos[1] - out_col + len(s))
    if immediate:
        outfile.write(padded)
    else:
        outbuffer.append(padded)

    # Track the output position for the next call.
    if s.endswith("\n"):
        out_row = out_row + 1
        out_col = 0
    else:
        out_col = spos[1] + len(s)
00178 
00179 
######################################################################
# Records a name and parameters. The name is either a class name or
# a function name. Then the parameter is either the base class or
# the function parameters.
# The name is stored in the global variable "name", the parameters
# in "param" and a doc string that directly follows the declaration
# in "doc_string".
# The variable "record_state" holds the current state of this internal
# state machine:
#   0 - idle, 1 - expect the name, 2 - expect "(" or ":",
#   3 - collect parameters until the matching ")", 4 - look for a
#   doc string.
# The recording is started by calling start_recording().
#
# In: type - Token type as delivered by tokenize
#     tok  - Token text
######################################################################
def rec_name_n_param(type, tok):
    global record_state,name,param,doc_string,bracket_counter
    s = record_state
    # State 0: Do nothing.
    if s == 0:
        return
    # State 1: Remember name.
    elif s == 1:
        name = tok
        record_state = 2
    # State 2: Wait for opening bracket or colon
    elif s == 2:
        if tok == '(':
            bracket_counter = 1
            record_state = 3
        if tok == ':':
            record_state = 4
    # State 3: Store parameter (or base class) and wait for an ending bracket
    elif s == 3:
        # "*" and "**" are dropped so that *args/**kwargs show up as
        # plain argument names.
        if tok == '*' or tok == '**':
            tok = ''
        if tok == '(':
            bracket_counter = bracket_counter+1
        if tok == ')':
            bracket_counter = bracket_counter-1
        if bracket_counter == 0:
            record_state = 4
        else:
            param = param+tok
    # State 4: Look for doc string
    elif s == 4:
        # Skip the layout tokens between the declaration and the first
        # statement of the body. tokenize.NL (blank line) replaces the
        # former token.SLASHEQUAL test, which tokenize never produces,
        # so a blank line in front of the doc string no longer hides it.
        if type in (token.NEWLINE, token.INDENT, tokenize.NL):
            return
        elif tok == ":":
            return
        elif type == token.STRING:
            # Remove string prefixes (r, u, b in any case) and the
            # surrounding quotes, whichever quote character is used.
            tok = tok.lstrip("rRuUbB")
            quote = tok[:1]
            if quote == '"' or quote == "'":
                tok = tok.strip(quote)
            doc_string = tok
        record_state = 0
00235 
######################################################################
# Starts the recording of a name & param part.
# Resets the recorded name, parameters and doc string and arms the
# recorder so that the next token fed to rec_name_n_param() is taken
# as the name. Once the necessary tokens have been fed, the results
# can be found in the global variables "name" and "param".
######################################################################
def start_recording():
    global record_state,param,name, doc_string
    name = param = doc_string = ""
    record_state = 1
00248 
######################################################################
# Test if recording is finished, i.e. whether the recorder driven by
# rec_name_n_param() is back in its idle state (record_state 0).
######################################################################
def is_recording_finished():
    global record_state
    idle = (record_state == 0)
    return idle
00255 
######################################################################
## Gather comment block
#
# Collects the text of consecutive "##" comment lines in comment_block.
# Any non-comment token marks the current block as finished; the next
# comment then flushes the old block through print_comment() before a
# new one is started. Comments starting with "###" (banners) and plain
# "#" comments are not collected.
#
# In: type - Token type as delivered by tokenize
#     tok  - Token text
#     spos - (row, column) at which the token starts
######################################################################
def gather_comment(type, tok, spos):
    global comment_block,comment_finished

    # Newer tokenize versions deliver a comment-only line as a COMMENT
    # token without the line break, followed by an NL token on the same
    # row. That NL belongs to the comment and must not end the block,
    # otherwise every "##" line would become a block of its own.
    if type == tokenize.NL and spos[0] == getattr(gather_comment, "_comment_row", None):
        return

    if type != tokenize.COMMENT:
        comment_finished = 1
        return

    # Remember the row so that the NL token of this line can be recognized.
    gather_comment._comment_row = spos[0]

    # Output old comment block if a new one is started.
    if comment_finished:
        print_comment(spos)
        comment_finished = 0
    if tok[0:2] == "##" and tok[0:3] != "###":
        # Store every line with exactly one trailing newline, whether
        # or not the tokenizer included it in the token text.
        comment_block.append(tok[2:].rstrip("\r\n") + "\n")
00270 
######################################################################
## Output comment block and empty buffer.
#
# A non-empty comment_block is emitted through output() as "/**",
# the collected lines and "*/", all at position spos. The buffer and
# the comment_finished flag are reset in any case.
#
# In: spos - (row, column) used as start position for every line
######################################################################
def print_comment(spos):
    global comment_block,comment_finished
    if comment_block:
        for piece in ["/**\n"] + comment_block + ["*/\n"]:
            output(piece, spos)
    comment_finished = 0
    comment_block = []
00283 
######################################################################
# Replace the current state (top of stateStack) with s.
######################################################################
def set_state(s):
    global stateStack
    stateStack[-1] = s
00288 
######################################################################
# Return the current state (top of stateStack).
######################################################################
def get_state():
    global stateStack
    current = stateStack[-1]
    return current
00293 
######################################################################
# Enter state s while remembering the state that was active before.
######################################################################
def push_state(s):
    global stateStack
    stateStack += [s]
00298 
######################################################################
# Leave the current state and return to the one below it.
######################################################################
def pop_state():
    global stateStack
    del stateStack[-1]
00303 
00304 
######################################################################
# Token callback that drives the filter's state machine.
#
# Every token is first offered to rec_name_n_param(). It is then either
# handled as a "## private:", "## protected:" or "## public:" marker
# or passed on to gather_comment(). Finally the token is interpreted
# according to the state on top of stateStack; see the description of
# the states in the file header.
#
# In: type - Token type as delivered by tokenize
#     tok  - Token text
#     spos - (row, column) at which the token starts
#     epos - (row, column) at which the token ends (unused)
#     line - Source line containing the token (unused)
######################################################################
def tok_eater(type, tok, spos, epos, line):
    global stateStack,name,param,class_spos,def_spos,import_spos
    global doc_string, modules, import_token, module_has_docstring
    global protection_level, private_member

    rec_name_n_param(type,tok)

    # Protection markers may contain blanks ("## private:"); compare
    # the token with all blanks removed.
    marker = tok.strip().replace(" ", "")
    if marker in ("##private:", "##protected:", "##public:"):
        protection_level = marker[2:-1]
        output(protection_level + ":\n", spos)
    else:
        gather_comment(type,tok,spos)

    state = get_state()

    # OUTSIDE
    if state == OUTSIDE:
        if tok == "class":
            start_recording()
            class_spos = spos
            push_state(BUILD_CLASS_DECL)
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
        elif tok == "import" or tok == "from":
            import_token = tok
            import_spos = spos
            modules     = []
            push_state(IMPORT)
        elif spos[1] == 0 and tok[:3] == '"""':
            # Capture module docstring as namespace documentation
            module_has_docstring = True
            comment_block.append("\\namespace %s\n" % namespace)
            comment_block.append(tok[3:-3])
            print_comment(spos)

    # IMPORT
    elif state == IMPORT:
        if type == token.NAME:
            modules.append(tok)
            set_state(IMPORT_OP)
    # IMPORT_OP
    elif state == IMPORT_OP:
        if tok == ".":
            set_state(IMPORT_APPEND)
        elif tok == ",":
            set_state(IMPORT)
        else:
            # Any other token ends the list of module names.
            for m in modules:
                # The #include is written immediately so that it ends
                # up in front of the buffered namespace contents.
                output('#include "'+m.replace('.',os.path.sep)+'.py"\n', import_spos, immediate=1)
                if import_token == "from":
                    output('using namespace '+m.replace('.', '::')+';\n', import_spos)
            pop_state()
    # IMPORT_APPEND
    elif state == IMPORT_APPEND:
        if type == token.NAME:
            modules[-1] += "."+tok
            set_state(IMPORT_OP)
    # BUILD_CLASS_DECL
    elif state == BUILD_CLASS_DECL:
        if is_recording_finished():
            s = "class "+name
            if param != "":
                s = s+" : public "+param.replace('.','::')
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(class_spos)
            output(s+"\n",class_spos)
            output("{\n",(class_spos[0]+1,class_spos[1]))
            protection_level = "public"
            output("  public:\n",(class_spos[0]+2,class_spos[1]))
            set_state(BUILD_CLASS_BODY)
    # BUILD_CLASS_BODY
    elif state == BUILD_CLASS_BODY:
        # The body ends with the first significant token that is not
        # indented deeper than the "class" keyword. (The former test
        # against the bare token number 40 was dropped: tokenize
        # reports every operator as OP, and NL is tested by name.)
        if (type not in (token.INDENT, token.NEWLINE,
                         tokenize.NL, tokenize.COMMENT)
                and spos[1] <= class_spos[1]):
            output("}; // end of class\n",(out_row+1,class_spos[1]))
            pop_state()
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
    # BUILD_DEF_DECL
    elif state == BUILD_DEF_DECL:
        if is_recording_finished():
            # Do we document a class method? then remove the 'self' parameter
            if BUILD_CLASS_BODY in stateStack:
                s = ''
                params = param.split(",")
                if params[0] == 'self':
                    param = ",".join(params[1:])
                else:
                    # No "self": treated as static; a leading "cls"
                    # (class method) is removed as well.
                    s = 'static '
                    if params[0] == 'cls':
                        param = ",".join(params[1:])
                s = s+name+"("+param+");\n"
                # "__name" (without trailing "__") is a private member:
                # switch to "private:" for this one declaration.
                if name.startswith('__') \
                   and not name.endswith('__') \
                   and protection_level != 'private':
                    private_member = True
                    output("  private:\n",(def_spos[0]+2,def_spos[1]))
            else:
                s = name+"("+param+");\n"
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(def_spos)
            output(s,def_spos)
            set_state(BUILD_DEF_BODY)
    # BUILD_DEF_BODY
    elif state == BUILD_DEF_BODY:
        # The body ends with the first significant token that is not
        # indented deeper than the "def" keyword. Unlike the class
        # body, comments are not exempt here.
        if (type not in (token.INDENT, token.NEWLINE, tokenize.NL)
                and spos[1] <= def_spos[1]):
            # Restore the protection level that was active before a
            # temporary switch to "private:".
            if private_member and protection_level != 'private':
                private_member = False
                output("  " + protection_level + ":\n",(def_spos[0]+2,def_spos[1]))
            pop_state()
00434 
00435 
def dump(filename):
    """Copy the contents of a file unchanged to standard output.

    filename is the path of the file to read. The file is closed again
    even if reading or writing fails.
    """
    f = open(filename)
    try:
        for s in f:
            sys.stdout.write(s)
    finally:
        f.close()
00441 
def filter(filename):
    """Filter one Python source file and write the result to outfile.

    The file is tokenized and every token is fed to tok_eater(). The
    generated declarations are wrapped in a namespace named after the
    file (base name without extension). If the module has no docstring,
    a default namespace documentation is appended. Finally the buffered
    output is written to outfile.

    An IOError raised while opening or reading the file is passed on to
    the caller.
    """
    global name, namespace, module_has_docstring

    path, basename = os.path.split(filename)
    root, ext = os.path.splitext(basename)

    output("namespace "+root+" {\n",(0,0))

    # tok_eater() reads "namespace" when it turns the module docstring
    # into namespace documentation; without this assignment the
    # documentation would be attached to an empty namespace name.
    namespace = root
    name = root

    sys.stderr.write('Filtering "'+filename+'"...')
    f = open(filename)
    try:
        # generate_tokens() is available on Python 2 and 3; every item
        # is a 5-tuple matching the parameters of tok_eater().
        for tok_info in tokenize.generate_tokens(f.readline):
            tok_eater(*tok_info)
    finally:
        f.close()
    # Flush a comment block that is still pending at the end of the file.
    print_comment((0,0))

    output("\n",(0,0))
    output("}  // end of namespace\n",(0,0))

    if not module_has_docstring:
        # Put in default namespace documentation
        output('/** \\namespace '+root+' \n',(0,0))
        output('    \\brief Module "%s" */\n'%(root),(0,0))

    for s in outbuffer:
        outfile.write(s)
00469 
00470 
def filterFile(filename, out=sys.stdout):
    """Filter a single file and write the result to out.

    Files with the extension ".py" are converted by filter(); any other
    file is passed through unchanged by dump(). Progress is reported on
    stderr: "OK" on success, otherwise the message of the IOError that
    occurred.
    """
    global outfile

    outfile = out

    try:
        root, ext = os.path.splitext(filename)

        if ext == ".py":
            filter(filename)
        else:
            dump(filename)

        sys.stderr.write("OK\n")
    except IOError as e:
        # strerror is what e[1] used to return on Python 2; fall back
        # to the full message for errors without an errno/strerror pair.
        sys.stderr.write((e.strerror or str(e)) + "\n")
00487 
00488 
00489 ######################################################################
00490 
# preparePath
def preparePath(path):
    """Prepare a path.

    Checks if the path exists and creates it, including all missing
    parent directories, if it does not exist. An empty path is ignored
    and a trailing path separator is accepted.

    Raises OSError if the directory cannot be created.
    """
    if not path or os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # The directory may have been created by someone else between
        # the check above and makedirs(); only report real failures.
        if not os.path.isdir(path):
            raise
00502 
# isNewer
def isNewer(file1,file2):
    """Tell whether file1 was modified more recently than file2.

    file1 must be an existing file. A missing file2 counts as older,
    so the result is True in that case. The comparison uses the
    ST_MTIME entry of os.stat() for both files.
    """
    if os.path.exists(file2):
        mtime1 = os.stat(file1)[ST_MTIME]
        mtime2 = os.stat(file2)[ST_MTIME]
        return mtime1 > mtime2
    return True
00512 
# convert
def convert(srcpath, destpath):
    """Convert a Python source tree into a C++ stub tree.

    All *.py files in srcpath (including sub-directories) are filtered
    and written to destpath. If destpath exists, only the files
    that have been modified are filtered again. Files that were deleted
    from srcpath are also deleted in destpath if they are still present.
    The function returns the number of processed *.py files.

    Every file is filtered by a separate run of this script with the
    -f option. The process exits with status 1 if a source file and its
    destination turn out to be the same path.
    """
    import subprocess

    count = 0
    sfiles = glob.glob(os.path.join(srcpath, "*"))
    dfiles = glob.glob(os.path.join(destpath, "*"))
    # Everything found in destpath is obsolete until a source entry of
    # the same name shows up.
    leftovers = set(os.path.basename(df) for df in dfiles)

    for srcfile in sfiles:
        basename = os.path.basename(srcfile)
        leftovers.discard(basename)

        # Is it a subdirectory?
        if os.path.isdir(srcfile):
            sdir = os.path.join(srcpath, basename)
            ddir = os.path.join(destpath, basename)
            count += convert(sdir, ddir)
            continue
        # Check the extension (only *.py will be converted)
        root, ext = os.path.splitext(srcfile)
        if ext.lower() != ".py":
            continue

        destfile = os.path.join(destpath, basename)
        if destfile == srcfile:
            print("WARNING: Input and output names are identical!")
            sys.exit(1)

        count += 1

        if isNewer(srcfile, destfile):
            preparePath(os.path.dirname(destfile))
            # Run the filter in a child process with an argument list
            # instead of a shell command line, so that file names with
            # blanks or shell meta characters are passed on unchanged.
            out = open(destfile, "w")
            try:
                subprocess.call([sys.executable or "python", sys.argv[0],
                                 "-f", srcfile], stdout=out)
            finally:
                out.close()

    # Delete obsolete files in destpath
    for df in leftovers:
        dname = os.path.join(destpath, df)
        if os.path.isdir(dname):
            try:
                shutil.rmtree(dname)
            except EnvironmentError:
                print("Can't remove obsolete directory '%s'" % dname)
        else:
            try:
                os.remove(dname)
            except EnvironmentError:
                print("Can't remove obsolete file '%s'" % dname)

    return count
00578 
00579 
00580 ######################################################################
00581 ######################################################################
00582 ######################################################################
00583 
def main(argv=None):
    """Command line entry point.

    Usage: <script> options input output

    With -f the remaining arguments are joined with blanks into one
    file name; that file is filtered and the result is printed to
    stdout. Without -f exactly two arguments are expected: the source
    tree and the destination tree handed to convert(). -h or --help
    prints the usage line.

    argv defaults to sys.argv[1:]. The process exits with status 1 on
    invalid options or a wrong number of arguments.
    """
    if argv is None:
        argv = sys.argv[1:]
    usage = "%s options input output\n" % (os.path.basename(sys.argv[0]))

    try:
        opts, args = getopt.getopt(argv, "hf", ["help"])
    except getopt.GetoptError as e:
        # Report on stderr: in -f mode stdout carries the filter result.
        sys.stderr.write("%s\n" % e)
        sys.exit(1)

    filter_file = False
    for o, a in opts:
        if o == "-f":
            filter_file = True
        elif o in ("-h", "--help"):
            sys.stdout.write(usage)
            sys.exit(0)

    if filter_file:
        # Filter the specified file and print the result to stdout
        filename = " ".join(args)
        filterFile(filename)
        return

    if len(args) != 2:
        sys.stderr.write(usage)
        sys.exit(1)

    # Filter an entire Python source tree
    print('"%s" -> "%s"\n' % (args[0], args[1]))
    c = convert(args[0], args[1])
    print("%d files" % (c))


if __name__ == "__main__":
    main()
00610