""" doc2dot: adjust Microsoft Word document extension to match document type i.e., change the extension of doc-files that are really word templates to dot and vice versa Version 20.11.2001 (c) AS INFORMATIK AG www.as-info.ch Michael Gfeller www.mgfeller.net Sometimes, users save Microsoft Word Documents with the wrong extension, intending to change the document type by doing that. This results in a Word Document having an extension (dot or doc) which does not match its true type (document or template). The resulting behaviour when, e,g., double clicking such a document in Explorer, can be weird (try it out!). This script adjusts the extension according to the type proper of the document. This script is provided AS IS (as usual), with no warranty whatsoever. Study the code, if you like, and test it before running it through a directory tree with thousands of Word documents (there is a test switch, see below). This script assumes Word 97 compatible formats. For a description of the Word 97 file format, go to http://www.wotsit.org/search.asp and search for 'word'. To look at the structure of a OLE Structured Storage Files (DocFiles), find STG.EXE, the MFC DocFile Viewer (TechNet Article Nr Q139545). USAGE: python doc2dot.py [-t] startDir -t testmode: print list of files that would be changed startDir start from here """ import os import os.path import string import sys import re import getopt import pythoncom from win32com import storagecon flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE docExtRE = re.compile( '^\.doc$' ) dotExtRE = re.compile( '^\.dot$' ) counters = {} def changeFileExtension( fName, newExt ): """Replaces the extension of fName with newExt (w/o '.') and returns the result""" (root,ext) = os.path.splitext( fName ) return root + '.' + newExt def incCounter( counterName ): "Increment the counter 'counterName', create and initialize it if it does not exist" global counters if not counterName in counters.keys(): counters[ counterName ] = 0 counters[ counterName ] += 1 def printCounters(): "Print all counters and their values." print '' print 'statistics' print '----------' print '' keys = counters.keys() keys.sort() for k in keys: print '%-50s: %u' % (k, counters[k]) def adjustExtension2WordDocumentType( fName, fExt, testMode ): """ Takes the file name 'fName' of the file to analyze, its extension 'fExt', and the test mode flag 'testMode' as input. Assumes that the file extension is either 'dot' or 'doc'. Analyzes the file and changes its extension as appropriate, incrementing various counters to document successes and failures of the various tests and actions. fName: the name of the file to check (incl. extensions) fExt: the extension of 'fname', for convenience testMode: 1 or None, depending on whether we're running in test mode or not """ newExt = '' if docExtRE.search( fExt ): incCounter( 'N .doc files: analysed' ) newExt = 'dot' if dotExtRE.search( fExt ): incCounter( 'N .dot files: analysed' ) newExt = 'doc' if not pythoncom.StgIsStorageFile(fName): print '++not a storage file: %s' % fName incCounter( 'N %s files: NOT a storage file' % fExt ) return try: stg = pythoncom.StgOpenStorage( fName, None, flags ) incCounter( 'N %s files: open storage OK' % fExt ) except: print '++unable to open file/storage: %s' % fName incCounter( 'N %s files: open storage FAILED' % fExt ) return try: stream = stg.OpenStream( 'WordDocument', None, flags ) incCounter( 'N %s files: open wordDocument stream OK' % fExt ) except: print '++unable to open stream "WordDocument" in: %s' % fName incCounter( 'N %s files: open wordDocument stream FAILED' % fExt ) return try: pos = stream.Seek(10,0) flag = stream.Read(1) template = (ord(flag) & 1) incCounter( 'N %s files: read wordDocument stream OK' % fExt ) except: print '++unable to read stream "WordDocument" in: %s' % fName incCounter( 'N %s files: read wordDocument stream FAILED' % fExt ) return del stream del stg if template: if dotExtRE.search( fExt ): incCounter( 'N .dot files: correct Word template file' ) return # it's a template all right incCounter( 'N .doc files: really a DOT' ) msg = '!!DOT as DOC: %8s %6s %s' else: if docExtRE.search( fExt ): incCounter( 'N .doc files: correct Word document file' ) return # it's a normal word document all right incCounter( 'N .dot files: really a DOC' ) msg = '!!DOC as DOT: %8s %6s %s' if testMode: mode = 'TEST' else: mode = 'RENAME' newFName = changeFileExtension( fName, newExt ) rc = '' if not testMode: try: os.rename( fName, newFName ) incCounter( 'N %s files: rename OK' % fExt ) rc = 'OK' except: incCounter( 'N %s files: rename FAILED' % fExt ) rc = 'FAILED' print msg % (mode,rc,fName) def _visit(arguments, dir, files): global lastdir if not dir==lastdir: lastdir = dir # sys.stderr.write('.') for file in files: if os.path.isfile( file ): (root,ext) = os.path.splitext( file ) if docExtRE.search( ext ) or dotExtRE.search( ext ): path = os.path.normpath(os.path.join(dir,file)) adjustExtension2WordDocumentType(path,ext,arguments[0]) def usage(): print 'doc2dot: adjust word document extension to match document type' print ' i.e., change the extension of doc-files that are really' print ' word templates to dot and vice versa' print '' print ' Version 20.11.2001 (c) AS INFORMATIK AG www.as-info.ch' print ' Michael Gfeller www.mgfeller.net' print '' print 'USAGE: doc2dot.exe [-t] startDir' print ' -t testmode: print list of files that would be changed' print ' startDir start from here' print '' if __name__ == '__main__': try: (opts, args) = getopt.getopt(sys.argv[1:], 't') except getopt.GetoptError: usage() sys.exit(2) try: startDir = args[0] except: usage() sys.exit(1) if not os.path.isdir( startDir ): print '++invalid start directory <%s>' % startDir sys.exit(1) options = {} for opt in opts: options[opt[0][1]] = opt[1] if 't' in options.keys(): testMode = 1 else: testMode = None print '--doc2dot: start directory = %s' % startDir if testMode: print '--test mode (files will not be renamed)' else: print '--real mode (files will be renamed)' print '' lastdir = '' os.path.walk( startDir, _visit, (testMode,) ) printCounters()