Package pyzmail :: Module utils
[hide private]
[frames] | no frames]

Source Code for Module pyzmail.utils

  1  # 
  2  # pyzmail/utils.py 
  3  # (c) Alain Spineux <alain.spineux@gmail.com> 
  4  # http://www.magiksys.net/pyzmail 
  5  # Released under LGPL 
  6   
  7  """ 
  8  Various functions used by other modules 
  9  @var invalid_chars_in_filename: a mix of characters that are not permitted by the most commonly used filesystems 
 10  @var invalid_windows_name: a list of unauthorized filenames under Windows 
 11  """ 
 12   
 13  import sys 
 14   
 15  invalid_chars_in_filename=b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f' \ 
 16                            b'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f' \ 
 17                            b'<>:"/\\|?*\%\'' 
 18   
 19  invalid_windows_name=[b'CON', b'PRN', b'AUX', b'NUL', b'COM1', b'COM2', b'COM3',  
 20                        b'COM4', b'COM5', b'COM6', b'COM7', b'COM8', b'COM9',  
 21                        b'LPT1', b'LPT2', b'LPT3', b'LPT4', b'LPT5', b'LPT6', b'LPT7', 
 22                        b'LPT8', b'LPT9' ] 
 23   
def sanitize_filename(filename, alt_name, alt_ext):
    """
    Convert the given filename into a name that should work on all
    platforms. Remove non us-ascii characters, and drop invalid filenames.
    Use the I{alternative} filename if needed.

    @type filename: unicode or None
    @param filename: the original filename or None. Can be unicode.
    @type alt_name: str
    @param alt_name: the alternative filename if filename is None or useless
    @type alt_ext: str
    @param alt_ext: the alternative filename extension (including the '.')

    @rtype: str
    @returns: a valid filename.

    >>> sanitize_filename('document.txt', 'file', '.txt')
    'document.txt'
    >>> sanitize_filename('number1.txt', 'file', '.txt')
    'number1.txt'
    >>> sanitize_filename(None, 'file', '.txt')
    'file.txt'
    >>> sanitize_filename(u'R\\xe9pertoir.txt', 'file', '.txt')
    'Rpertoir.txt'
    >>> # the '\\xe9' has been removed
    >>> sanitize_filename(u'\\xe9\\xe6.html', 'file', '.txt')
    'file.html'
    >>> # all non us-ascii characters have been removed, the alternative name
    >>> # has been used to replace the empty string. The original extension
    >>> # is still valid
    >>> sanitize_filename(u'\\xe9\\xe6', 'file', '.txt')
    'file.txt'
    >>> # nothing usable is left, the alternative name and extension are used
    >>> sanitize_filename(u'COM1.txt', 'file', '.txt')
    'COM1A.txt'
    >>> # if name match an invalid name or assimilated then a A is added
    """

    if not filename:
        return alt_name+alt_ext

    # The text type is named 'unicode' on Python 2 and 'str' on Python 3.
    if sys.version_info>=(3, 0):
        text_type=str
    else:
        text_type=unicode

    # Work on bytes: encoding with 'ignore' silently drops every character
    # that is not us-ascii.
    if isinstance(filename, text_type):
        filename=filename.encode('ascii', 'ignore')

    # Delete the forbidden characters, then the surrounding whitespace.
    filename=filename.translate(None, invalid_chars_in_filename)
    filename=filename.strip()

    if not filename:
        # Every character has been removed: an empty string is not a valid
        # filename, so fall back to the alternative name and extension.
        return alt_name+alt_ext

    # A name equal to a Windows reserved name, or made of a reserved name
    # followed by an extension, gets an 'A' appended to its base name.
    upper=filename.upper()
    for name in invalid_windows_name:
        if upper==name:
            filename=filename+b'A'
            break
        if upper.startswith(name+b'.'):
            filename=filename[:len(name)]+b'A'+filename[len(name):]
            break

    if sys.version_info>=(3, 0):
        # back to string
        filename=filename.decode('us-ascii')

    # Only an extension is left (the single '.' is the first character):
    # prepend the alternative name and keep the original extension.
    if filename.rfind('.')==0:
        filename=alt_name+filename

    return filename
86
def handle_filename_collision(filename, filenames):
    """
    Return a name that does not collide with any entry of I{filenames}.

    When the lower-cased I{filename} is already in I{filenames}, a two-digit
    sequence number is inserted before the extension: 'file.txt' becomes
    'file-01.txt', then 'file-02.txt' ... until there is no more collision.
    The returned name is not added to I{filenames}.

    Windows does not distinguish lower case from upper case. To avoid
    "case" collisions, the function compares C{filename.lower()} to the list.
    If you provide a list in lower case only, then any collision will be
    avoided.

    @type filename: str
    @param filename: the filename
    @type filenames: list or set
    @param filenames: a list of filenames.

    @rtype: str
    @returns: the I{filename} or the appropriately I{indexed} I{filename}

    >>> handle_filename_collision('file.txt', [ ])
    'file.txt'
    >>> handle_filename_collision('file.txt', [ 'file.txt' ])
    'file-01.txt'
    >>> handle_filename_collision('file.txt', [ 'file.txt', 'file-01.txt',])
    'file-02.txt'
    >>> handle_filename_collision('foo', [ 'foo',])
    'foo-01'
    >>> handle_filename_collision('foo', [ 'foo', 'foo-01',])
    'foo-02'
    >>> handle_filename_collision('FOO', [ 'foo', 'foo-01',])
    'FOO-02'
    """
    # No collision: the name can be used unchanged.
    if filename.lower() not in filenames:
        return filename

    # Split on the last dot; the extension keeps its leading dot so it can be
    # appended as is. A name without any dot has an empty extension.
    stem, dot, suffix=filename.rpartition('.')
    if dot:
        suffix=dot+suffix
    else:
        stem, suffix=filename, ''

    # Try increasing sequence numbers until a free name is found.
    counter=1
    candidate='%s-%02d%s' % (stem, counter, suffix)
    while candidate.lower() in filenames:
        counter+=1
        candidate='%s-%02d%s' % (stem, counter, suffix)

    return candidate
133
def is_usascii(value):
    """
    Test if the string contains us-ascii characters only.

    Both text and byte strings are accepted.

    @type value: str, unicode or bytes
    @param value: the string to check
    @rtype: bool
    @returns: True if every character of I{value} is us-ascii

    >>> is_usascii('foo')
    True
    >>> is_usascii(u'foo')
    True
    >>> is_usascii(b'foo')
    True
    >>> is_usascii(u'Fran\\xe7ais')
    False
    >>> is_usascii('bad\\x81')
    False
    >>> is_usascii(b'bad\\x81')
    False
    """
    try:
        if isinstance(value, bytes):
            # A byte string (the plain 'str' of Python 2, 'bytes' on
            # Python 3) is checked by decoding it; Python 3 bytes have no
            # encode() method at all.
            value.decode('us-ascii')
        else:
            value.encode('us-ascii')
    except UnicodeError:
        # UnicodeError covers both UnicodeEncodeError and UnicodeDecodeError
        return False

    return True
155