Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import re 

2import os 

3import time 

4import tornado.gen 

5import gramex.data 

6import sqlalchemy as sa 

7from string import ascii_lowercase, digits 

8from random import choice 

9from mimetypes import guess_type 

10from tornado.web import HTTPError 

11from gramex.config import objectpath 

12from gramex.http import NOT_FOUND, REQUEST_ENTITY_TOO_LARGE, UNSUPPORTED_MEDIA_TYPE 

13from .formhandler import FormHandler 

14 

15 

class DriveHandler(FormHandler):
    '''
    Lets users manage files. Here's a typical configuration::

        path: $GRAMEXDATA/apps/appname/     # Save files here
        user_fields: [id, role, hd]         # user attributes to store
        tags: [tag]                         # <input name=""> to store
        allow: [.doc, .docx]                # Only allow these files
        ignore: [.pdf]                      # Don't allow these files
        max_file_size: 100000               # Files must be smaller than this
        redirect:                           # After uploading the file,
            query: next                     #   ... redirect to ?next=
            url: /$YAMLURL/                 #   ... else to this directory

    File metadata is stored in <path>/.meta.db as SQLite
    '''
    @classmethod
    def setup(cls, path, user_fields=None, tags=None, allow=None, ignore=None, max_file_size=None,
              **kwargs):
        '''
        Configure the handler class.

        Creates ``path`` if required, points the parent FormHandler at the
        ``drive`` table in ``<path>/.meta.db``, makes sure the table has a
        column for every built-in field, user field and tag, and wraps the
        dataset's ``modify`` function so ``?_download&id=...`` returns the file.
        '''
        cls.path = path
        cls.user_fields = cls._ensure_type('user_fields', user_fields)
        cls.tags = cls._ensure_type('tags', tags)
        cls.allow = allow or []
        cls.ignore = ignore or []
        cls.max_file_size = max_file_size or 0
        # exist_ok=True already covers the "directory exists" case
        os.makedirs(path, exist_ok=True)

        # Set up the parent FormHandler with a single SQLite URL and table
        url, table = 'sqlite:///' + os.path.join(path, '.meta.db'), 'drive'
        kwargs.update(url=url, table=table, id='id')
        # Build a new list instead of using `+=`: augmented assignment would extend the
        # inherited list object in place, so it would grow on every setup() call.
        own_keys = ['path', 'user_fields', 'tags', 'allow', 'ignore', 'max_file_size']
        cls.special_keys = list(cls.special_keys) + [
            key for key in own_keys if key not in cls.special_keys]
        super().setup(**kwargs)

        # Ensure all tags and user_fields are present in "drive" table
        engine = sa.create_engine(url)
        meta = sa.MetaData(bind=engine)
        meta.reflect()
        cls._db_cols = {
            'id': sa.Column('id', sa.Integer, primary_key=True, autoincrement=True),
            'file': sa.Column('file', sa.Text),     # Original file name
            'ext': sa.Column('ext', sa.Text),       # Original file extension
            'path': sa.Column('path', sa.Text),     # Saved file relative path
            'size': sa.Column('size', sa.Integer),  # File size
            'mime': sa.Column('mime', sa.Text),     # MIME type
            'date': sa.Column('date', sa.Integer),  # Uploaded date
        }
        for field in cls.user_fields:
            # Use the same column name that post() and put() write to
            name = cls._user_column(field)
            cls._db_cols[name] = sa.Column(name, sa.String)
        for tag in cls.tags:
            # setdefault: a tag never overrides a built-in or user column
            cls._db_cols.setdefault(tag, sa.Column(tag, sa.String))
        if table in meta.tables:
            # Table already exists: add any columns that are missing
            existing = meta.tables[table].columns
            with engine.connect() as conn:
                with conn.begin():
                    for col in cls._db_cols:
                        if col not in existing:
                            conn.execute('ALTER TABLE %s ADD COLUMN %s TEXT' % (table, col))
        else:
            sa.Table(table, meta, *cls._db_cols.values()).create(engine)

        # Keep the configured modify: function (if any) and run it before the download check
        original_modify = cls.datasets['data'].get('modify', lambda v, *args: v)

        # If ?_download=...&id=..., then download the file via modify:
        def download_plugin(data, key, handler):
            data = original_modify(data, key, handler)
            ids = handler.args.get('id', [])
            if len(ids) != 1 or '_download' not in handler.args:
                return data
            if len(data) == 0:
                raise HTTPError(NOT_FOUND, 'No file record with id=%s' % ids[0])
            path = os.path.join(handler.path, data['path'][0])
            if not os.path.exists(path):
                raise HTTPError(NOT_FOUND, 'Missing file for id=%s' % ids[0])
            # mime is empty when the type could not be guessed at upload time.
            # A header value cannot be None, so fall back to a generic binary type.
            handler.set_header('Content-Type', data['mime'][0] or 'application/octet-stream')
            handler.set_header('Content-Length', os.stat(path).st_size)
            handler.set_header(
                'Content-Disposition', 'attachment; filename="%s"' % data['file'][0])
            with open(path, 'rb') as handle:
                return handle.read()

        cls.datasets['data']['modify'] = download_plugin

    @staticmethod
    def _user_column(field):
        '''Return the DB column / argument name that stores the user attribute ``field``'''
        return 'user_%s' % field.replace('.', '_')

    def check_filelimits(self):
        '''
        Validate the uploads described by ``self.args`` (file, ext, size).

        Raises HTTPError REQUEST_ENTITY_TOO_LARGE if a file exceeds ``max_file_size``, and
        UNSUPPORTED_MEDIA_TYPE if its extension is in ``ignore`` or not in a non-empty ``allow``.
        '''
        allow = {ext.lower() for ext in self.allow}
        ignore = {ext.lower() for ext in self.ignore}
        for name, ext, size in zip(self.args['file'], self.args['ext'], self.args['size']):
            if self.max_file_size and size > self.max_file_size:
                raise HTTPError(REQUEST_ENTITY_TOO_LARGE, '%s: %d > %d' % (
                    name, size, self.max_file_size))
            if ext in ignore or (allow and ext not in allow):
                raise HTTPError(UNSUPPORTED_MEDIA_TYPE, name)

    @tornado.gen.coroutine
    def post(self, *path_args, **path_kwargs):
        '''Saves uploaded files, then updates metadata DB'''
        user = self.current_user or {}
        uploads = self.request.files.get('file', [])
        n = len(uploads)
        # Initialize all DB columns (except ID) to have the same number of rows as uploads
        for key, col in list(self._db_cols.items())[1:]:
            self.args[key] = self.args.get(key, []) + [col.type.python_type()] * n
        for key in self.args:
            self.args[key] = self.args[key][:n]
        # Files are written only after the DB insert, so os.path.exists() cannot detect two
        # uploads in this request that map to the same name. Track names reserved so far.
        reserved = set()
        for i, upload in enumerate(uploads):
            file = os.path.basename(upload.get('filename', ''))
            ext = os.path.splitext(file)[1]
            # Replace characters outside the allowed set with "-"
            path = re.sub(r'[^!#$%&()+,.0-9;<=>@A-Z\[\]^`a-z{}~]', '-', file)
            # Append random characters to the base name until the path is unused
            while path in reserved or os.path.exists(os.path.join(self.path, path)):
                path = os.path.splitext(path)[0] + choice(digits + ascii_lowercase) + ext
            reserved.add(path)
            self.args['file'][i] = file
            self.args['ext'][i] = ext.lower()
            self.args['path'][i] = path
            self.args['size'][i] = len(upload['body'])
            self.args['date'][i] = int(time.time())
            # Guess MIME type from filename if it's unknown
            self.args['mime'][i] = upload['content_type']
            if self.args['mime'][i] == 'application/unknown':
                self.args['mime'][i] = guess_type(file, strict=False)[0]
            # Append user attributes
            for field in self.user_fields:
                self.args[self._user_column(field)][i] = objectpath(user, field)
        self.check_filelimits()
        yield super().post(*path_args, **path_kwargs)
        # Save the file contents only after the metadata insert succeeded
        for upload, path in zip(uploads, self.args['path']):
            with open(os.path.join(self.path, path), 'wb') as handle:
                handle.write(upload['body'])

    @tornado.gen.coroutine
    def delete(self, *path_args, **path_kwargs):
        '''Deletes files from metadata DB and from file system'''
        conf = self.datasets.data
        # Look up the matching rows BEFORE deleting them, so their paths are still known
        files = gramex.data.filter(conf.url, table=conf.table, args=self.args)
        result = yield super().delete(*path_args, **path_kwargs)
        for relpath in files['path']:
            path = os.path.join(self.path, relpath)
            if os.path.exists(path):
                os.remove(path)
        return result

    @tornado.gen.coroutine
    def put(self, *path_args, **path_kwargs):
        '''Update attributes and files'''
        # PUT can update only 1 ID at a time. Use only the first upload, if any
        uploads = self.request.files.get('file', [])[:1]
        ids = self.args.get('id', [-1])
        # User cannot change the path, size, date or user attributes
        for key in ('path', 'size', 'date'):
            self.args.pop(key, None)
        for field in self.user_fields:
            # Drop the same key that is written below and in post()
            self.args.pop(self._user_column(field), None)
        # These are updated only when a file is uploaded
        if uploads:
            user = self.current_user or {}
            self.args.setdefault('size', []).append(len(uploads[0]['body']))
            self.args.setdefault('date', []).append(int(time.time()))
            for field in self.user_fields:
                self.args.setdefault(self._user_column(field), []).append(
                    objectpath(user, field))
        conf = self.datasets.data
        files = gramex.data.filter(conf.url, table=conf.table, args={'id': ids})
        result = yield super().put(*path_args, **path_kwargs)
        # Overwrite the stored file in place; its path is never changed by PUT
        if uploads and len(files):
            path = os.path.join(self.path, files['path'].iloc[0])
            with open(path, 'wb') as handle:
                handle.write(uploads[0]['body'])
        return result

    @classmethod
    def _ensure_type(cls, field, values):
        '''
        Normalize the ``user_fields`` / ``tags`` configuration into a dict.

        A dict is returned as-is. A list/tuple or a non-empty string becomes
        ``{name: 'str'}`` (empty names dropped). Any other falsy value becomes ``{}``.
        Anything else raises TypeError.
        '''
        if isinstance(values, dict):
            return values
        if isinstance(values, (list, tuple)):
            return {v: 'str' for v in values if v}
        if isinstance(values, str) and values:
            return {values: 'str'}
        if not values:
            return {}
        raise TypeError('%s: %s should be a dict, not %s' % (cls.name, field, values))

190 if not values: 

191 return {} 

192 raise TypeError('%s: %s should be a dict, not %s' % (cls.name, field, values))