Coverage for jutil/format.py : 82%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import csv
2import html
3import json
4import logging
5import os
6import re
7import tempfile
8from collections import OrderedDict
9from datetime import timedelta
10from decimal import Decimal
11import subprocess
12from io import StringIO
13from typing import List, Any, Optional, Union, Dict, Sequence, Tuple, TypeVar
14from django.conf import settings
15from django.core.exceptions import ValidationError
16from django.utils.functional import lazy
17import xml.dom.minidom # type: ignore
18from django.utils.safestring import mark_safe
19from django.utils.text import capfirst
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Generic type variable for the choice-value type in choices_label().
S = TypeVar('S')
def format_full_name(first_name: str, last_name: str, max_length: int = 20) -> str:
    """
    Builds "<first name> <last name>" and shortens it step by step until it fits max_length.
    Commas are replaced with spaces before anything else. The shortening steps are tried
    in order, each one applied on top of the previous ones:
    1) full name as is, 2) middle names dropped, 3) only the first part of a combined
    (hyphenated) first name, 4) only the first part of a multi-part last name,
    5) last name cut to its first letter.
    :param first_name: First name
    :param last_name: Last name
    :param max_length: Maximum length of the returned name
    :return: Full name, or the first shortened version that fits
    :raises Exception: If even the shortest version is longer than max_length
    """
    given = first_name.replace(',', ' ')
    family = last_name.replace(',', ' ')
    original_full_name = given + ' ' + family

    # each step receives the current (first, last) pair and returns the reduced pair
    reductions = (
        lambda fn, ln: (fn, ln),
        lambda fn, ln: (fn.split(' ')[0], ln),
        lambda fn, ln: (re.split(r'[\s\-]', fn)[0], ln),
        lambda fn, ln: (fn, re.split(r'[\s\-]', ln)[0]),
        lambda fn, ln: (fn, ln[:1]),
    )
    for reduce_step in reductions:
        given, family = reduce_step(given, family)
        candidate = '{} {}'.format(given, family)
        if len(candidate) <= max_length:
            return candidate
    raise Exception('Failed to shorten name {}'.format(original_full_name))
def format_timedelta(dt: timedelta, days_label: str = 'd', hours_label: str = 'h',
                     minutes_label: str = 'min', seconds_label: str = 's') -> str:
    """
    Formats timedelta to readable format, e.g. 1h30min15s.
    Units whose count is zero are omitted. A unit with an empty label is not printed and
    its time is carried over to the next smaller unit (e.g. 25h when days_label is '').
    Durations shorter than one second are printed as seconds with three decimals
    (truncated to milliseconds). Negative durations are printed as the absolute
    duration prefixed with '-'.
    :param dt: timedelta
    :param days_label: Label for days. Leave empty '' if value should be skipped / ignored.
    :param hours_label: Label for hours. Leave empty '' if value should be skipped / ignored.
    :param minutes_label: Label for minutes. Leave empty '' if value should be skipped / ignored.
    :param seconds_label: Label for seconds. Leave empty '' if value should be skipped / ignored.
    :return: str
    """
    units = (
        (86400, days_label),
        (3600, hours_label),
        (60, minutes_label),
        (1, seconds_label),
    )
    seconds_f = dt.total_seconds()
    # divmod() on a negative value floors towards -infinity, which would never yield a
    # positive unit count; format the magnitude and re-attach the sign at the end instead
    sign = ''
    if seconds_f < 0:
        sign = '-'
        seconds_f = -seconds_f

    remaining = int(seconds_f)
    pieces = []
    for unit_seconds, label in units:
        count, rest = divmod(remaining, unit_seconds)
        if count > 0 and label:
            pieces.append(str(count) + label)
            remaining = rest
    out_str = ''.join(pieces).strip()
    if out_str:
        return (sign + out_str).strip()

    # nothing printed so far: sub-second duration, or no label consumed the value
    if seconds_f >= 0.001:
        fractional = '{:0.3f}'.format(int(seconds_f * 1000.0) * 0.001) + seconds_label
        return (sign + fractional).strip()
    # zero (or below millisecond resolution) never carries a sign
    return ('0' + seconds_label).strip()
def format_xml(content: str, encoding: str = 'UTF-8', exceptions: bool = False) -> str:
    """
    Pretty-prints an XML document given as str.
    When settings.XMLLINT_PATH is set, the content is written to a temporary file and
    formatted by running xmllint --format on it; otherwise minidom toprettyxml is used.
    On failure the error is logged and, unless exceptions is True, the input is
    returned unchanged.
    :param content: XML data as str
    :param encoding: Encoding used for the temporary file and for decoding xmllint output
    :param exceptions: Re-raise the error instead of returning the original content
    :return: str (Formatted XML str)
    """
    assert isinstance(content, str)
    try:
        xmllint_path = getattr(settings, 'XMLLINT_PATH', None)
        if not xmllint_path:
            return xml.dom.minidom.parseString(content).toprettyxml()
        with tempfile.NamedTemporaryFile() as tmp_file:
            tmp_file.write(content.encode(encoding=encoding))
            # make sure xmllint sees the whole document when it opens the file by name
            tmp_file.flush()
            formatted = subprocess.check_output([xmllint_path, '--format', tmp_file.name])
            return formatted.decode(encoding=encoding)
    except Exception as e:
        logger.error('format_xml failed: %s', e)
        if exceptions:
            raise
        return content
def format_xml_bytes(content: bytes, encoding: str = 'UTF-8', exceptions: bool = False) -> bytes:
    """
    Pretty-prints an XML document given as bytes and returns the result as bytes.
    When settings.XMLLINT_PATH is set, the bytes are written to a temporary file and
    formatted by running xmllint --format on it; otherwise the content is decoded and
    formatted with minidom toprettyxml. On failure the error is logged and, unless
    exceptions is True, the input is returned unchanged.
    :param content: XML data as bytes
    :param encoding: XML file encoding (used on the minidom path)
    :param exceptions: Re-raise the error instead of returning the original content
    :return: bytes (Formatted XML as bytes)
    """
    assert isinstance(content, bytes)
    try:
        xmllint_path = getattr(settings, 'XMLLINT_PATH', None)
        if not xmllint_path:
            document = xml.dom.minidom.parseString(content.decode(encoding=encoding))
            return document.toprettyxml(encoding=encoding)
        with tempfile.NamedTemporaryFile() as tmp_file:
            tmp_file.write(content)
            # make sure xmllint sees the whole document when it opens the file by name
            tmp_file.flush()
            return subprocess.check_output([xmllint_path, '--format', tmp_file.name])
    except Exception as e:
        logger.error('format_xml_bytes failed: %s', e)
        if exceptions:
            raise
        return content
def format_xml_file(full_path: str, encoding: str = 'UTF-8', exceptions: bool = False) -> bytes:
    """
    Pretty-prints an XML file and returns the result as bytes.
    Formatting is attempted first (xmllint --format when settings.XMLLINT_PATH is set,
    minidom toprettyxml otherwise). If formatting fails, the error is logged and the raw
    file content is returned instead; if the file cannot be read either, empty bytes
    is returned.
    :param full_path: Full path to XML file
    :param encoding: XML file encoding (used on the minidom path)
    :param exceptions: Re-raise formatting errors instead of falling back to raw content
    :return: bytes
    """
    try:
        xmllint_path = getattr(settings, 'XMLLINT_PATH', None)
        if xmllint_path:
            return subprocess.check_output([xmllint_path, '--format', full_path])
        with open(full_path, 'rb') as xml_file:
            document = xml.dom.minidom.parse(xml_file)
            return document.toprettyxml(encoding=encoding)
    except Exception as e:
        logger.error('format_xml_file failed (1): %s', e)
        if exceptions:
            raise

    # formatting failed: fall back to the unformatted file content
    try:
        with open(full_path, 'rb') as raw_file:
            return raw_file.read()
    except Exception as e:
        logger.error('format_xml_file failed (2): %s', e)
    return b''
def format_as_html_json(value: Any) -> str:
    """
    Returns value as JSON-formatted value in HTML.
    The value is dumped with 4-space indentation and HTML-escaped; after escaping, line
    breaks become <br/> tags and spaces become &nbsp; entities so that the indentation
    is not collapsed by the browser. The result is marked safe for templates.
    :param value: Any value which can be converted to JSON by json.dumps
    :return: str
    """
    escaped = html.escape(json.dumps(value, indent=4))
    # the entities are inserted after escaping on purpose, so they are not escaped again;
    # the explicit entity avoids relying on an invisible non-breaking space character
    markup = escaped.replace('\n', '<br/>').replace(' ', '&nbsp;')
    return mark_safe(markup)
def _format_dict_as_html_key(k: str) -> str:
    """
    Turns a dict key such as 'additionalInfo', 'additional_info' or '@additionalInfo'
    into a readable label: a leading '@' is dropped, underscores and camel-case
    boundaries become spaces, the first word is capitalized with capfirst and the
    remaining words are lower-cased.
    :param k: Key
    :return: str
    """
    if k.startswith('@'):
        k = k[1:]
    spaced = re.sub(r'((?<=[a-z])[A-Z]|(?<!\A)[A-Z](?=[a-z]))', r' \1', k.replace('_', ' '))
    # str.split(' ') always yields at least one element, so head is always bound
    head, *tail = spaced.split(' ')
    words: List[str] = [str(capfirst(head.strip()))]
    # consecutive separators produce empty parts; those are skipped
    words.extend(word for word in (part.strip().lower() for part in tail) if word)
    return ' '.join(words)
def _format_dict_as_html_r(data: Dict[str, Any], margin: str = '', format_keys: bool = True) -> str:
    """
    Recursively renders a dict as indented "key: value" lines, keys in sorted order.
    Nested dicts are rendered below a "key:" header line with a deeper margin; for list
    values the header line is repeated for every list item. A non-dict argument is
    rendered as a single line "<margin><value>".
    :param data: dict (or a leaf value when called recursively for list items)
    :param margin: Prefix prepended to every line at this nesting level
    :param format_keys: Re-format keys with _format_dict_as_html_key
    :return: str
    """
    if not isinstance(data, dict):
        return '{}{}\n'.format(margin, data)

    def label_of(key: str) -> str:
        return _format_dict_as_html_key(key) if format_keys else key

    # NOTE(review): whitespace appears collapsed in the reviewed copy of this file;
    # confirm the width of the nested indent against the repository
    nested_margin = margin + ' '
    chunks: List[str] = []
    for key, value in sorted(data.items()):
        if isinstance(value, dict):
            chunks.append('{}{}:\n'.format(margin, label_of(key)))
            chunks.append(_format_dict_as_html_r(value, nested_margin, format_keys=format_keys))
            chunks.append('\n')
        elif isinstance(value, list):
            for item in value:
                chunks.append('{}{}:\n'.format(margin, label_of(key)))
                chunks.append(_format_dict_as_html_r(item, nested_margin, format_keys=format_keys))
            # NOTE(review): blank line emitted once per list here; the reviewed copy had
            # lost its indentation, so confirm it is not meant to follow every item
            chunks.append('\n')
        else:
            chunks.append('{}{}: {}\n'.format(margin, label_of(key), value))
    return ''.join(chunks)
def format_dict_as_html(data: Dict[str, Any], format_keys: bool = True) -> str:
    """
    Renders a dict as simple human-readable pre-formatted html, i.e. the recursive
    text rendering of the dict wrapped in a <pre> tag.
    :param data: dict
    :param format_keys: Re-format 'additionalInfo' and 'additional_info' type of keys as 'Additional info'
    :return: str (html)
    """
    body = _format_dict_as_html_r(data, format_keys=format_keys)
    return '<pre>{}</pre>'.format(body)
def format_csv(rows: List[List[Any]], dialect: str = 'excel') -> str:
    """
    Serializes rows into a CSV document held in memory and returns it as a string.
    :param rows: List[List[Any]]
    :param dialect: See csv.writer dialect
    :return: str
    """
    buffer = StringIO()
    csv.writer(buffer, dialect=dialect).writerows(rows)
    return buffer.getvalue()
def format_table(rows: List[List[Any]], max_col: Optional[int] = None, max_line: Optional[int] = 200,  # noqa
                 col_sep: str = '|', row_sep: str = '-', row_begin: str = '|', row_end: str = '|',
                 has_label_row: bool = False,
                 left_align: Optional[List[int]] = None, center_align: Optional[List[int]] = None) -> str:
    """
    Formats "ASCII-table" rows by padding column widths to longest column value, optionally limiting column widths.
    Optionally separates columns with a separator string and the header row with a separator line.
    Supports left, right and center alignment. Useful for console apps / debugging.

    Over-long cell values are cut and end with '..'. Columns which do not fit into max_line are
    dropped from the right and every row is then terminated with '..'. An empty rows list
    produces an empty string.

    :param rows: List[List[Any]]
    :param max_col: Max column value width. Pass None for unlimited length.
    :param max_line: Maximum single line length. Exceeding columns truncated. Pass None for unlimited length.
    :param col_sep: Column separator string.
    :param row_sep: Row separator character used before first row, end, after first row (if has_label_row).
    :param row_begin: Row begin string, inserted before each row.
    :param row_end: Row end string, appended after each row.
    :param has_label_row: Set to True if table starts with column label row.
    :param left_align: Indexes of left-aligned columns. By default all are right aligned.
    :param center_align: Indexes of center-aligned columns. By default all are right aligned.
    :return: str
    :raises ValidationError: If the same column index is both left and center aligned
    """
    # validate parameters
    assert max_col is None or max_col > 2
    if left_align is None:
        left_align = []
    if center_align is None:
        center_align = []
    if set(left_align) & set(center_align):
        raise ValidationError('Left align columns {} overlap with center align {}'.format(left_align, center_align))

    # nothing to render (also avoids dividing by a zero column count below)
    if not rows:
        return ''

    # stringify every cell once and find out number of columns
    text_rows = [[str(v) for v in row] for row in rows]
    ncols = max(len(row) for row in text_rows)

    # find out full-width column lengths
    full_widths = [0] * ncols
    for row in text_rows:
        for ix, v in enumerate(row):
            full_widths[ix] = max(full_widths[ix], len(v))

    # adjust max_col if needed (ncols guard: rows may consist of empty lists only)
    # NOTE(review): this replaces a caller-supplied max_col even when the computed value
    # is larger than what the caller asked for -- confirm that widening is intended
    if ncols and max_line and (not max_col or sum(full_widths) > max_line):
        max_col = max_line // ncols

    # length limited cells, rows filled up to ncols cells, final column lengths
    col_widths = [0] * ncols
    clipped_rows: List[List[str]] = []
    for row in text_rows:
        clipped = []
        for ix, v in enumerate(row):
            if max_col and len(v) > max_col:
                v = v[:max_col - 2] + '..'
            clipped.append(v)
            col_widths[ix] = max(col_widths[ix], len(v))
        clipped.extend([''] * (ncols - len(clipped)))
        clipped_rows.append(clipped)

    # padded cells
    padded_rows: List[List[str]] = []
    for row in clipped_rows:
        padded = []
        for ix, v in enumerate(row):
            pad = col_widths[ix] - len(v)
            if pad > 0:
                if ix in left_align:
                    v = v + ' ' * pad
                elif ix in center_align:
                    lpad = pad // 2
                    v = ' ' * lpad + v + ' ' * (pad - lpad)
                else:
                    v = ' ' * pad + v
            padded.append(v)
        padded_rows.append(padded)

    def rendered_len(cells: List[str]) -> int:
        return len(row_begin) + len(col_sep.join(cells)) + len(row_end)

    # calculate number of columns which fit the line and the resulting line length;
    # the length is tracked also when max_line is None so that separator rows get
    # their full width for unlimited-length tables
    max_line_len = 0
    total_cols = ncols
    for row in padded_rows:
        line_len = rendered_len(row[:ncols])
        if max_line is not None:
            # stop at zero columns: nothing more can be dropped
            while line_len > max_line and ncols > 0:
                ncols -= 1
                line_len = rendered_len(row[:ncols])
        max_line_len = max(max_line_len, line_len)

    # find out how we should terminate lines/rows
    line_term = ''
    row_sep_term = ''
    if total_cols > ncols:
        line_term = '..'
        if row_sep:
            row_sep_term = row_sep * (2 // len(row_sep))

    # final output with row and column separators
    separator = row_sep * max_line_len + row_sep_term
    out_lines = []
    if row_sep:
        out_lines.append(separator)
    for line_ix, row in enumerate(padded_rows):
        out_lines.append(row_begin + col_sep.join(row[:ncols]) + row_end + line_term)
        if line_ix == 0 and row_sep and has_label_row:
            out_lines.append(separator)
    if row_sep:
        out_lines.append(separator)
    return '\n'.join(out_lines)
def ucfirst(v: str) -> str:
    """
    Returns the string with its first character upper-cased and the rest untouched.
    An empty string is returned as is.
    :param v: str
    :return: str
    """
    head, tail = v[:1], v[1:]
    return head.upper() + tail
# Lazy variant of ucfirst() built with django.utils.functional.lazy; declared result type is str.
ucfirst_lazy = lazy(ucfirst, str)
def dec1(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 1 decimal digit.
    :param a: Number
    :return: Decimal with 1 decimal digit
    """
    quantum = Decimal('1.0')
    return Decimal(a).quantize(quantum)
def dec2(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 2 decimal digits.
    :param a: Number
    :return: Decimal with 2 decimal digits
    """
    quantum = Decimal('1.00')
    return Decimal(a).quantize(quantum)
def dec3(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 3 decimal digits.
    :param a: Number
    :return: Decimal with 3 decimal digits
    """
    quantum = Decimal('1.000')
    return Decimal(a).quantize(quantum)
def dec4(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 4 decimal digits.
    :param a: Number
    :return: Decimal with 4 decimal digits
    """
    quantum = Decimal('1.0000')
    return Decimal(a).quantize(quantum)
def dec5(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 5 decimal digits.
    :param a: Number
    :return: Decimal with 5 decimal digits
    """
    quantum = Decimal('1.00000')
    return Decimal(a).quantize(quantum)
def dec6(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 6 decimal digits.
    :param a: Number
    :return: Decimal with 6 decimal digits
    """
    quantum = Decimal('1.000000')
    return Decimal(a).quantize(quantum)
def is_media_full_path(file_path: str) -> bool:
    """
    Checks if file path is under (settings) MEDIA_ROOT.
    The path must be absolute; it is resolved with os.path.realpath before the comparison.
    The match is done on whole path components, so a sibling directory which merely shares
    the prefix (e.g. '/srv/media2' for MEDIA_ROOT '/srv/media') is not accepted.
    :param file_path: str
    :return: True if MEDIA_ROOT is configured and the path is MEDIA_ROOT itself or below it
    """
    media_root = getattr(settings, 'MEDIA_ROOT', None)
    if not media_root or not os.path.isabs(file_path):
        return False
    root = os.fspath(media_root)
    real_path = os.path.realpath(file_path)
    # require a path separator right after the root so that only real descendants match
    root_prefix = root if root.endswith(os.sep) else root + os.sep
    return real_path == root or real_path.startswith(root_prefix)
def strip_media_root(file_path: str) -> str:
    """
    Strips the (settings) MEDIA_ROOT part from an absolute path under MEDIA_ROOT and
    returns the remaining relative path (without a leading '/').
    This enables storing file names in a more portable format for different
    environments / storage.
    Reverse operation of this is get_media_full_path().
    :param file_path: str
    :return: str
    :raises ValueError: If the path is not an absolute path under MEDIA_ROOT
    """
    full_path = os.path.realpath(file_path)
    if is_media_full_path(file_path):
        relative_path = full_path[len(settings.MEDIA_ROOT):]
        return relative_path[1:] if relative_path.startswith('/') else relative_path
    logger.error('strip_media_root() expects absolute path under MEDIA_ROOT, got %s (%s)', file_path, full_path)
    raise ValueError('strip_media_root() expects absolute path under MEDIA_ROOT')
def get_media_full_path(file_path: str) -> str:
    """
    Returns the absolute path for a path relative to (settings) MEDIA_ROOT.
    An absolute input path is resolved with os.path.realpath instead of being joined.
    This enables storing file names in a more portable format for different
    environments / storage.
    Reverse operation of this is strip_media_root().
    :param file_path: str
    :return: str
    :raises ValueError: If the resulting path is not under MEDIA_ROOT
    """
    if os.path.isabs(file_path):
        full_path = os.path.realpath(file_path)
    else:
        full_path = os.path.join(settings.MEDIA_ROOT, file_path)
    if is_media_full_path(full_path):
        return full_path
    logger.error('get_media_full_path() expects relative path to MEDIA_ROOT, got %s (%s)', file_path, full_path)
    raise ValueError('get_media_full_path() expects relative path to MEDIA_ROOT')
def camel_case_to_underscore(s: str) -> str:
    """
    Converts camelCaseWord to camel_case_word.
    Runs of capitals are kept together (HTTPResponse -> http_response) and dashes are
    converted to underscores as well.
    :param s: str
    :return: str
    """
    if not s:
        return s.lower()
    # split an acronym from the capitalized word following it: "HTTPResponse" -> "HTTP_Response"
    acronyms_split = re.sub(r"([A-Z]+)([A-Z][a-z])", r'\1_\2', s)
    # split a lower-case letter or digit from the capital following it: "caseWord" -> "case_Word"
    words_split = re.sub(r"([a-z\d])([A-Z])", r'\1_\2', acronyms_split)
    return words_split.replace("-", "_").lower()
def underscore_to_camel_case(s: str) -> str:
    """
    Converts under_score_word to underScoreWord.
    The first part is kept as is; the first character of every following part is
    upper-cased. Empty input is returned unchanged.
    :param s: str
    :return: str
    """
    if not s:
        return s
    first, *others = s.split('_')
    return first + ''.join(part[:1].upper() + part[1:] for part in others)
def choices_label(choices: Sequence[Tuple[S, str]], value: S) -> str:
    """
    Looks up the label of a value from a (value, label) sequence.
    :param choices: [(choice1, label1), (choice2, label2), ...]
    :param value: Value to find
    :return: Label of the first matching choice, or empty string '' if there is no match
    """
    matching_labels = (label for key, label in choices if key == value)
    return next(matching_labels, '')