Source code for upsies.utils.string

"""
String formatting and parsing
"""

import os
import re
import sys

from .. import errors, utils

import logging  # isort:skip
_log = logging.getLogger(__name__)


def normalize_comma_separated_list(text, *, lower=False, sort=False):
    """
    Split `text` at ``,`` and re-join the cleaned-up items with ``, ``

    Every item is stripped of surrounding whitespace, empty items are dropped
    and duplicate items are removed (the first occurrence is kept).

    :param str text: Comma-separated items
    :param bool lower: Whether `text` is lower-cased before it is split
    :param bool sort: Whether items are sorted

    :return: Normalized items joined with ``, ``
    """
    source = text.lower() if lower else text

    # Dictionary keys provide deduplication while preserving insertion order.
    unique = {}
    for chunk in source.split(','):
        stripped = chunk.strip()
        if stripped:
            unique.setdefault(stripped, None)

    items = sorted(unique) if sort else list(unique)
    return ', '.join(items)
# Largest nfo file size in bytes (1 MiB) that read_nfo() is willing to read.
_max_nfo_size = 1048576


def read_nfo(path, *, strip=False):
    """
    Recursively search directory for ``*.nfo`` files and read the first one
    found

    `path` may also be an nfo file, in which case its file extension is
    ignored.

    The nfo file is decoded with :func:`decode_nfo`. Files larger than 1 MiB
    are ignored.

    If no ``*.nfo`` file is found, return `None`.

    :param path: Directory or file path
    :param bool strip: See :func:`decode_nfo`

    :raise ContentError: if no nfo file could be read and at least one of
        them failed with an :class:`OSError`
    """
    # Directories are filtered for *.nfo files; a plain file is read
    # regardless of its extension.
    extensions = ('nfo', 'NFO') if os.path.isdir(path) else ()

    # Keep trying the remaining files after a failure. We don't want to raise
    # on the first file if the second file is perfectly fine. Only the first
    # failure is reported.
    first_failure = None
    for candidate in utils.fs.file_list(path, extensions=extensions):
        try:
            if os.path.getsize(candidate) > _max_nfo_size:
                continue
            with open(candidate, 'rb') as f:
                return decode_nfo(f.read(), strip=strip)
        except OSError as e:
            if first_failure is None:
                first_failure = (candidate, e.strerror or str(e))

    if first_failure is not None:
        failed_path, reason = first_failure
        raise errors.ContentError(f'Failed to read nfo: {failed_path}: {reason}')
    return None
def decode_nfo(bytes, *, strip=False):
    r"""
    Return decoded `bytes`

    Try to decode as UTF-8 first. If that fails, decode as CP437 and replace
    invalid characters with "�" (U+FFFD).

    All line breaks (e.g. CR+LF) are converted to "\n".

    :param bytes bytes: Raw content of the nfo file
    :param bool strip: Whether to remove whitespace at the beginning and end

        .. note:: To preserve ASCII art, spaces at the beginning of the first
                  non-empty line are kept. All empty or whitespace-only lines
                  before that line are removed.

    :return: Decoded text
    """
    try:
        text = bytes.decode('utf8', 'strict')
    except UnicodeDecodeError:
        text = bytes.decode('cp437', 'replace')

    # Replace all line breaks (e.g. CR+LF) with "\n". "\r\n" must be handled
    # before "\r" so it does not turn into two line breaks.
    # NOTE: str.splitlines() does not preserve trailing line breaks.
    for linebreak in ('\r\n', '\r'):
        text = text.replace(linebreak, '\n')

    if strip:
        # Remove any whitespace at the end.
        text = text.rstrip()
        # Remove ALL empty lines at the beginning while keeping any leading
        # spaces on the first non-empty line to preserve ASCII art. The greedy
        # "\s*" backtracks to the last line break within the leading
        # whitespace, so everything up to and including that line break is
        # removed and the indentation after it is kept.
        text = re.sub(r'^\s*\n', '', text)

    return text
# A "word" is any run of non-whitespace characters.
_capitalize_regex = re.compile(r'\S+')


def capitalize(text):
    """
    Capitalize each word in `text`

    A word is a run of non-whitespace characters. Each word is passed through
    :meth:`str.capitalize`, i.e. its first character is upper-cased and the
    rest is lower-cased. All whitespace is preserved as is, including `text`
    that consists of nothing but whitespace.

    Unlike :meth:`str.title`, only words that follow whitespace or that are at
    the beginning of `text` are capitalized (e.g. ``"it's"`` becomes
    ``"It's"``, not ``"It'S"``).

    :param str text: Text to capitalize

    :return: `text` with every word capitalized
    """
    # Substituting only the words leaves every whitespace character untouched,
    # even if there are no words at all.
    return _capitalize_regex.sub(lambda match: match.group(0).capitalize(), text)
def star_rating(rating, max_rating=10):
    """
    Return star rating string with the characters "★" (U+2605), "⯪" (U+2BEA)
    and "☆" (U+2606)

    `rating` is clamped to the range from 0 to `max_rating`.

    :param float,int rating: Number between 0 and `max_rating`
    :param float,int max_rating: Maximum rating
    """
    import math

    full_star, half_star, empty_star = '\u2605', '\u2bea', '\u2606'

    rating = min(max_rating, max(0, rating))
    full_stars = full_star * math.floor(rating)
    # One position is reserved for the (possibly partial) star in the middle.
    trailing_stars = empty_star * (math.ceil(max_rating - rating) - 1)

    if rating >= max_rating:
        partial_star = ''
    else:
        # Avoid floating point precision issues by rounding to 1 digit after comma
        fraction = round(rating % 1, 1)
        if fraction <= 0.3:
            partial_star = empty_star
        elif fraction < 0.7:
            partial_star = half_star
        else:
            partial_star = full_star

    return full_stars + partial_star + trailing_stars
if sys.version_info >= (3, 9, 0):
    def remove_prefix(string, prefix):
        """Return `string` without `prefix` at the beginning, if it is there"""
        return string.removeprefix(prefix)

    def remove_suffix(string, suffix):
        """Return `string` without `suffix` at the end, if it is there"""
        return string.removesuffix(suffix)

else:
    # Fallbacks for Python versions without str.removeprefix() and
    # str.removesuffix().

    def remove_prefix(string, prefix):
        """Return `string` without `prefix` at the beginning, if it is there"""
        if prefix and string.startswith(prefix):
            return string[len(prefix):]
        return string

    def remove_suffix(string, suffix):
        """Return `string` without `suffix` at the end, if it is there"""
        # An empty suffix must not be sliced off: string[:-0] is the same as
        # string[:0], which is an empty string.
        if suffix and string.endswith(suffix):
            return string[:-len(suffix)]
        return string
def evaluate_fstring(template, **variables):
    """
    Deferred f-string evaluation

    Unlike ``"Bar: {foo}".format(foo="bar")``, this function will also
    evaluate function calls inside the curly braces.

    .. warning:: Because `template` is passed to :func:`eval`, this function
                 must NEVER be called with code that comes from the user. Only
                 use this function to evaluate hardcoded strings.

    :param str template: Unevaluated f-string (i.e. a normal string containing
        curly braces without the ``f`` in front of the quote)
    :param dict variables: Keyword arguments that map names that can be used
        in `template` in curly braces to the objects they refer to

    :return: Evaluated `template` as :class:`str`

    Example:

    >>> template = 'Evaluate {x}: {sum((1, 2, 3)) * 2}'
    >>> evaluate_fstring(template, x='this')
    'Evaluate this: 12'
    >>> evaluate_fstring(template, x='that')
    'Evaluate that: 12'

    References:

        https://pypi.org/project/f-yeah/

        https://stackoverflow.com/a/42497694
    """
    # repr() produces a properly quoted and escaped string literal. Prefixing
    # it with "f" turns it into the source code of an f-string expression.
    source = f'f{template!r}'
    return eval(source, variables)
class CaseInsensitiveString(str):
    """
    :class:`str` that ignores case when compared or sorted

    Comparisons are based on :meth:`str.casefold`. Comparing with an object
    that is not a :class:`str` is delegated to Python's default handling:
    ``==`` is `False`, ``!=`` is `True` and ordering comparisons raise
    :class:`TypeError`.
    """

    def __hash__(self):
        # Must agree with __eq__(): equal strings need equal hashes.
        return hash(self.casefold())

    def __eq__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() == other.casefold()

    def __ne__(self, other):
        is_equal = self.__eq__(other)
        # NotImplemented is truthy, so negating it would wrongly claim that
        # any non-string is "not unequal" to this string.
        if is_equal is NotImplemented:
            return NotImplemented
        return not is_equal

    def __lt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() < other.casefold()

    def __le__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() <= other.casefold()

    def __gt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() > other.casefold()

    def __ge__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() >= other.casefold()
def get_youtube_id(text):
    """
    Return YouTube video ID or `None`

    The matching is very lenient and does not require a URL. Anything that
    reasonably looks like it could be a YouTube ID will match, e.g.
    "yt/_d34d-_B33F". If `text` is an arbitrary string, make sure it's a URL
    first.

    :param str text: String that may contain a YouTube ID
    """
    found = re.search(r'\b(?i:youtube|youtu\.be|yt)\b.*?(?:v=|/)([a-zA-Z0-9_-]{9,})', text)
    if found is None:
        return None
    return found.group(1)
def get_vimeo_id(text):
    """
    Return Vimeo video ID or `None`

    Like :func:`get_youtube_id`, the matching is lenient: `text` must contain
    the word "vimeo" (in any case) followed at some point by a ``/`` and
    digits. The digits after the last such ``/`` are returned.

    :param str text: String that may contain a Vimeo ID
    """
    found = re.search(r'\b(?i:vimeo)\b.*/(\d+)', text)
    if found is None:
        return None
    return found.group(1)