Source code for upsies.utils.string

"""
String formatting and parsing
"""

import os
import re
import sys

from .. import errors, utils

import logging  # isort:skip
_log = logging.getLogger(__name__)



[docs]
def normalize_comma_separated_list(text, *, lower=False, sort=False):
    """
    Return `text` as a clean ``", "``-separated list

    `text` is split at ``,``. Whitespace around each item is removed, empty
    items are dropped and duplicate items are removed (the first occurrence
    wins). The remaining items are joined with ``", "``.

    :param str text: Comma-separated items
    :param bool lower: Whether `text` is lower-cased before it is split
    :param bool sort: Whether items are sorted instead of keeping their
        original order
    """
    source = text.lower() if lower else text

    # A dict is used as an ordered set: keys are unique and keep insertion order.
    unique = {}
    for chunk in source.split(','):
        chunk = chunk.strip()
        if chunk:
            unique.setdefault(chunk, None)

    ordered = sorted(unique) if sort else list(unique)
    return ', '.join(ordered)



# Size limit in bytes (1 MiB). read_nfo() skips files that are larger than this.
_max_nfo_size = 1048576


[docs]
def read_nfo(path, *, strip=False):
    """
    Return the decoded content of the first usable nfo file

    If `path` is a directory, it is searched recursively for ``*.nfo`` files.
    If `path` is a file, it is read regardless of its file extension.

    The content is decoded with :func:`decode_nfo`.

    Files larger than 1 MiB are skipped.

    :param path: Path to an nfo file or to a directory that contains nfo files
    :param bool strip: See :func:`decode_nfo`

    :return: Decoded nfo text or `None` if there is no suitable file

    :raise ContentError: if no file could be read and at least one of them
        failed with an :class:`OSError`
    """
    # Only filter by extension when searching a directory. An explicitly
    # specified file is accepted with any extension.
    extensions = ('nfo', 'NFO') if os.path.isdir(path) else ()

    # Remember only the first failure. It is reported after every candidate
    # was tried because a later file may be perfectly readable.
    first_failure = None
    for candidate in utils.fs.file_list(path, extensions=extensions):
        try:
            if os.path.getsize(candidate) > _max_nfo_size:
                continue
            with open(candidate, 'rb') as nfo_file:
                return decode_nfo(nfo_file.read(), strip=strip)
        except OSError as e:
            if first_failure is None:
                first_failure = (candidate, e.strerror or str(e))

    if first_failure is not None:
        nfo_filepath, msg = first_failure
        raise errors.ContentError(f'Failed to read nfo: {nfo_filepath}: {msg}')

    return None




[docs]
def decode_nfo(bytes, *, strip=False):
    r"""
    Return decoded `bytes`

    Try to decode as UTF-8 first. If that fails, decode as CP437 and replace invalid characters with
    "�" (U+FFFD).

    All line breaks (e.g. CR+LF) are converted to "\n".

    :param bytes bytes: Raw content of an nfo file
    :param bool strip: Whether to remove whitespace at the end and all
        whitespace-only lines at the beginning

        .. note:: To preserve ASCII art, spaces at the beginning of the first
                  non-empty line are kept.

    :return: Decoded text as :class:`str`
    """
    try:
        text = bytes.decode('utf8', 'strict')
    except UnicodeDecodeError:
        text = bytes.decode('cp437', 'replace')

    # Replace all line breaks (e.g. CR+LF) with "\n". CR+LF must be handled
    # first so it doesn't turn into two line breaks.
    # NOTE: str.splitlines() does not preserve trailing line breaks.
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    if strip:
        # Remove any whitespace at the end.
        text = text.rstrip()
        # Remove ALL whitespace-only lines at the beginning. The greedy "\s*"
        # backtracks to the last line break in the leading whitespace, so
        # everything up to and including that line break is removed while the
        # indentation of the first non-empty line is kept to preserve ASCII
        # art.
        text = re.sub(r'^\s*\n', '', text)

    return text



# Matches one run of non-whitespace characters (group 2) together with any
# whitespace directly in front of it (group 1) and directly after it (group 3).
_capitalize_regex = re.compile(r'(\s*)(\S+)(\s*)')


[docs]
def capitalize(text):
    """
    Capitalize each word in `text`

    A word is any run of non-whitespace characters. Unlike :meth:`str.title`,
    only words that follow whitespace or that are at the beginning of `text`
    are capitalized, e.g. ``"foo-bar's"`` becomes ``"Foo-bar's"``.

    Each word is passed through :meth:`str.capitalize`, i.e. all characters
    after the first one are lower-cased.

    All whitespace is preserved, even if `text` consists only of whitespace.

    :param str text: Text to capitalize

    :return: Capitalized copy of `text`
    """
    # Substituting only the words leaves every other character untouched.
    # Joining regex matches instead would drop `text` completely if it doesn't
    # contain any word.
    return re.sub(r'\S+', lambda match: match.group(0).capitalize(), text)




[docs]
def star_rating(rating, max_rating=10):
    """
    Return `rating` as a string of stars

    The returned string consists of the characters "★" (U+2605, full star),
    "⯪" (U+2BEA, half star) and "☆" (U+2606, empty star).

    `rating` is clamped to the range from 0 to `max_rating`.

    :param float,int rating: Number between 0 and `max_rating`
    :param float,int max_rating: Maximum rating
    """
    import math

    full_star, half_star, empty_star = '\u2605', '\u2bea', '\u2606'

    clamped = min(max_rating, max(0, rating))

    if clamped >= max_rating:
        # Every star is already full. There is no partial star.
        partial = ''
    else:
        # Avoid floating point precision issues by rounding the fractional
        # part to 1 digit after the decimal point.
        fraction = round(clamped % 1, 1)
        if fraction <= 0.3:
            partial = empty_star
        elif fraction < 0.7:
            partial = half_star
        else:
            partial = full_star

    # The partial star takes one of the remaining slots.
    trailing_count = math.ceil(max_rating - clamped) - 1

    return full_star * math.floor(clamped) + partial + empty_star * trailing_count



if sys.version_info >= (3, 9, 0):
    def remove_prefix(string, prefix):
        """Return `string` without `prefix` at the beginning (see :meth:`str.removeprefix`)"""
        return string.removeprefix(prefix)

    def remove_suffix(string, suffix):
        """Return `string` without `suffix` at the end (see :meth:`str.removesuffix`)"""
        return string.removesuffix(suffix)

else:
    # Fallback implementations for Python versions that don't have
    # str.removeprefix() and str.removesuffix().

    def remove_prefix(string, prefix):
        """Return `string` without `prefix` at the beginning"""
        if prefix and string.startswith(prefix):
            return string[len(prefix):]
        return string

    def remove_suffix(string, suffix):
        """Return `string` without `suffix` at the end"""
        # An empty suffix must not be sliced off: every string ends with "",
        # but string[:-0] is the same as string[:0], which is always "".
        if suffix and string.endswith(suffix):
            return string[:-len(suffix)]
        return string




[docs]
def evaluate_fstring(template, **variables):
    """
    Evaluate `template` as if it were an f-string

    In contrast to ``"Bar: {foo}".format(foo="bar")``, arbitrary expressions
    inside the curly braces (e.g. function calls) are evaluated as well.

    .. warning:: Because `template` is passed to :func:`eval`, this function
        must NEVER be called with code that comes from the user. Only use this
        function to evaluate hardcoded strings.

    :param str template: Unevaluated f-string (i.e. a normal string containing
        curly braces without the ``f`` in front of the quote)
    :param variables: Keyword arguments that map names that can be used in
        `template` in curly braces to the objects they refer to

    :return: The evaluated string

    Example:

    >>> template = 'Evaluate {x}: {sum((1, 2, 3)) * 2}'
    >>> evaluate_fstring(template, x='this')
    'Evaluate this: 12'
    >>> evaluate_fstring(template, x='that')
    'Evaluate that: 12'

    References:

        https://pypi.org/project/f-yeah/
        https://stackoverflow.com/a/42497694
    """
    # Turn `template` into the source code of an f-string literal by putting
    # an "f" in front of its quoted representation. `variables` is the global
    # namespace in which the literal is evaluated.
    return eval(f'f{template!r}', variables)




[docs]
class CaseInsensitiveString(str):
    """
    :class:`str` that ignores case when compared or sorted

    Comparisons are made between the :meth:`~str.casefold`\\ ed strings. The
    hash is computed from the casefolded string as well, so instances that
    compare equal also have the same hash.

    Comparing with anything that is not a :class:`str` behaves like it does
    for regular strings: ``==`` is `False`, ``!=`` is `True` and ordering
    comparisons raise :class:`TypeError`.
    """

    def __hash__(self):
        return hash(self.casefold())

    def __eq__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() == other.casefold()

    def __ne__(self, other):
        # NotImplemented must be passed on untouched. Negating it would claim
        # that `self` is equal to any non-string object (NotImplemented is
        # truthy) and newer Python versions refuse to evaluate it as a boolean.
        is_equal = self.__eq__(other)
        if is_equal is NotImplemented:
            return NotImplemented
        return not is_equal

    # The ordering methods return NotImplemented for non-strings so that Python
    # raises the usual TypeError instead of an AttributeError about a missing
    # casefold() method.

    def __lt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() < other.casefold()

    def __le__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() <= other.casefold()

    def __gt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() > other.casefold()

    def __ge__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.casefold() >= other.casefold()




[docs]
def get_youtube_id(text):
    """
    Find a YouTube video ID in `text`

    The matching is very lenient and does not require a URL. Anything that
    reasonably looks like it could be a YouTube ID will match, e.g.
    "yt/_d34d-_B33F". If `text` is an arbitrary string, make sure it's a URL
    first.

    :param str text: String that may contain a YouTube video ID

    :return: Video ID or `None` if no ID is found
    """
    # Case-insensitive site name, followed by the first "v=" or "/" that has at
    # least 9 ID characters after it.
    found = re.search(r'\b(?i:youtube|youtu\.be|yt)\b.*?(?:v=|/)([a-zA-Z0-9_-]{9,})', text)
    return found.group(1) if found else None




[docs]
def get_vimeo_id(text):
    """
    Find a Vimeo video ID in `text`

    The matching is lenient: `text` must contain the word "vimeo" (in any
    case) and, somewhere after it, a ``/`` that is followed by digits. If
    there are multiple candidates, the last one is used.

    :param str text: String that may contain a Vimeo video ID

    :return: Video ID as a string of digits or `None` if no ID is found
    """
    found = re.search(r'\b(?i:vimeo)\b.*/(\d+)', text)
    return found.group(1) if found else None