"""
Abstract base class for online databases
"""
import abc
import asyncio
import copy
import re
import string
import unidecode
from .. import country, http
from .common import Query
import logging # isort:skip
_log = logging.getLogger(__name__)
class WebDbApiBase(abc.ABC):
    """
    Base class for all web DB APIs

    Not all DBs provide all information. Methods that take an `id` argument may
    return an empty string, an empty tuple or `None`.

    :param config: Optional mapping of user configuration; its items are
        merged into a deep copy of :attr:`default_config`
    """

    # Longest run of "I" that :meth:`_normalize_title` generates for a number.
    # Without a limit, a title that contains a long digit sequence would be
    # expanded into a string with that many characters.
    _roman_numeral_max_length = 10000

    # Any run of ASCII punctuation. The characters must be escaped because some
    # of them (e.g. "\", "]", "^", "-") are special inside a character class.
    _punctuation_regex = re.compile(rf'[{re.escape(string.punctuation)}]+')

    # Standalone arabic number or standalone run of "I" (roman numeral)
    _number_regex = re.compile(r'\b((?i:I+|\d+))\b')

    def __init__(self, config=None):
        # Deep copy so that nested default values are not shared with
        # `default_config`
        self._config = copy.deepcopy(self.default_config)
        if config is not None:
            self._config.update(config.items())

    @property
    @abc.abstractmethod
    def name(self):
        """Unique name of this DB"""

    @property
    @abc.abstractmethod
    def label(self):
        """User-facing name of this DB"""

    @property
    def no_results_info(self):
        """
        Hints for the user to find something

        This should be displayed if there are no search results.
        """
        return ''

    @property
    def config(self):
        """
        User configuration

        This is a deep copy of :attr:`default_config` that is updated with the
        `config` argument from initialization.
        """
        return self._config

    @property
    @abc.abstractmethod
    def default_config(self):
        """Default user configuration as a dictionary"""

    def sanitize_query(self, query):
        """
        Modify :class:`.Query` for specific DB

        If :meth:`get_id_from_text` finds an ID in :attr:`.Query.title`,
        :attr:`.Query.id` is set to that ID, which means all other query
        parameters are ignored.

        :param query: :class:`.Query` instance (modified in place)

        :return: The same `query` object

        :raise TypeError: if `query` is not a :class:`.Query` instance
        """
        if not isinstance(query, Query):
            raise TypeError(f'Not a Query instance: {query!r}')

        id_from_url = self.get_id_from_text(query.title)
        if id_from_url:
            query.id = id_from_url
        return query

    @abc.abstractmethod
    def get_id_from_text(self, text):
        """Return ID found in `text` or `None`"""

    @abc.abstractmethod
    async def search(self, query):
        """
        Search DB

        :param query: :class:`~.common.Query` instance

        :return: List of :class:`~.common.SearchResult` instances
        """

    @abc.abstractmethod
    async def cast(self, id):
        """Return list of cast names"""

    async def countries(self, id):
        """Return list of country names"""
        countries = await self._countries(id)
        # Whatever the concrete DB reports is passed through country.name()
        return country.name(countries)

    @abc.abstractmethod
    async def _countries(self, id):
        """Return the countries for `id` that :meth:`countries` passes to ``country.name``"""

    @abc.abstractmethod
    async def languages(self, id):
        """Return list of spoken languages"""

    @abc.abstractmethod
    async def creators(self, id):
        """Return list of creator names (usually empty for movies and episodes)"""

    @abc.abstractmethod
    async def directors(self, id):
        """Return list of director names (usually empty for series)"""

    @abc.abstractmethod
    async def genres(self, id):
        """Return list of genres"""

    @abc.abstractmethod
    async def poster_url(self, id, season=None):
        """
        Return URL of poster image or empty string

        :param season: Return poster for specific season

            If this is not supported by the concrete implementation, default to
            the URL for the main poster.
        """

    async def poster(self, id, season=None):
        """
        Return poster image as binary data or `None`

        :param season: Return poster for specific season

            If this is not supported by the concrete implementation, default to
            the main poster.
        """
        poster_url = await self.poster_url(id, season=season)
        if not poster_url:
            # No poster available, nothing to download
            return None

        response = await http.get(
            poster_url,
            user_agent='BROWSER',
            cache=True,
        )
        return response.bytes

    @abc.abstractmethod
    async def rating(self, id):
        """Return rating as a number or `None`"""

    # NOTE: `rating_min` and `rating_max` are plain (non-async) properties.
    #       A property getter that is a coroutine function would make
    #       `db.rating_min` evaluate to an un-awaited coroutine object.

    @property
    @abc.abstractmethod
    def rating_min(self):
        """Minimum :meth:`rating` value"""

    @property
    @abc.abstractmethod
    def rating_max(self):
        """Maximum :meth:`rating` value"""

    async def runtimes(self, id):
        """
        Return mapping of runtimes

        Keys are descriptive strings (e.g. "Director's Cut", "Ultimate Cut",
        etc) and values are the runtime in minutes (:class:`int`).

        The key of the default cut is ``default``.

        Every key that :meth:`_runtimes` provides is passed through
        ``country.name``. Only the first occurrence of each resulting key and
        of each runtime value is kept.
        """
        runtimes = {}
        for key, runtime in (await self._runtimes(id)).items():
            country_name = country.name(key)
            # Skip duplicate keys as well as cuts with a runtime we already have
            if country_name not in runtimes and runtime not in runtimes.values():
                runtimes[country_name] = runtime
        return runtimes

    @abc.abstractmethod
    async def _runtimes(self, id):
        """Return mapping of raw keys to runtimes for :meth:`runtimes`"""

    @abc.abstractmethod
    async def summary(self, id):
        """Return short plot description"""

    @abc.abstractmethod
    async def _title_original(self, id):
        """Return original title"""

    @abc.abstractmethod
    async def _titles_english(self, id):
        """
        Return sequence of English titles (e.g. for different
        English-speaking countries)

        :meth:`title_english` picks one that is not too similar to the original
        title.
        """

    async def title_original(self, id):
        """
        Return original title or empty string if `id` is falsy

        See also :meth:`title_english`.
        """
        if id:
            return await self._title_original(id)
        return ''

    async def title_english(self, id, *, default_to_original=False):
        """
        Return English title (AKA) or empty string

        If the English title is too similar to the original title, return an
        empty string.

        Titles are considered too similar if they are equal or either one
        contains the other after normalization.

        Titles are normalized by casefolding, removing whitespace, translating
        roman numerals to arabic, etc.

        For example, if the original title is "Föö & Bár II" and the English
        title is "The Foo and Bar 2", the titles are too similar.

        :param default_to_original: Instead of defaulting to an empty string if
            no appropriate English title is found, default to the original title
        """
        if not id:
            return ''

        english_titles = await self._titles_english(id)
        original_title = await self._title_original(id)

        # Pick the first English title that is not just a variation of the
        # original title (e.g. english_title="The Foo", original_title="Föó")
        for english_title in english_titles:
            if not self._titles_are_similar(english_title, original_title):
                return english_title

        # There is no sufficiently distinct English title
        if default_to_original:
            return original_title
        return ''

    def _titles_are_similar(self, a, b):
        """
        Whether normalized `a` contains normalized `b` or vice versa

        :return: `True` or `False`; always `False` if either title is empty
            after normalization
        """
        an = self._normalize_title(a)
        bn = self._normalize_title(b)
        # bool() because the `and` chain would otherwise return one of the
        # normalized strings if it is empty
        return bool(an and bn and (an in bn or bn in an))

    def _normalize_title(self, title):
        """
        Return casefolded `title` with normalized punctuation, whitespace, etc

        Standalone arabic numbers and standalone runs of "I" are both replaced
        by ``<arabic>/<roman>`` so that, for example, "2" and "II" end up
        identical.
        """
        # Replace special characters with similar ASCII
        title = title.replace('&', 'and')
        title = unidecode.unidecode(title)

        # Remove all punctuation
        title = self._punctuation_regex.sub('', title)

        # Deduplicate whitespace into spaces (U+0020) so that word boundaries
        # are still intact when numbers are matched
        title = ' '.join(title.split())

        # Remove difference between arabic and roman numbers
        max_length = self._roman_numeral_max_length

        def normalize_part(match):
            num = match.group(1)
            if num.isdigit():
                num_arabic = num
                count = int(num)
            else:
                # Run of "I": The number is the amount of characters
                count = len(num)
                num_arabic = count
            # The arabic part is always exact; only the generated run of "I" is
            # limited to keep absurdly large numbers from exhausting memory
            num_roman = 'I' * min(count, max_length)
            return f'{num_arabic}/{num_roman}'

        title = self._number_regex.sub(normalize_part, title)

        # Remove all whitespace
        title = ''.join(title.split())

        # Case-insensitivize
        return title.casefold()

    @abc.abstractmethod
    async def type(self, id):
        """Return :class:`~.types.ReleaseType`"""

    @abc.abstractmethod
    async def url(self, id):
        """Return URL for `id`"""

    @abc.abstractmethod
    async def year(self, id):
        """Return release year or empty string"""

    async def gather(self, id, *methods):
        """
        Fetch information concurrently

        :param id: Valid ID for this DB
        :param methods: Names of coroutine methods of this class
        :type methods: sequence of :class:`str`

        :return: Dictionary that maps `methods` to return values

            The key ``id`` maps to `id` unless one of the `methods` is also
            named ``id``.

        :raise AttributeError: if any name in `methods` is not an attribute of
            this instance
        """
        # Resolve all names before calling anything so that a misspelled name
        # fails before any coroutine is created (and then never awaited)
        corofuncs = [getattr(self, method) for method in methods]
        results = await asyncio.gather(*(corofunc(id) for corofunc in corofuncs))

        # "The order of result values corresponds to the order of awaitables in `awaitables`."
        # https://docs.python.org/3/library/asyncio-task.html#running-tasks-concurrently
        dct = {'id': id}
        dct.update(zip(methods, results))
        return dct