# -*- coding: utf-8 -*-
"""
Burst filtering class and methods
"""
import re
import string
import hashlib
from urllib import unquote
from parser.HTMLParser import HTMLParser
from elementum.provider import log, get_setting
from providers.definitions import definitions
from providers.helpers import t411season, t411episode
from utils import Magnet, get_int, get_float, clean_number, size_int, get_alias
try:
from collections import OrderedDict
except:
from ordereddict import OrderedDict
class Filtering:
    """
    Filtering class

    Attributes:
        resolutions (OrderedDict): Ordered dictionary of resolution filters to be used depending on settings
        resolutions_allow  (list): List of resolutions to allow in search results
        filter_resolutions (bool): ``False`` when every resolution is enabled, in which case
                                   resolution filtering is skipped entirely
        release_types  (dict): Dictionary of release types to be used depending on settings
        releases_allow (list): List of release types to allow in search results
        releases_deny  (list): List of release types to deny in search results
        require_keywords (list): List of keywords to require in search results
        min_size (float): Minimum required size
        max_size (float): Maximum possible size
        filter_title (bool): Whether or not this provider needs titles to be double-checked,
                             typically used for providers that return too many results from their search
                             engine when no results are found (ie. TorLock and TorrentZ)
        queries (list): List of queries to be filtered
        extras (list): List of extras to be filtered
        info (dict): Payload from Elementum
        kodi_language (str): Language code from Kodi if kodi_language setting is enabled
        language_exceptions (list): List of providers for which not to apply ``kodi_language`` setting
        url (str): URL of this filtering request
        get_data (dict): GET data for client request
        post_data (dict): POST data for client request
        title (str): Result title to be used when matching with ``filter_title`` enabled
        reason (str): Rejection reason when result does not match
        results (list): Filtered, accepted results
    """
    def __init__(self):
        """ Builds the allow/deny keyword lists and size limits from the add-on settings
        """
        # In filter keys, an underscore stands for a mandatory space (word
        # boundary), see ``included()``
        resolutions = OrderedDict()
        resolutions['filter_240p'] = ['240p', '_tvrip_', 'satrip', 'vhsrip']
        resolutions['filter_480p'] = ['480p', 'xvid', 'dvd', 'dvdrip', 'hdtv']
        resolutions['filter_720p'] = ['720p', 'hdrip', 'bluray', 'brrip', 'bdrip']
        resolutions['filter_1080p'] = ['1080p', 'fullhd', '_fhd_']
        resolutions['filter_2k'] = ['_2k_', '1440p']
        resolutions['filter_4k'] = ['_4k_', '2160p']
        self.resolutions = resolutions

        self.release_types = {
            'filter_brrip': ['brrip', 'bdrip', 'bluray'],
            'filter_webdl': ['webdl', 'webrip', 'web_dl', 'dlrip', '_yts_'],
            'filter_hdrip': ['hdrip'],
            'filter_hdtv': ['hdtv'],
            'filter_dvd': ['_dvd_', 'dvdrip'],
            'filter_dvdscr': ['dvdscr'],
            'filter_screener': ['screener', '_scr_'],
            'filter_3d': ['_3d_'],
            'filter_telesync': ['telesync', '_ts_', '_tc_'],
            'filter_cam': ['_cam_', 'hdcam'],
            'filter_tvrip': ['_tvrip_', 'satrip'],
            'filter_vhsrip': ['vhsrip'],
            'filter_trailer': ['trailer'],
            'filter_workprint': ['workprint']
        }

        require = []
        resolutions_allow = []
        releases_allow = []
        releases_deny = []

        for resolution in self.resolutions:
            if get_setting(resolution, bool):
                resolutions_allow.append(resolution)
                # Add enabled resolutions to allowed release types to match
                # previous versions' behavior with certain providers
                # with no release types in torrent names, ie. YTS
                releases_allow.extend(self.resolutions[resolution])
        self.resolutions_allow = resolutions_allow

        # Skip resolution filtering if we're allowing all of them anyway
        self.filter_resolutions = len(self.resolutions_allow) != len(self.resolutions)

        for release_type in self.release_types:
            if get_setting(release_type, bool):
                releases_allow.extend(self.release_types[release_type])
            else:
                releases_deny.extend(self.release_types[release_type])

        if get_setting('additional_filters', bool):
            accept = get_setting('accept').strip().lower()
            if accept:
                releases_allow.extend(re.split(r',\s?', accept))

            block = get_setting('block').strip().lower()
            if block:
                releases_deny.extend(re.split(r',\s?', block))

            required = get_setting('require').strip().lower()
            if required:
                # Only replace the default when something was configured so
                # that ``require_keywords`` is always a list
                require = re.split(r',\s?', required)

        self.releases_allow = releases_allow
        self.releases_deny = releases_deny
        self.require_keywords = require

        self.min_size = get_float(get_setting('min_size'))
        self.max_size = get_float(get_setting('max_size'))
        self.check_sizes()

        self.filter_title = False

        self.queries = []
        self.extras = []

        self.info = dict(title="", titles=[])
        self.kodi_language = ''
        self.language_exceptions = []
        self.get_data = {}
        self.post_data = {}
        self.url = ''
        self.title = ''
        self.reason = ''
        self.results = []

    def use_general(self, provider, payload):
        """ Setup method to define general search parameters

        Args:
            provider (str): Provider ID
            payload (dict): Elementum search payload
        """
        definition = definitions[provider]
        definition = get_alias(definition, get_setting("%s_alias" % provider))
        general_query = definition['general_query'] if definition['general_query'] else ''
        log.debug("General URL: %s%s" % (definition['base_url'], general_query))
        self.info = payload
        self.url = u"%s%s" % (definition['base_url'], general_query)
        if definition['general_keywords']:
            self.queries = [definition['general_keywords']]
            self.extras = [definition['general_extra']]

    def use_movie(self, provider, payload):
        """ Setup method to define movie search parameters

        Args:
            provider (str): Provider ID
            payload (dict): Elementum search payload
        """
        definition = definitions[provider]
        definition = get_alias(definition, get_setting("%s_alias" % provider))
        movie_query = definition['movie_query'] if definition['movie_query'] else ''
        log.debug("Movies URL: %s%s" % (definition['base_url'], movie_query))
        if get_setting('separate_sizes', bool):
            self.min_size = get_float(get_setting('min_size_movies'))
            self.max_size = get_float(get_setting('max_size_movies'))
            self.check_sizes()
        self.info = payload
        self.url = u"%s%s" % (definition['base_url'], movie_query)
        if definition['movie_keywords']:
            self.queries = ["%s" % definition['movie_keywords']]
            # Same guard as the other use_* methods: an unset extra must become
            # an empty string, not the literal text 'None'
            self.extras = ["%s" % definition['movie_extra'] if definition['movie_extra'] else '']

    def use_episode(self, provider, payload):
        """ Setup method to define episode search parameters

        Args:
            provider (str): Provider ID
            payload (dict): Elementum search payload
        """
        definition = definitions[provider]
        definition = get_alias(definition, get_setting("%s_alias" % provider))
        show_query = definition['show_query'] if definition['show_query'] else ''
        log.debug("Episode URL: %s%s" % (definition['base_url'], show_query))
        if get_setting('separate_sizes', bool):
            self.min_size = get_float(get_setting('min_size_episodes'))
            self.max_size = get_float(get_setting('max_size_episodes'))
            self.check_sizes()
        self.info = payload
        self.url = u"%s%s" % (definition['base_url'], show_query)
        if definition['tv_keywords']:
            self.queries = ["%s" % definition['tv_keywords']]
            self.extras = ["%s" % definition['tv_extra'] if definition['tv_extra'] else '']
            # TODO this sucks, tv_keywords should be a list from the start..
            if definition['tv_keywords2']:
                self.queries.append(definition['tv_keywords2'])
                self.extras.append(definition['tv_extra2'] if definition['tv_extra2'] else '')

    def use_season(self, provider, info):
        """ Setup method to define season search parameters

        Args:
            provider (str): Provider ID
            info    (dict): Elementum search payload
        """
        definition = definitions[provider]
        definition = get_alias(definition, get_setting("%s_alias" % provider))
        season_query = definition['season_query'] if definition['season_query'] else ''
        log.debug("Season URL: %s%s" % (definition['base_url'], season_query))
        if get_setting('separate_sizes', bool):
            self.min_size = get_float(get_setting('min_size_seasons'))
            self.max_size = get_float(get_setting('max_size_seasons'))
            self.check_sizes()
        self.info = info
        self.url = u"%s%s" % (definition['base_url'], season_query)
        if definition['season_keywords']:
            self.queries = ["%s" % definition['season_keywords']]
            self.extras = ["%s" % definition['season_extra'] if definition['season_extra'] else '']
            if definition['season_keywords2']:
                self.queries.append("%s" % definition['season_keywords2'])
                self.extras.append("%s" % definition['season_extra2'] if definition['season_extra2'] else '')

    def use_anime(self, provider, info):
        """ Setup method to define anime search parameters

        Args:
            provider (str): Provider ID
            info    (dict): Elementum search payload
        """
        definition = definitions[provider]
        definition = get_alias(definition, get_setting("%s_alias" % provider))
        anime_query = definition['anime_query'] if definition['anime_query'] else ''
        log.debug("Anime URL: %s%s" % (definition['base_url'], anime_query))
        if get_setting('separate_sizes', bool):
            self.min_size = get_float(get_setting('min_size_episodes'))
            self.max_size = get_float(get_setting('max_size_episodes'))
            self.check_sizes()
        self.info = info
        self.url = u"%s%s" % (definition['base_url'], anime_query)
        # When an absolute number is provided it replaces the episode number;
        # a payload without that key is simply left untouched
        if self.info.get('absolute_number'):
            self.info['episode'] = self.info['absolute_number']
        if definition['anime_keywords']:
            self.queries = ["%s" % definition['anime_keywords']]
            self.extras = ["%s" % definition['anime_extra'] if definition['anime_extra'] else '']

    def check_sizes(self):
        """ Internal method to make sure size range settings are valid
        """
        if self.min_size > self.max_size:
            log.warning("Minimum size above maximum, using max size minus 1 GB")
            # Never let the corrected minimum drop below zero
            self.min_size = max(self.max_size - 1, 0)

    def read_keywords(self, keywords):
        """ Create list from keywords where the values are marked between curly brackets, ie. {title}

        Args:
            keywords (str): String with all the keywords, ie. '{title} {year} movie'

        Returns:
            list: List of keyword names without their brackets, ie. ['title', 'year']
        """
        if not keywords:
            return []
        return re.findall(r'{(.*?)}', keywords)

    def process_keywords(self, provider, text):
        """ Processes the query payload from a provider's keyword definitions

        Args:
            provider (str): Provider ID
            text     (str): Keyword placeholders from definitions, ie. {title}

        Returns:
            str: Processed query keywords
        """
        for raw_keyword in self.read_keywords(text):
            # Keywords are interpreted case-insensitively, but the replacement
            # has to target the placeholder exactly as it is written in ``text``
            placeholder = '{%s}' % raw_keyword
            keyword = raw_keyword.lower()

            if 'title' in keyword:
                title = self.info["title"]
                language = definitions[provider]['language']
                use_language = None
                # '{title:xx}' requests the title in language 'xx'
                if ':' in keyword:
                    use_language = keyword.split(':')[1]
                if provider not in self.language_exceptions and \
                   (use_language or self.kodi_language) and \
                   'titles' in self.info and self.info['titles']:
                    try:
                        # Kodi's language wins, then the requested one, then
                        # the provider's own language as a last resort
                        if self.kodi_language and self.kodi_language in self.info['titles']:
                            use_language = self.kodi_language
                        if use_language not in self.info['titles']:
                            use_language = language
                        if use_language in self.info['titles'] and self.info['titles'][use_language]:
                            title = self.info['titles'][use_language]
                            title = self.normalize_name(title)
                            log.info("[%s] Using translated '%s' title %s" % (provider, use_language,
                                                                              repr(title)))
                            log.debug("[%s] Translated titles from Elementum: %s" % (provider,
                                                                                     repr(self.info['titles'])))
                    except Exception as e:
                        # Best effort: keep the untranslated title on any failure
                        import traceback
                        log.error("%s failed with: %s" % (provider, repr(e)))
                        for line in traceback.format_exc().split("\n"):
                            log.debug(line)
                text = text.replace(placeholder, title)

            if 'year' in keyword:
                text = text.replace(placeholder, str(self.info["year"]))

            if 'season' in keyword:
                if '+' in keyword:
                    # '{season+N}' adds an offset, '{season+t411season}' maps the value
                    keys = keyword.split('+')
                    if keys[1] == "t411season":
                        season = str(t411season(self.info['season']))
                    else:
                        season = str(self.info["season"] + get_int(keys[1]))
                elif ':' in keyword:
                    # '{season:N}' zero-pads the number to N digits
                    keys = keyword.split(':')
                    season = ('%%.%sd' % keys[1]) % self.info["season"]
                else:
                    season = '%s' % self.info["season"]
                text = text.replace(placeholder, season)

            if 'episode' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    if keys[1] == "t411episode":
                        episode = str(t411episode(self.info['episode']))
                    else:
                        episode = str(self.info["episode"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    episode = ('%%.%sd' % keys[1]) % self.info["episode"]
                else:
                    episode = '%s' % self.info["episode"]
                text = text.replace(placeholder, episode)

        return text

    def verify(self, provider, name, size):
        """ Main filtering method to match torrent names, resolutions, release types and size filters

        Sets ``self.reason`` to a description of the first failed check.

        Args:
            provider (str): Provider ID
            name     (str): Torrent name
            size     (str): Arbitrary torrent size to be parsed

        Returns:
            bool: ``True`` if torrent name passed filtering, ``False`` otherwise.
        """
        if not name:
            self.reason = '[%s] %s' % (provider, '*** Empty name ***')
            return False

        name = self.exception(name)
        name = self.normalize_name(name)
        if self.filter_title and self.title:
            self.title = self.normalize_name(self.title)

        self.reason = "[%s] %70s ***" % (provider, name)

        if self.filter_resolutions:
            resolution = self.determine_resolution(name)
            if resolution not in self.resolutions_allow:
                self.reason += " Resolution not allowed"
                return False

        if self.filter_title:
            # Every word of the expected title has to appear in the name
            if not all(word in name for word in re.split(r'\s', self.title)):
                self.reason += " Name mismatch"
                return False

        if self.require_keywords:
            for required in self.require_keywords:
                if not self.included(name, keys=[required]):
                    self.reason += " Missing required keyword"
                    return False

        if not self.included(name, keys=self.releases_allow):
            self.reason += " Missing release type keyword"
            return False

        if self.included(name, keys=self.releases_deny):
            self.reason += " Blocked by release type keyword"
            return False

        # Results without a size skip the size check
        if size and not self.in_size_range(size):
            self.reason += " Size out of range"
            return False

        return True

    def in_size_range(self, size):
        """ Compares size ranges

        Args:
            size (str): File size string, ie. ``1.21 GB``

        Returns:
            bool: ``True`` if file size is within desired range, ``False`` otherwise
        """
        value = size_int(clean_number(size))
        # min_size/max_size are scaled by 1e9 before comparing with the parsed value
        return self.min_size * 1e9 <= value <= self.max_size * 1e9

    def determine_resolution(self, name):
        """ Determine torrent resolution from defined filters. Defaults to ``filter_480p``.

        When several resolutions match, the last one in ``self.resolutions`` wins.

        Args:
            name (str): Name of the torrent to determine the resolution for

        Returns:
            str: The filter key of the determined resolution, see self.resolutions
        """
        res = 'filter_480p'  # Default to 480p
        for resolution in self.resolutions:
            if self.included(name, keys=self.resolutions[resolution], strict=True):
                res = resolution
        return res

    def normalize_name(self, value):
        """ Method to normalize strings

        Replaces punctuation with spaces, unquotes and unescapes HTML characters.

        Args:
            value (str): File name or directory string to convert

        Returns:
            str: Converted file name or directory string
        """
        value = unquote(value)
        value = self.unescape(value)
        value = value.lower()

        for p in string.punctuation:
            value = value.replace(p, ' ')

        # Collapse runs of whitespace into single spaces
        value = ' '.join(value.split())

        return value

    def included(self, value, keys, strict=False):
        """ Check if the keys are present in the string

        Each key may hold several whitespace-separated items which all have to
        be found for that key to match. An underscore in an item stands for a
        space, ie. ``_4k_`` only matches ``4k`` as a separate word.

        Args:
            value   (str): Name of the torrent to check
            keys   (list): List of strings of which at least one must be included in ``value``
            strict (bool): When ``True``, every item must match as a whole,
                           space-delimited word instead of as a substring

        Returns:
            bool: True if ``keys`` contains ``*`` or any key matches, False otherwise.
        """
        # Pad so that words at either end of the name have a space on both sides
        value = ' ' + value + ' '
        if '*' in keys:
            return True

        for key in keys:
            matched = True
            for item in re.split(r'\s', key):
                item = item.replace('_', ' ')
                if strict:
                    # Strip first: underscore keys already carry their own
                    # spaces and padding them again would yield double spaces
                    # that can never match a normalized name
                    item = ' ' + item.strip() + ' '
                if item not in value:
                    matched = False
                    break
            if matched:
                return True
        return False

    def unescape(self, name):
        """ Unescapes all HTML entities from a string using
            HTMLParser().unescape()

        Args:
            name (str): String to convert

        Returns:
            str: Converted string
        """
        # Drop CDATA wrappers before unescaping
        name = name.replace('<![CDATA[', '').replace(']]', '')
        name = HTMLParser().unescape(name.lower())
        return name

    def exception(self, title=None):
        """ Change the title to the standard name in torrent sites

        Args:
            title (str): Title to check

        Returns:
            str: Standard title
        """
        if title:
            # All patterns below must be lower-case since the title is lower-cased first
            title = title.lower()
            title = title.replace('csi crime scene investigation', 'CSI')
            title = title.replace('law and order special victims unit', 'law and order svu')
            title = title.replace('law order special victims unit', 'law and order svu')
            title = title.replace('s h i e l d', 'SHIELD')
        return title
def apply_filters(results_list):
    """ Applies final result de-duplicating, hashing and sorting

    Delegates the actual work to ``cleanup_results()`` and logs the outcome
    at debug level.

    Args:
        results_list (list): Formatted results in any order

    Returns:
        list: Filtered and sorted results
    """
    cleaned = cleanup_results(results_list)
    log.debug("Filtered results: %s" % repr(cleaned))
    return cleaned
def cleanup_results(results_list):
    """ Remove duplicate results, hash results without an info_hash, and sort by seeders

    Results without seeds or without a URI are dropped. When two results share
    the same hash only the first one is kept.

    Args:
        results_list (list): Results to clean-up

    Returns:
        list: De-duplicated, hashed and sorted results
    """
    if not results_list:
        return []

    seen_hashes = set()
    filtered_list = []
    for result in results_list:
        if not result['seeds']:
            continue

        if not result['uri']:
            if not result['name']:
                continue
            try:
                log.warning('[%s] No URI for %s' % (result['provider'][16:-8], repr(result['name'])))
            except Exception as e:
                # Logging must never abort the clean-up
                import traceback
                log.warning("%s logging failed with: %s" % (result['provider'], repr(e)))
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)
            continue

        # A missing (None) info_hash is treated the same as an empty one
        hash_ = (result['info_hash'] or '').upper()

        if not hash_:
            if result['uri'].startswith('magnet'):
                hash_ = Magnet(result['uri']).info_hash.upper()
            else:
                # No info hash available, fall back to a digest of the URI.
                # md5 needs bytes, so text URIs are encoded explicitly instead
                # of relying on an implicit ASCII conversion
                uri = result['uri']
                if not isinstance(uri, bytes):
                    uri = uri.encode('utf-8')
                hash_ = hashlib.md5(uri).hexdigest()

        try:
            log.debug("[%s] Hash for %s: %s" % (result['provider'][16:-8], repr(result['name']), hash_))
        except Exception as e:
            import traceback
            log.warning("%s logging failed with: %s" % (result['provider'], repr(e)))
            for line in traceback.format_exc().split("\n"):
                log.debug(line)

        if hash_ not in seen_hashes:
            seen_hashes.add(hash_)
            filtered_list.append(result)

    return sorted(filtered_list, key=lambda r: get_int(r['seeds']), reverse=True)