"""
This module provides some commonly used processors for Item Loaders.
See documentation in docs/topics/loaders.rst
"""
from collections import ChainMap
from scrapy.utils.misc import arg_to_iter
from scrapy.loader.common import wrap_loader_context
class MapCompose:
    """Processor that applies a chain of functions to every input value.

    The input is converted to an iterable with ``arg_to_iter``. Each function
    is then applied, in order, to every element of the current values; every
    result is itself passed through ``arg_to_iter`` and the pieces are
    concatenated to form the values handed to the next function.

    :param functions: callables applied in order to each value
    :param default_loader_context: keyword arguments used as the default
        loader context; keys of the ``loader_context`` supplied at call time
        take precedence over these
    """

    def __init__(self, *functions, **default_loader_context):
        self.functions = functions
        self.default_loader_context = default_loader_context

    def __call__(self, value, loader_context=None):
        """Run every function over every value and return the final list.

        :param value: the input; converted to an iterable via ``arg_to_iter``
        :param loader_context: optional mapping layered on top of the default
            loader context; when falsy, only the defaults are used
        :return: the values produced by the last function (or the iterable
            built from ``value`` when there are no functions)
        :raises ValueError: if any function raises an ``Exception``; the
            original exception is attached as ``__cause__``
        """
        values = arg_to_iter(value)
        if loader_context:
            # Call-time context shadows the defaults without copying either.
            context = ChainMap(loader_context, self.default_loader_context)
        else:
            context = self.default_loader_context
        wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
        for func in wrapped_funcs:
            next_values = []
            for v in values:
                try:
                    next_values += arg_to_iter(func(v))
                except Exception as e:
                    # The message reports the whole original input ``value``,
                    # not only the element being processed; kept as-is so the
                    # text callers may rely on does not change.
                    raise ValueError("Error in MapCompose with "
                                     "%s value=%r error='%s: %s'" %
                                     (str(func), value, type(e).__name__,
                                      str(e))) from e
            values = next_values
        return values
class Compose:
    """Processor that pipes a single value through a chain of functions.

    The first function receives the input value, and each following function
    receives the result of the previous one. The result of the last function
    is returned.

    :param functions: callables applied in order
    :param default_loader_context: keyword arguments used as the default
        loader context; keys of the ``loader_context`` supplied at call time
        take precedence over these. The ``stop_on_none`` key (default
        ``True``) is read once, at construction time, and controls whether
        processing stops as soon as the current value is ``None``.
    """

    def __init__(self, *functions, **default_loader_context):
        self.functions = functions
        self.stop_on_none = default_loader_context.get('stop_on_none', True)
        self.default_loader_context = default_loader_context

    def __call__(self, value, loader_context=None):
        """Apply the functions in sequence and return the final value.

        :param value: the initial value fed to the first function
        :param loader_context: optional mapping layered on top of the default
            loader context; when falsy, only the defaults are used
        :return: the result of the last function applied, or the current
            value at the point where processing stopped on ``None``
        :raises ValueError: if any function raises an ``Exception``; the
            original exception is attached as ``__cause__``
        """
        if loader_context:
            # Call-time context shadows the defaults without copying either.
            context = ChainMap(loader_context, self.default_loader_context)
        else:
            context = self.default_loader_context
        wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
        for func in wrapped_funcs:
            if value is None and self.stop_on_none:
                break
            try:
                value = func(value)
            except Exception as e:
                # ``value`` still holds the input of the failing function,
                # because the assignment above did not complete.
                raise ValueError("Error in Compose with "
                                 "%s value=%r error='%s: %s'" %
                                 (str(func), value, type(e).__name__,
                                  str(e))) from e
        return value
class TakeFirst:
    """Processor that returns the first value that is neither ``None`` nor ``''``."""

    def __call__(self, values):
        """Return the first usable element of ``values``.

        :param values: an iterable of candidate values
        :return: the first element that is not ``None`` and does not compare
            equal to the empty string, or ``None`` when there is no such
            element
        """
        usable = (
            candidate
            for candidate in values
            if candidate is not None and candidate != ''
        )
        return next(usable, None)
class Identity:
    """Processor that hands its input back untouched."""

    def __call__(self, values):
        """Return ``values`` itself, without copying or modifying it.

        :param values: any object
        :return: the very same object that was passed in
        """
        unchanged = values
        return unchanged
class SelectJmes:
    """
    Processor that runs a JMESPath query against its input.

    The query expression is given at instantiation and compiled once; each
    call searches the supplied value with the compiled expression and returns
    the result.

    Requires: jmespath (https://github.com/jmespath/jmespath)

    Note: SelectJmes accepts only one input element at a time.
    """

    def __init__(self, json_path):
        """Store the query expression and compile it.

        :param json_path: the JMESPath expression to evaluate on each call
        """
        self.json_path = json_path
        # Imported here rather than at module level, so jmespath is only
        # needed when a SelectJmes instance is actually created.
        import jmespath
        self.compiled_path = jmespath.compile(self.json_path)

    def __call__(self, value):
        """Search ``value`` with the compiled query and return the match.

        :param value: a data structure (dict, list) to extract from
        :return: the element extracted according to the jmespath query
        """
        extracted = self.compiled_path.search(value)
        return extracted
class Join:
    """Processor that concatenates its input values with a separator."""

    def __init__(self, separator=u' '):
        """Remember the separator placed between joined values.

        :param separator: the joining string; a single space by default
        """
        self.separator = separator

    def __call__(self, values):
        """Join ``values`` using the configured separator.

        :param values: an iterable accepted by the separator's ``join`` method
        :return: the result of ``separator.join(values)``
        """
        glue = self.separator
        joined = glue.join(values)
        return joined