aleph.crawlers.crawler

Crawler

class aleph.crawlers.crawler.Crawler

Imports

  • object
get_id(cls)
__init__(self)
__repr__(self)
collection(self)
crawl(self)
create_document(self)
execute(self)
increment_count(self)
load_collection(self, data)
make_meta(self)
save_data(self, data)

Store a blob of data in a temporary file.

save_response(self, res)

Store the data returned in a `requests` response to a file.

skip_incremental(self, foreign_id)
to_dict(self)

CrawlerException

class aleph.crawlers.crawler.CrawlerException

Imports

  • exceptions.Exception

CrawlerMetadata

class aleph.crawlers.crawler.CrawlerMetadata

Imports

  • <UNKNOWN>
__init__(self, data)

DocumentCrawler

class aleph.crawlers.crawler.DocumentCrawler

Imports

  • aleph.crawlers.crawler.Crawler
emit_file(self, document, file_path)
emit_url(self, document, url)
execute(self)

RunLimitException

class aleph.crawlers.crawler.RunLimitException

Imports

  • aleph.crawlers.crawler.CrawlerException