aleph.crawlers.crawler

Crawler

class aleph.crawlers.crawler.Crawler

Imports

  • object
get_id(cls)
__init__(self)
__repr__(self)
collection(self)
crawl(self)
execute(self)
increment_count(self)
load_collection(self, data)
make_meta(self)
save_data(self, data)

Store a lump of data in a temporary file.

save_response(self, res)

Store the data returned in a requests response to a file.

skip_incremental(self, foreign_id)
to_dict(self)

CrawlerException

class aleph.crawlers.crawler.CrawlerException

Imports

  • exceptions.Exception

DocumentCrawler

class aleph.crawlers.crawler.DocumentCrawler

Imports

  • aleph.crawlers.crawler.Crawler
emit_file(self, meta, file_path)
emit_url(self, meta, url)
execute(self)

EntityCrawler

class aleph.crawlers.crawler.EntityCrawler

Imports

  • aleph.crawlers.crawler.Crawler
emit_entity(self, collection, data)

RunLimitException

class aleph.crawlers.crawler.RunLimitException

Imports

  • aleph.crawlers.crawler.CrawlerException