scrapingant-client
is the official library to access ScrapingAnt API from your Python applications. It provides useful features like parameters encoding to improve the ScrapingAnt usage experience. Requires Python 3.6+.
from scrapingant_client import ScrapingAntClient client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') # Scrape the example.com site result = client.general_request('https://example.com') print(result.content)
pip install scrapingant-client
If you need async support:
pip install scrapingant-client[async]
In order to get API token you'll need to register at ScrapingAnt Service
All public classes, methods and their parameters can be inspected in this API reference.
Main class of this library.
https://docs.scrapingant.com/request-response-format#available-parameters
| Param | Type | Default |
| --- | --- | --- |
| url | string | |
| method | string | GET |
| cookies | List[Cookie] | None |
| headers | List[Dict[str, str]] | None |
| js_snippet | string | None |
| proxy_type | ProxyType | datacenter |
| proxy_country | str | None |
| wait_for_selector | str | None |
| browser | boolean | True |
| return_page_source | boolean | False |
| data | same as requests param 'data' | None |
| json | same as requests param 'json' | None |
IMPORTANT NOTE: js_snippet
will be encoded to Base64 automatically by the ScrapingAnt client library.
Class defining cookie. Currently it supports only name and value
| Param | Type |
| --- | --- |
| name | string |
| value | string |
Class defining response from API.
| Param | Type |
| --- | --- |
| content | string |
| cookies | List[Cookie] |
| status_code | int |
| text | string |
ScrapingantClientException
is base Exception class, used for all errors.
from scrapingant_client import ScrapingAntClient from scrapingant_client import Cookie client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') result = client.general_request( 'https://httpbin.org/cookies', cookies=[ Cookie(name='cookieName1', value='cookieVal1'), Cookie(name='cookieName2', value='cookieVal2'), ] ) print(result.content) # Response cookies is a list of Cookie objects # They can be used in next requests response_cookies = result.cookies

Executing custom JS snippet
from scrapingant_client import ScrapingAntClient client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') customJsSnippet = """ var str = 'Hello, world!'; var htmlElement = document.getElementsByTagName('html')[0]; htmlElement.innerHTML = str; """ result = client.general_request( 'https://example.com', js_snippet=customJsSnippet, ) print(result.content)

Exception handling and retries
from scrapingant_client import ScrapingAntClient, ScrapingantClientException, ScrapingantInvalidInputException client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') RETRIES_COUNT = 3 def parse_html(html: str): ... # Implement your data extraction here parsed_data = None for retry_number in range(RETRIES_COUNT): try: scrapingant_response = client.general_request( 'https://example.com', ) except ScrapingantInvalidInputException as e: print(f'Got invalid input exception: {repr(e)}') break # We are not retrying if request params are not valid except ScrapingantClientException as e: print(f'Got ScrapingAnt exception {repr(e)}') except Exception as e: print(f'Got unexpected exception {repr(e)}') # please report this kind of exceptions by creating a new issue else: try: parsed_data = parse_html(scrapingant_response.content) break # Data is parsed successfully, so we don't need to retry except Exception as e: print(f'Got exception while parsing data {repr(e)}') if parsed_data is None: print(f'Failed to retrieve and parse data after {RETRIES_COUNT} tries') # Can sleep and retry later, or stop the script execution, and research the reason else: print(f'Successfully parsed data: {parsed_data}')
from scrapingant_client import ScrapingAntClient client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') result = client.general_request( 'https://httpbin.org/headers', headers={ 'test-header': 'test-value' } ) print(result.content) # Http basic auth example result = client.general_request( 'https://jigsaw.w3.org/HTTP/Basic/', headers={'Authorization': 'Basic Z3Vlc3Q6Z3Vlc3Q='} ) print(result.content)
import asyncio from scrapingant_client import ScrapingAntClient client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') async def main(): # Scrape the example.com site result = await client.general_request_async('https://example.com') print(result.content) asyncio.run(main())
from scrapingant_client import ScrapingAntClient client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') # Sending POST request with json data result = client.general_request( url="https://httpbin.org/post", method="POST", json={"test": "test"}, ) print(result.content) # Sending POST request with bytes data result = client.general_request( url="https://httpbin.org/post", method="POST", data=b'test_bytes', ) print(result.content)
from scrapingant_client import ScrapingAntClient client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>') # Requesting the page content converted to markdown result = client.markdown_request( url="https://example.com", ) print(result.markdown)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4