Scrape other websites (Home Depot, Idealo, Zillow, Yandex, Baidu, etc.) with our universal
source. It accepts URLs along with additional parameters.
In this example, the API will retrieve an e-commerce product page.
curl 'https://realtime.oxylabs.io/v1/queries' \
--user 'USERNAME:PASSWORD' \
-H 'Content-Type: application/json' \
-d '{
"source": "universal",
"url": "https://sandbox.oxylabs.io/products/1"
}'
import requests
from pprint import pprint
# Structure payload.
payload = {
'source': 'universal',
'url': 'https://sandbox.oxylabs.io/products/1',
}
# Get response.
response = requests.request(
'POST',
'https://realtime.oxylabs.io/v1/queries',
auth=('USERNAME', 'PASSWORD'),
json=payload,
)
# Instead of response with job status and results url, this will return the
# JSON response with the result.
pprint(response.json())
const https = require("https");
const username = "USERNAME";
const password = "PASSWORD";
const body = {
source: "universal",
url: "https://sandbox.oxylabs.io/products/1",
};
const options = {
hostname: "realtime.oxylabs.io",
path: "/v1/queries",
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization:
"Basic " + Buffer.from(`${username}:${password}`).toString("base64"),
},
};
const request = https.request(options, (response) => {
let data = "";
response.on("data", (chunk) => {
data += chunk;
});
response.on("end", () => {
const responseData = JSON.parse(data);
console.log(JSON.stringify(responseData, null, 2));
});
});
request.on("error", (error) => {
console.error("Error:", error);
});
request.write(JSON.stringify(body));
request.end();
# The whole string you submit has to be URL-encoded.
https://realtime.oxylabs.io/v1/queries?source=universal&url=https%3A%2F%2Fsandbox.oxylabs.io%2Fproducts%2F1&access_token=12345abcde
<?php
$params = array(
'source' => 'universal',
'url' => 'https://sandbox.oxylabs.io/products/1',
);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://realtime.oxylabs.io/v1/queries");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($params));
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_USERPWD, "USERNAME" . ":" . "PASSWORD");
$headers = array();
$headers[] = "Content-Type: application/json";
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
echo 'Error:' . curl_error($ch);
}
curl_close($ch);
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
func main() {
const Username = "USERNAME"
const Password = "PASSWORD"
payload := map[string]interface{}{
"source": "universal",
"url": "https://sandbox.oxylabs.io/products/1",
}
jsonValue, _ := json.Marshal(payload)
client := &http.Client{}
request, _ := http.NewRequest("POST",
"https://realtime.oxylabs.io/v1/queries",
bytes.NewBuffer(jsonValue),
)
request.SetBasicAuth(Username, Password)
response, _ := client.Do(request)
responseText, _ := ioutil.ReadAll(response.Body)
fmt.Println(string(responseText))
}
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
class Program
{
static async Task Main()
{
const string Username = "USERNAME";
const string Password = "PASSWORD";
var parameters = new {
source = "universal",
url = "https://sandbox.oxylabs.io/products/1"
};
var client = new HttpClient();
Uri baseUri = new Uri("https://realtime.oxylabs.io");
client.BaseAddress = baseUri;
var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries");
requestMessage.Content = JsonContent.Create(parameters);
var authenticationString = $"{Username}:{Password}";
var base64EncodedAuthenticationString = Convert.ToBase64String(System.Text.ASCIIEncoding.UTF8.GetBytes(authenticationString));
requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);
var response = await client.SendAsync(requestMessage);
var contents = await response.Content.ReadAsStringAsync();
Console.WriteLine(contents);
}
}
}
package org.example;
import okhttp3.*;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;
public class Main implements Runnable {
private static final String AUTHORIZATION_HEADER = "Authorization";
public static final String USERNAME = "USERNAME";
public static final String PASSWORD = "PASSWORD";
public void run() {
JSONObject jsonObject = new JSONObject();
jsonObject.put("source", "universal");
jsonObject.put("url", "https://sandbox.oxylabs.io/products/1");
Authenticator authenticator = (route, response) -> {
String credential = Credentials.basic(USERNAME, PASSWORD);
return response
.request()
.newBuilder()
.header(AUTHORIZATION_HEADER, credential)
.build();
};
var client = new OkHttpClient.Builder()
.authenticator(authenticator)
.readTimeout(180, TimeUnit.SECONDS)
.build();
var mediaType = MediaType.parse("application/json; charset=utf-8");
var body = RequestBody.create(jsonObject.toString(), mediaType);
var request = new Request.Builder()
.url("https://realtime.oxylabs.io/v1/queries")
.post(body)
.build();
try (var response = client.newCall(request).execute()) {
if (response.body() != null) {
try (var responseBody = response.body()) {
System.out.println(responseBody.string());
}
}
} catch (Exception exception) {
System.out.println("Error: " + exception.getMessage());
}
System.exit(0);
}
public static void main(String[] args) {
new Thread(new Main()).start();
}
}
{
"source": "universal",
"url": "https://sandbox.oxylabs.io/products/1"
}
Output example
{
"results": [
{
"content": "<!DOCTYPE html><html lang=\"en\">
CONTENT
</html>",
"created_at": "2024-07-01 11:35:14",
"updated_at": "2024-07-01 11:35:15",
"page": 1,
"url": "https://sandbox.oxylabs.io/products/1",
"job_id": "7213505428280329217",
"status_code": 200
}
]
}
We use synchronous Realtime integration method in our examples. If you would like to use Proxy Endpoint or asynchronous Push-Pull integration, refer to the integration methods section.
Direct URL (link) to any page.
- mandatory parameter
These are the parameters of our features.
Sets the proxy's geo location to retrieve data. Find supported locations here .
Enables JavaScript rendering when set to html
. More info. NOTE: If you are observing low success rates or retrieve empty content, please try adding this parameter.
Define your own browser instructions that are executed when rendering JavaScript. More info.
Returns parsed data when set to true
, as long as a dedicated parser exists for the submitted URL's page type.
Define your own parsing and data transformation logic that will be executed on an HTML scraping result. Read more: Parsing instructions examples.
Pass your own headers. Learn more here.
Pass your own cookies. Learn more here.
If you want to use the same proxy with multiple requests, you can do so by using this parameter. Just set your session to any string you like, and we will assign a proxy to this ID, and keep it for up to 10 minutes. After that, if you make another request with the same session ID, a new proxy will be assigned to that particular session ID.
Set it to post
if you would like to make a POST
request to your target URL via E-commerce Scraper API. Learn more here.
Device type and browser. The full list can be found here.
Base64-encoded POST
request body. It is only useful if http_method
is set to post
.
Add this parameter if you are downloading images. Learn more here.
context
: follow_redirects
Set to true
to enable scraper to follow redirects. By default, redirects are followed up to a limit of 10 links, treating the entire chain as one scraping job.
context
: successful_status_codes
Define a custom HTTP response code (or a few of them), upon which we should consider the scrape successful and return the content to you. May be useful if you want us to return the 503 error page or in some other non-standard cases.
All parameters
In this example, all available parameters are included (though not always necessary or compatible within the same request), to give you an idea on how to format your requests.
{
"source": "universal",
"url": "https://example.com",
"user_agent_type": "desktop",
"geo_location": "United States",
"parse": true,
"context": [
{
"key": "headers",
"value": {
"Content-Type": "application/octet-stream",
"Custom-Header-Name": "custom header content"
}
},
{
"key": "cookies",
"value": [
{
"key": "NID",
"value": "1234567890"
},
{
"key": "1P JAR",
"value": "0987654321"
}]
},
{
"key": "follow_redirects",
"value": true
},
{
"key": "http_method", "value": "get"
},
{
"key": "content",
"value": "YmFzZTY0RW5jb2RlZFBPU1RCb2R5"
},
{
"key": "successful_status_codes",
"value": [808, 909]
}]
}
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4