A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://developers.oxylabs.io/scraping-solutions/web-scraper-api/targets/youtube/youtube-transcript below:

YouTube Transcript | Oxylabs Documentation

YouTube Transcript | Oxylabs Documentation

Documentation has been updated: see help center and changelog in one place.
  1. Scraping Solutions
  2. Web Scraper API
  3. Targets
  4. YouTube
YouTube Transcript

Learn how to scrape transcripts from YouTube videos using Web Scraper API. Find out more about its parameters and practical examples.

You can get YouTube video transcripts by providing a YouTube video ID and a language_code to the youtube_transcript source.

Transcripts are separate from subtitles and closed captions (CC). To extract subtitles and closed captions, use the YouTube Subtitles source.

The following examples demonstrate how to retrieve the English transcript of a YouTube video, specifically when the transcript was provided by the uploader.

# Request the English, uploader-provided transcript of video SLoqvcnwwN4.
# Replace USERNAME:PASSWORD with your own API credentials.
curl --data '{
        "source": "youtube_transcript",
        "query": "SLoqvcnwwN4",
        "context": [
            {
                "key": "language_code",
                "value": "en"
            },
            {
                "key": "transcript_origin",
                "value": "uploader_provided"
            }
        ]
    }' \
--header 'Content-Type: application/json' \
-u 'USERNAME:PASSWORD' \
'https://realtime.oxylabs.io/v1/queries'
import requests
from pprint import pprint

# API credentials: replace these placeholders with your own.
USERNAME = 'USERNAME'
PASSWORD = 'PASSWORD'

# Structure payload: the video ID goes in 'query'; the transcript language
# and origin are passed as key/value pairs in 'context'.
payload = {
    'source': 'youtube_transcript',
    'query': 'SLoqvcnwwN4',
    'context': [
        {
            'key': 'language_code',
            'value': 'en'
        },
        {
            'key': 'transcript_origin',
            'value': 'uploader_provided'
        }
    ]
}

# Get response. An explicit timeout is set because requests otherwise waits
# indefinitely for the server to answer.
response = requests.post(
    'https://realtime.oxylabs.io/v1/queries',
    auth=(USERNAME, PASSWORD),
    json=payload,
    timeout=180,
)

# Print the JSON response with the result.
pprint(response.json())
const https = require("https");

// API credentials: replace these placeholders with your own.
const username = "USERNAME";
const password = "PASSWORD";

// Job description: the video ID goes in `query`; the transcript language and
// origin are passed as key/value pairs in `context`.
const body = {
    source: "youtube_transcript",
    query: "SLoqvcnwwN4",
    context: [
        {
            key: "language_code",
            value: "en",
        },
        {
            key: "transcript_origin",
            value: "uploader_provided",
        },
    ],
};

// HTTPS request settings, including the HTTP Basic Authorization header
// built from the credentials above.
const options = {
    hostname: "realtime.oxylabs.io",
    path: "/v1/queries",
    method: "POST",
    headers: {
        "Content-Type": "application/json",
        Authorization:
            "Basic " + Buffer.from(`${username}:${password}`).toString("base64"),
    },
};

const request = https.request(options, (response) => {
    // Decode at the stream level: converting each Buffer chunk to a string on
    // its own would corrupt a multi-byte character split across two chunks.
    response.setEncoding("utf8");

    let data = "";

    response.on("data", (chunk) => {
        data += chunk;
    });

    response.on("end", () => {
        try {
            const responseData = JSON.parse(data);
            console.log(JSON.stringify(responseData, null, 2));
        } catch (error) {
            // The body was not valid JSON; report it instead of crashing with
            // an uncaught exception inside the event handler.
            console.error(
                `Error: could not parse response (HTTP ${response.statusCode}) as JSON:`,
                error.message,
            );
            console.error(data);
        }
    });
});

request.on("error", (error) => {
    console.error("Error:", error);
});

request.write(JSON.stringify(body));
request.end();
# The whole string you submit has to be URL-encoded.

https://realtime.oxylabs.io/v1/queries?source=youtube_transcript&query=SLoqvcnwwN4&context=%5B%7B%22key%22%3A%22language_code%22%2C%22value%22%3A%22en%22%7D%2C%7B%22key%22%3A%22transcript_origin%22%2C%22value%22%3A%22uploader_provided%22%7D%5D&access_token=12345abcde
<?php

// Job description: the video ID goes in 'query'; the transcript language and
// origin are passed as key/value pairs in 'context'.
$jobParams = [
    'source' => 'youtube_transcript',
    'query' => 'SLoqvcnwwN4',
    'context' => [
        ['key' => 'language_code', 'value' => 'en'],
        ['key' => 'transcript_origin', 'value' => 'uploader_provided'],
    ],
];

$curl = curl_init();

// Configure the whole transfer in one call: JSON POST with basic-auth
// credentials, returning the response body instead of printing it directly.
curl_setopt_array($curl, [
    CURLOPT_URL => "https://realtime.oxylabs.io/v1/queries",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($jobParams),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => "USERNAME" . ":" . "PASSWORD",
    CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
]);

$responseBody = curl_exec($curl);
echo $responseBody;

// Report transport-level failures recorded on the handle.
if (curl_errno($curl)) {
    echo 'Error:' . curl_error($curl);
}

curl_close($curl);
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
)

// main posts a youtube_transcript job to the Realtime endpoint using HTTP
// basic auth and prints the raw response body.
func main() {
	const (
		Username = "USERNAME"
		Password = "PASSWORD"
	)
	const endpoint = "https://realtime.oxylabs.io/v1/queries"

	// Context parameters: transcript language and origin.
	contextParams := []map[string]string{
		{"key": "language_code", "value": "en"},
		{"key": "transcript_origin", "value": "uploader_provided"},
	}

	// Job description: the video ID goes in "query".
	jobSpec := map[string]interface{}{
		"source":  "youtube_transcript",
		"query":   "SLoqvcnwwN4",
		"context": contextParams,
	}

	encoded, err := json.Marshal(jobSpec)
	if err != nil {
		fmt.Println("Error marshalling JSON:", err)
		return
	}

	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(encoded))
	if err != nil {
		fmt.Println("Error creating request:", err)
		return
	}
	req.Header.Set("Content-Type", "application/json")
	req.SetBasicAuth(Username, Password)

	httpClient := http.Client{}
	resp, err := httpClient.Do(req)
	if err != nil {
		fmt.Println("Error making request:", err)
		return
	}
	defer resp.Body.Close()

	raw, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Error reading response:", err)
		return
	}

	fmt.Println(string(raw))
}
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Text;
using System.Threading.Tasks;

namespace OxyApi
{
    /// <summary>
    /// Console sample that posts a youtube_transcript job to the Realtime
    /// endpoint with HTTP basic auth and prints the response body.
    /// </summary>
    class Program
    {
        static async Task Main()
        {
            // API credentials: replace these placeholders with your own.
            const string Username = "USERNAME";
            const string Password = "PASSWORD";

            // Job description: the video ID goes in "query"; the transcript
            // language and origin are passed as key/value pairs in "context".
            var parameters = new
            {
                source = "youtube_transcript",
                query = "SLoqvcnwwN4",
                context = new[]
                {
                    new { key = "language_code", value = "en" },
                    new { key = "transcript_origin", value = "uploader_provided" }
                }
            };

            // HttpClient, the request and the response are all IDisposable;
            // the using declarations release them when Main exits.
            using var client = new HttpClient
            {
                BaseAddress = new Uri("https://realtime.oxylabs.io")
            };

            using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
            {
                Content = JsonContent.Create(parameters)
            };

            var authenticationString = $"{Username}:{Password}";
            var base64EncodedAuthenticationString = Convert.ToBase64String(Encoding.UTF8.GetBytes(authenticationString));
            requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

            try
            {
                using var response = await client.SendAsync(requestMessage);

                // Throws HttpRequestException for non-success status codes.
                response.EnsureSuccessStatusCode();

                var contents = await response.Content.ReadAsStringAsync();
                Console.WriteLine(contents);
            }
            catch (HttpRequestException e)
            {
                Console.WriteLine($"Request error: {e.Message}");
            }
        }
    }
}
package org.example;

import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;

/**
 * Sample that posts a youtube_transcript job to the Realtime endpoint with
 * HTTP basic auth (via an OkHttp authenticator) and prints the response body.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    // API credentials: replace these placeholders with your own.
    public static final String USERNAME = "USERNAME";
    public static final String PASSWORD = "PASSWORD";

    /**
     * Builds the JSON payload, sends the request, prints the response body
     * (or the error message), then terminates the JVM.
     */
    public void run() {
        // Construct JSON payload with context parameters
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("source", "youtube_transcript");
        jsonObject.put("query", "SLoqvcnwwN4");

        JSONArray contextArray = new JSONArray();
        contextArray.put(new JSONObject().put("key", "language_code").put("value", "en"));
        contextArray.put(new JSONObject().put("key", "transcript_origin").put("value", "uploader_provided"));

        jsonObject.put("context", contextArray);

        Authenticator authenticator = (route, response) -> {
            // If the failed request already carried credentials, they were
            // rejected; returning null gives up instead of re-sending the
            // same credentials again.
            if (response.request().header(AUTHORIZATION_HEADER) != null) {
                return null;
            }
            String credential = Credentials.basic(USERNAME, PASSWORD);
            return response
                    .request()
                    .newBuilder()
                    .header(AUTHORIZATION_HEADER, credential)
                    .build();
        };

        var client = new OkHttpClient.Builder()
                .authenticator(authenticator)
                .readTimeout(180, TimeUnit.SECONDS)
                .build();

        var mediaType = MediaType.parse("application/json; charset=utf-8");
        var body = RequestBody.create(jsonObject.toString(), mediaType);
        var request = new Request.Builder()
                .url("https://realtime.oxylabs.io/v1/queries")
                .post(body)
                .build();

        // Closing the response also closes its body, so a single
        // try-with-resources is sufficient.
        try (var response = client.newCall(request).execute()) {
            var responseBody = response.body();
            if (responseBody != null) {
                System.out.println(responseBody.string());
            }
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        System.exit(0);
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}
{
    "source": "youtube_transcript",
    "query": "SLoqvcnwwN4",
    "context": [
        {
            "key": "language_code",
            "value": "en"
        },
        {
            "key": "transcript_origin",
            "value": "uploader_provided"
        }
    ]
}

We use the synchronous Realtime integration method in our examples. If you would like to use the Proxy Endpoint or the asynchronous Push-Pull integration, refer to the Integration Methods section.

context: language_code

Specifies the language of the transcript. Find available values here. NOTE: If the provided language_code has no matching transcript in the YouTube video, the result returns a 404 status.

context: transcript_origin (optional)

Specifies whether to retrieve transcripts that are auto_generated or uploader_provided.

- mandatory parameter


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4