Source code for youtube_scraping_api.__main__

from youtube_scraping_api.parser.search_result import SearchResult, cleanupData as cleanupVideoData
from youtube_scraping_api.parser.playlist import Playlist, cleanupData as cleanupPlaylistData
from youtube_scraping_api.constants import PAYLOAD, HEADERS
from youtube_scraping_api.parser.channel import Channel
from youtube_scraping_api.parser.video import Video
from youtube_scraping_api.decorators import *
from youtube_scraping_api.filter import *
from youtube_scraping_api.utils import *
from youtube_scraping_api.urls import *

import itertools
import json

import requests

class YoutubeAPI:
    """Core developer interface for Youtube Scraping API

    :param debug_level: Which level must be reached in order to print out log messages
    :type debug_level: str, optional
    :raises RuntimeError: If the YouTube home page cannot be fetched
    """

    # Base request payload shared by every instance. It must never be mutated
    # in place: per-request payloads are built as copies (see ``search`` and
    # ``playlist``).
    _data = PAYLOAD

    def __init__(self, debug_level="ERROR"):
        self._session = requests.Session()
        # Copy so that later changes to the session headers cannot leak into
        # the shared HEADERS constant.
        self._session.headers = dict(HEADERS)
        self._debug_level = debug_level
        try:
            homepage_html = self._session.get("https://www.youtube.com").text
        except requests.RequestException as err:
            raise RuntimeError("Please check your internet connection") from err
        # NOTE(review): an unusually short page presumably means the request
        # was blocked or redirected, so fall back to a proxy -- confirm.
        if len(homepage_html) < 10000:
            self._session.proxies = get_proxy()
        # Map each level name to its rank: INFO=0, SUCCESS=1, WARNING=2, ERROR=3.
        self.DEBUG_LEVEL = {
            name: rank
            for rank, name in enumerate(["INFO", "SUCCESS", "WARNING", "ERROR"])
        }
        # Extracted from the home page and appended to the continuation URLs.
        self.API_TOKEN = find_snippet(homepage_html, "innertubeApiKey", ",", (3, 1))

    def search(self, query=None, continuation_token=None, raw=False, filter=None):
        """Parse YouTube search results of specific query or continuation token

        When both ``query`` and ``continuation_token`` are given, ``query``
        takes precedence and the token is ignored.

        :param query: A query string to search on YouTube
        :type query: str, optional
        :param continuation_token: A token generated by YouTube to fetch more search results
        :type continuation_token: str, optional
        :param raw: Whether to return search results in raw format. Default set to False
        :type raw: bool, optional
        :param filter: Filter for search results
        :type filter: SearchFilter, optional
        :return: Search results, or None when neither ``query`` nor
            ``continuation_token`` is given
        :rtype: SearchResult
        """
        if not (query or continuation_token):
            return None

        if query:
            # Collapse whitespace runs into single "+" separators.
            base_url = SEARCH_BASE_URL + "+".join(query.split())
            if filter and isinstance(filter, SearchFilter):
                final_url = get_filtered_url(self._session, base_url, filter)
            else:
                final_url = base_url
            html = self._session.get(final_url).text
            response = get_initial_data(html)
            next_token = parse_continuation_token(response)
            sections = [
                next(search_dict(section, "contents"))
                for section in search_dict(response, "itemSectionRenderer")
            ]
            result = SearchResult(
                itertools.chain.from_iterable(
                    [cleanupVideoData(section) for section in sections]
                ),
                next_token,
            )
            result.url = final_url
        else:
            # Build a per-request payload instead of writing the token into
            # the shared base payload.
            payload = {**self._data, "continuation": continuation_token}
            response = self._session.post(
                SEARCH_CONTINUATION_URL + self.API_TOKEN, json=payload
            ).json()
            next_token = parse_continuation_token(response)
            # Fall back to "continuationItems" when no "contents" entry exists.
            try:
                data = next(search_dict(response, "contents"))
            except StopIteration:
                data = next(search_dict(response, "continuationItems"))
            result = cleanupVideoData(data, next_token, to_object=True)

        return result.raw if raw else result

    def playlist(self, playlist_id=None, continuation_token=None):
        """Parse playlist metadata and videos

        When both arguments are given, ``playlist_id`` takes precedence.
        Remaining pages are followed until no continuation token is left.

        :param playlist_id: ID of playlist
        :type playlist_id: str, optional
        :param continuation_token: A token generated by YouTube to fetch more playlist videos
        :type continuation_token: str, optional
        :return: Playlist object when called with ``playlist_id``; a
            ``(videos, next_token)`` tuple when called with only
            ``continuation_token``; None when neither is given
        :rtype: Playlist
        """
        if not (playlist_id or continuation_token):
            return None

        if playlist_id:
            html = self._session.get(PLAYLIST_BASE_URL + playlist_id).text
            response = get_initial_data(html)
        else:
            # Build a per-request payload instead of writing the token into
            # the shared base payload.
            payload = {**self._data, "continuation": continuation_token}
            response = self._session.post(
                PLAYLIST_CONTINUTION_URL + self.API_TOKEN, json=payload
            ).json()

        next_token = parse_continuation_token(response)

        if playlist_id:
            result = Playlist(response)
        else:
            data = next(search_dict(response, "continuationItems"))
            result = cleanupPlaylistData(data)

        # Each recursive call itself drains every following page, so this loop
        # normally runs at most once per call.
        while next_token:
            more_videos, next_token = self.playlist(continuation_token=next_token)
            result += more_videos

        if playlist_id:
            return result
        return result, next_token

    def channel(self, channel_id=None, username=None):
        """Parse channel metadata and all its videos

        :param channel_id: ID of channel
        :type channel_id: str, optional
        :param username: Username of channel owner
        :type username: str, optional
        :return: Channel Object
        :rtype: Channel
        """
        return Channel(channel_id=channel_id, username=username)

    def video(self, video_id: str) -> Video:
        """Parse video metadata, captions, download link, etc.

        :param video_id: ID of Youtube video
        :type video_id: str
        :return: Video object
        :rtype: Video
        """
        return Video(video_id)

    def query_suggestions(self, query=None, language='en', country='gb'):
        """Return a list of query suggestions for given query string

        :param query: A string of query, defaults set to None
        :type query: str, optional
        :param language: Language of results, defaults set to 'en'
        :type language: str, optional
        :param country: Country code for more accurate suggestions, defaults set to 'gb'
        :type country: str, optional
        :return: A list of query suggestions
        :rtype: list
        :raises ValueError: If the response is not a ``callback(<json>)`` payload
        """
        parameter = dict(client='youtube', hl=language, gl=country, q=query)
        raw_data = self._session.get(QUERY_SUGGESTIONS_URL, params=parameter).text
        # The response is a callback-wrapped payload: ``name(<array>)``. Parse
        # the array as JSON; never eval() text received from the network.
        start = raw_data.find('(')
        end = raw_data.rfind(')')
        if start == -1 or end <= start:
            raise ValueError("Unexpected query suggestions response format")
        # Element 1 of the array holds the suggestion entries; the suggestion
        # text is the first item of each entry.
        data = json.loads(raw_data[start + 1:end])[1]
        return [entry[0] for entry in data]