Last Updated : 23 Jul, 2025
We can scrape the IMDb movie ratings and their details with the help of the BeautifulSoup library of Python.
Modules Needed:Below is the list of modules required to scrape from IMDB.
Steps to implement web scraping in python to extract IMDb movie ratings and its ratings:
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd
# Downloading imdb top 250 movie's data
url = 'https://www.imdb.com/chart/top/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
movies = soup.select('td.titleColumn')
crew = [a.attrs.get('title') for a in soup.select('td.titleColumn a')]
ratings = [b.attrs.get('data-value')
for b in soup.select('td.posterColumn span[name=ir]')]
# create a empty list for storing
# movie information
list = []
# Iterating over movies to extract
# each movie's details
for index in range(0, len(movies)):
# Separating movie into: 'place',
# 'title', 'year'
movie_string = movies[index].get_text()
movie = (' '.join(movie_string.split()).replace('.', ''))
movie_title = movie[len(str(index))+1:-7]
year = re.search('\((.*?)\)', movie_string).group(1)
place = movie[:len(str(index))-(len(movie))]
data = {"place": place,
"movie_title": movie_title,
"rating": ratings[index],
"year": year,
"star_cast": crew[index],
}
list.append(data)
for movie in list:
print(movie['place'], '-', movie['movie_title'], '('+movie['year'] +
') -', 'Starring:', movie['star_cast'], movie['rating'])
#saving the list as dataframe
#then converting into .csv file
df = pd.DataFrame(list)
df.to_csv('imdb_top_250_movies.csv',index=False)
Implementation: Complete Code
Python
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd
# Downloading imdb top 250 movie's data
url = 'https://www.imdb.com/chart/top/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
movies = soup.select('td.titleColumn')
crew = [a.attrs.get('title') for a in soup.select('td.titleColumn a')]
ratings = [b.attrs.get('data-value')
for b in soup.select('td.posterColumn span[name=ir]')]
# create a empty list for storing
# movie information
list = []
# Iterating over movies to extract
# each movie's details
for index in range(0, len(movies)):
# Separating movie into: 'place',
# 'title', 'year'
movie_string = movies[index].get_text()
movie = (' '.join(movie_string.split()).replace('.', ''))
movie_title = movie[len(str(index))+1:-7]
year = re.search('\((.*?)\)', movie_string).group(1)
place = movie[:len(str(index))-(len(movie))]
data = {"place": place,
"movie_title": movie_title,
"rating": ratings[index],
"year": year,
"star_cast": crew[index],
}
list.append(data)
# printing movie details with its rating.
for movie in list:
print(movie['place'], '-', movie['movie_title'], '('+movie['year'] +
') -', 'Starring:', movie['star_cast'], movie['rating'])
##.......##
df = pd.DataFrame(list)
df.to_csv('imdb_top_250_movies.csv',index=False)
Output:
Along with this in the terminal, a .csv file with a given name is saved in the same file and the data in the .csv file will be as shown in the following image.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4