Author Topic: Udemy Video downloader Python script  (Read 2518 times)

0 Members and 1 Guest are viewing this topic.

Offline TeamVega

  • Serf
  • *
  • Posts: 32
  • Cookies: -2
    • View Profile
Udemy Video downloader Python script
« on: July 15, 2015, 03:41:14 pm »
Hi guys

does anybody have an updated udemy video downloader as i have check out this project:
github.com/gaganpreet/udemy-dl

However it seems as though Udemy has updated their login pages. csrfmiddlewaretoken is introduced instead of csrf and it throws up CSRF verification failed.Request aborted.And it seems as though they are also using a csrftoken cookie as well.

I know there was an old one here on the forum but that one is out of date.

Offline Sherlock Holmes

  • Peasant
  • *
  • Posts: 97
  • Cookies: 4
    • View Profile
Re: Udemy Video downloader Python script
« Reply #1 on: July 15, 2015, 04:07:28 pm »
I have not used this udemy-dl but what's wrong with the simple IDM (or any other download manager like speedbit, etc) ? Why not use those. .
« Last Edit: July 15, 2015, 04:08:27 pm by Sherlock Holmes »

Offline 0E 800

  • Not a VIP
  • VIP
  • Baron
  • *
  • Posts: 895
  • Cookies: 131
  • • тнε ιηтεяηεт ιs мү яεcүcℓε-вιη •
    • View Profile
Re: Udemy Video downloader Python script
« Reply #2 on: July 15, 2015, 08:54:21 pm »
The invariable mark of wisdom is to see the miraculous in the common.

Offline TeamVega

  • Serf
  • *
  • Posts: 32
  • Cookies: -2
    • View Profile
Re: Udemy Video downloader Python script
« Reply #3 on: July 16, 2015, 06:37:33 am »
Thanks Guys.  8)

Offline TeamVega

  • Serf
  • *
  • Posts: 32
  • Cookies: -2
    • View Profile
Re: Udemy Video downloader Python script
« Reply #4 on: July 16, 2015, 11:19:08 am »
HI Guys

This guy fixed it github.com/nishad:




Code: [Select]
#!/usr/bin/env python
# -*- coding: utf8 -*-

import requests
import requests.sessions
import argparse
import getpass
import sys
import re
import os
import json
from .download import download, DLException


class Session:
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0) Gecko/20100101 Firefox/39.0',
               'X-Requested-With': 'XMLHttpRequest',
               'Host': 'www.udemy.com',
               'Referer': 'https://www.udemy.com/join/login-popup'}

    def __init__(self):
        self.session = requests.sessions.Session()
       
    def set_auth_headers(self, access_token, client_id):
        self.headers['X-Udemy-Bearer-Token'] = access_token
        self.headers['X-Udemy-Client-Id'] = client_id

    def get(self, url):
        return self.session.get(url, headers=self.headers)

    def post(self, url, data):
        return self.session.post(url, data, headers=self.headers)


session = Session()


def get_csrf_token():
    response = session.get('https://www.udemy.com/join/login-popup')
    match = re.search("name=\'csrfmiddlewaretoken\' value=\'(.*)\'", response.text)
    return match.group(1)

def login(username, password):
    locale = 'en_US'
    login_url = 'https://www.udemy.com/join/login-popup'
    csrf_token = get_csrf_token()
    payload = {'isSubmitted': 1, 'email': username, 'password': password,
               'displayType': 'json', 'csrfmiddlewaretoken': csrf_token, 'locale': locale, 'DEBUG':True }
    session.headers.update({'referer': 'https://www.udemy.com/join/login-popup'})
    response = session.post(login_url, payload)

    access_token = response.cookies.get('access_token')
    client_id = response.cookies.get('client_id')
    session.set_auth_headers(access_token, client_id)

    response = response.text
    if 'error' in response:
        print(response)
        sys.exit(1)


def get_course_id(course_link):
    response = session.get(course_link)
    matches = re.search('data-courseid="(\d+)"', response.text, re.IGNORECASE)
    return matches.groups()[0] if matches else None


def parse_video_url(lecture_id, hd=False):
    '''A hacky way to find the json used to initalize the swf object player'''
    embed_url = 'https://www.udemy.com/embed/{0}'.format(lecture_id)
    html = session.get(embed_url).text

    data = re.search(r'\$\("#player"\).jwplayer\((.*?)\);.*</script>', html,
                     re.MULTILINE | re.DOTALL).group(1)
    video = json.loads(data)

    if 'playlist' in video and 'sources' in video['playlist'][0]:
        if hd:
            for source in video['playlist'][0]['sources']:
                if '720' in source['label'] or 'HD' in source['label']:
                    return source['file']

        # The 360p case and fallback if no HD version
        source = video['playlist'][0]['sources'][0]
        return source['file']
    else:
        print("Failed to parse video url")
        return None


def get_video_links(course_id, hd=False):
    course_url = 'https://www.udemy.com/api-1.1/courses/{0}/curriculum?fields[lecture]=@min,completionRatio,progressStatus&fields[quiz]=@min,completionRatio'.format(course_id)
    course_data = session.get(course_url).json()

    chapter = None
    video_list = []

    lecture_number = 0
    chapter_number = 0
    # A udemy course has chapters, each having one or more lectures
    for item in course_data:
        if item['__class'] == 'chapter':
            chapter = item['title']
            chapter_number += 1
            lecture_number = 1
        elif item['__class'] == 'lecture' and item['assetType'] == 'Video':
            lecture = item['title']
            try:
                lecture_id = item['id']
                video_url = parse_video_url(lecture_id, hd)
                video_list.append({'chapter': chapter,
                                   'lecture': lecture,
                                   'video_url': video_url,
                                   'lecture_number': lecture_number,
                                   'chapter_number': chapter_number})
            except:
                print('Cannot download lecture "%s"' % (lecture))
            lecture_number += 1
    return video_list


def sanitize_path(s):
    return "".join([c for c in s if c.isalpha() or c.isdigit() or c in ' .-_,']).rstrip()


def mkdir(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)


def get_video(directory, filename, link):
    print('Downloading %s  ' % (filename.encode('utf-8')))
    previous_dir = os.getcwd()
    mkdir(directory)
    os.chdir(directory)
    try:
        download(link, filename)
    except DLException as e:
        print('Couldn\'t download this lecture: {0}'.format(e))
    os.chdir(previous_dir)
    print('\n'),

def udemy_dl(username, password, course_link, dest_dir=""):
    login(username, password)

    course_id = get_course_id(course_link)
    if not course_id:
        print('Failed to get course ID')
        return

    for video in get_video_links(course_id, hd=True):
        directory = '%02d %s' % (video['chapter_number'], video['chapter'])
        directory = sanitize_path(directory)

        if dest_dir:
            directory = os.path.join(dest_dir, directory)

        filename = '%03d %s.mp4' % (video['lecture_number'], video['lecture'])
        filename = sanitize_path(filename)

        get_video(directory, filename, video['video_url'])

    session.get('http://www.udemy.com/user/logout')


def main():
    parser = argparse.ArgumentParser(description='Fetch all the videos for a udemy course')
    parser.add_argument('link', help='Link for udemy course', action='store')
    parser.add_argument('-u', '--username', help='Username/Email', default=None, action='store')
    parser.add_argument('-p', '--password', help='Password', default=None, action='store')
    parser.add_argument('-o', '--output-dir', help='Output directory', default=None, action='store')

    args = vars(parser.parse_args())

    username = args['username']
    password = args['password']
    link = args['link'].rstrip('/')

    if args['output_dir']:
        # Normalize the output path if specified
        output_dir = os.path.normpath( args['output_dir'] )
    else:
        # Get output dir name from the URL
        output_dir = os.path.join( ".", link.rsplit('/', 1)[1] )

    if not username:
        try:
            username = raw_input("Username/Email: ")  # Python 2
        except NameError:
            username = input("Username/Email: ")  # Python 3

    if not password:
        password = getpass.getpass(prompt='Password: ')

    print('Downloading to: %s\n' % (os.path.abspath(output_dir)) )

    udemy_dl(username, password, link, output_dir)


if __name__ == '__main__':
    main()