7
« on: February 28, 2015, 06:17:27 pm »
Updates: - Short description for each title.
- Refined Torrent URL search.
Still needed, if anyone wants to add on:
- Text Formatting Within Terminal (different colors, bold text, etc.)
- Critic or User Rating For Each Title (should be an easy one)
- Replace the ASCII characters that are returned in titles with human-readable text.
import urllib
import re
# Strings that must be excluded from the found titles
# (Review 7,8,11,12,13,14,15,16,19,20,21)
reject = [
    "Register or login to rate this title",
    "Go to IMDbPro",
    "PG_13",
    "R",
    "Delete",
    "PG",
    "image of title",
    "Subscribe",
    "image of ",
    "G",
    "TV_PG",
    "image of character",
    "image of name",
    "TV_14",
    "NOT_RATED",
    "TV_MA",
]

# Menu number -> genre name; numbering starts at 1 and follows the order below.
genre_dic = dict(enumerate((
    "Action",
    "Adventure",
    "Animation",
    "Biography",
    "Comedy",
    "Crime",
    "Documentary",
    "Drama",
    "Family",
    "Fantasy",
    "Film-Noir",
    "History",
    "Horror",
    "Music",
    "Musical",
    "Mystery",
    "Romance",
    "Sci-Fi",
    "Sport",
    "Thriller",
    "War",
    "Western",
), 1))
#Scrapes Page For Movie Titles and Prints Them
#To Do: Replace Ascii Characters With Human Readable Text!
def find_list(url):
    """Download ``url`` and print every distinct movie title found in it.

    The page is scanned for ``title="..."`` attributes. A value is skipped
    when it appears in the module-level ``reject`` list, has already been
    printed, or starts with "User". For each remaining title the function
    prints a running number, the title, the matching
    ``<span class="outline">`` description (paired by position) and a
    torrent search URL built from the part of the title before " (".

    Reads the module-level globals ``genre_dic``, ``user_choice`` and
    ``reject``.

    Args:
        url: Address of the page to scrape.

    Returns:
        None. All results are written to standard output.
    """
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        # Always release the connection, even if the read fails.
        page.close()

    descriptions = re.findall(r'<span class="outline">(.+?)<', html)
    candidates = re.findall(r'title="(.+?)"', html)
    # Compiled once here instead of on every loop iteration.
    name_pattern = re.compile(r'(.+?) \(')

    print("The Top 25 Titles Found In " + genre_dic[user_choice] + " Are:")
    print("")

    seen = set()
    movie_count = 0
    for title in candidates:
        if title in reject or title in seen or title.startswith("User"):
            continue
        seen.add(title)

        # Descriptions are matched to titles purely by position, so the page
        # may supply fewer of them than there are accepted titles.
        if movie_count < len(descriptions):
            description = descriptions[movie_count]
        else:
            description = "N/A"
        movie_count += 1

        # Titles without a " (" suffix are searched for by their full text
        # instead of raising IndexError.
        found = name_pattern.search(title)
        search_term = found.group(1) if found else title

        print("Movie Number " + str(movie_count) + ":")
        print("Title: " + title)
        print("Description: " + description)
        # Percent-encode the term so spaces and punctuation give a usable URL.
        print("Torrent URL: http://kickass.to/usearch/" + urllib.quote(search_term) + "%20category:movies%20lang_id:2/")
        print("--------------------------------------")
        print("")
#Intro & Help
# Shown once at start-up: a welcome banner followed by the numbered genre menu.
print("Welcome to the IMDB movie finder; A faster way to find movies.")
print("Please select a number to view the corresponding genre")
print("")
# Adjacent literals are joined at compile time into the same single string.
print(
    'Action: 1\n'
    'Adventure: 2\n'
    'Animation: 3\n'
    'Biography: 4\n'
    'Comedy: 5\n'
    'Crime: 6\n'
    'Documentary: 7\n'
    'Drama: 8\n'
    'Family: 9\n'
    'Fantasy: 10\n'
    'Film-Noir: 11\n'
    'History: 12\n'
    'Horror: 13\n'
    'Music: 14\n'
    'Musical: 15\n'
    'Mystery: 16\n'
    'Romance: 17\n'
    'Sci-Fi: 18\n'
    'Sport: 19\n'
    'Thriller: 20\n'
    'War: 21\n'
    'Western: 22\n'
)
print("")
#User Selects Genre
def genre_sel():
    """Prompt for a genre number and store it in the global ``user_choice``.

    The prompt repeats until the input parses as an integer; anything else
    prints "Invalid Selection!" and asks again instead of crashing with
    ValueError. Surrounding whitespace is ignored. Typing ``exit`` (in any
    letter case) ends the program.

    Returns:
        None. The parsed number is left in the global ``user_choice``.

    Raises:
        SystemExit: when the user types ``exit``.
    """
    global user_choice
    while True:
        answer = raw_input("Enter number here or type 'exit': ").strip()
        if answer.lower() == 'exit':
            # Raised directly: the exit() helper comes from the site module
            # and is not guaranteed to be available.
            raise SystemExit
        try:
            user_choice = int(answer)
        except ValueError:
            print("Invalid Selection!")
            continue
        print("")
        return
#Processes User Selection
def program_loop():
    """Repeatedly ask for a genre and list its titles.

    Each pass calls ``genre_sel()`` to set the global ``user_choice``. If
    that number is a key of ``genre_dic``, the genre page URL is built and
    handed to ``find_list``; otherwise "Invalid Selection!" is printed.

    The function contains no exit path of its own; it runs until a call it
    makes raises.
    """
    # An explicit loop replaces the original self-recursion, which added a
    # stack frame per selection and would eventually hit the recursion limit.
    while True:
        genre_sel()
        # A membership test also rejects 0 and negative numbers, which the
        # old "<= len(genre_dic)" check let through to a KeyError.
        if user_choice in genre_dic:
            url = 'http://www.imdb.com/genre/' + genre_dic[user_choice].lower() + '/?ref_=gnr_mn_ac_mp'
            find_list(url)
        else:
            print("Invalid Selection!")
# Script entry point: start the interactive genre-selection loop.
program_loop()