As the title suggests, I just finished making a basic web scraper that searches for sales on a certain website, then takes that info and writes it into your HTML file for automated web page editing.
I'm still new at this, so if you have any suggestions on what I could do better next time, please let me know.
import urllib
import re
import fileinput
# Scrapes a product listing page, follows each product link it finds, and
# prints the name, price and savings text of every product.

# HTML file that the (currently disabled) rewrite step at the bottom edits.
filez = "index.html"

# Finds Product Links @
# NOTE(review): the base URL is empty in this copy of the script -- it must be
# filled in before the script can fetch anything.
url = ""

# Patterns are compiled once here instead of on every loop iteration.
# NOTE(review): the link pattern has no capture group, so findall() returns
# the whole matched text rather than an href value, and it only matches an
# empty href. It looks like part of the pattern was stripped together with
# the URL -- confirm and restore the capture group.
link_pattern = re.compile(r'<h2 class="product-name"><a href=""')
title_pattern = re.compile(r'<strong itemprop="title">(.+?)</strong>')
price_pattern = re.compile(r'<span class="price">(.+?)</span>')
savings_pattern = re.compile(r'<span class="save">(.+?)</span>')
image_pattern = re.compile(r'<img itemprop="image" src="(.+?)"')


def fetch_html(address):
    """Return the full response body for *address*.

    The response object is always closed, even if reading fails.
    """
    response = urllib.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def first_match(pattern, text, default="N/A"):
    """Return group 1 of the first match of *pattern* in *text*.

    Returns *default* when the pattern does not match, so that a product
    page missing one field does not abort the whole run with an IndexError.
    """
    found = pattern.search(text)
    if found is None:
        return default
    return found.group(1)


for product_link in link_pattern.findall(fetch_html(url)):
    # Each listing entry is appended to the base URL to form the product page
    # address.
    product_html = fetch_html(url + product_link)

    # Finds Product Title
    product_title = first_match(title_pattern, product_html)
    # Finds Product Price
    product_price = first_match(price_pattern, product_html)
    # Finds Product Savings
    product_savings = first_match(savings_pattern, product_html)
    # Stores Product Image Source Location (not printed or written yet).
    product_image = first_match(image_pattern, product_html)

    print("Product Name: %s" % product_title)
    print("Price: %s" % product_price)
    print(product_savings)

# Re-Writes index.html with Wine Deals product info. (Only the product name as of now.)
# TODO: still disabled; when enabled, decide which product's title to write.
#for line in fileinput.input(filez, inplace = 1):
#    print line.replace("wine_deals_title", product_title),