import BeautifulSoup as bs
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup

# Summarise the purchase history found in a saved copy of the Steam account
# page.
#
# HISTORY_FILE is the saved source code of your page at
# https://store.steampowered.com/account/
# You could probably grab it with requests, but reading a local copy is the
# safe and quick option.
HISTORY_FILE = 'buyhist3.html'

# Read the page inside a `with` block so the file handle is closed as soon as
# the contents are in memory.
with open(HISTORY_FILE) as f:
    data = f.read()
soup = BeautifulSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)


def _is_transaction_row(css_class):
    """Return True when *css_class* starts with 'transactionRow '.

    BeautifulSoup calls an attribute filter for every candidate tag and
    passes None when the tag has no such attribute, so the value has to be
    checked before calling startswith() on it.
    """
    return bool(css_class) and css_class.startswith('transactionRow ')


# Find all the sales.  The second 'block' div is the big list of store
# transactions; inside it there are two lists: the first 6 or so (visible)
# and the rest (hidden).
store_trans = soup.findAll('div', attrs={'class': 'block'})[1]
visible_trans = store_trans.findAll('div', attrs={'class': 'transactions'})[0]
hidden_trans = store_trans.findAll(
    'div', attrs={'class': 'hidden_transactions'})[0]

# Collect every sale row: first the visible ones, then the hidden ones.
sales = []
for container in (visible_trans, hidden_trans):
    sales.extend(
        container.findAll('div', attrs={'class': _is_transaction_row}))

# print(...) with a single formatted string produces the same output as the
# original comma-separated Python 2 print statement.
print('total number of sales found: %s' % len(sales))

# Cross-check against the total reported in the html itself: take the last
# whitespace-separated token of the first transaction_footer_element div.
total_sales_tag = soup.findAll(
    'div', attrs={'class': 'transaction_footer_element'})[0]
total_sales = total_sales_tag.text.split()[-1]
print('compared to our expected total: %s' % total_sales)

# Now we parse the sale entries.
# Classes of the row children that carry the data we are interested in.
allowed_fields = ['transactionRow' + field
                  for field in ('Date', 'Price', 'Items')]

# Build one dict per sale with 'date', 'price' and 'title' entries.  Every
# key starts out as an empty string so a row that lacks one of the fields
# cannot break the reporting code below with a KeyError.
sales_list = []
for sale in sales:
    sale_dict = {'date': '', 'price': '', 'title': ''}
    for tag in sale:
        # Only the class lookup is guarded, so unrelated errors are not
        # silently swallowed.  TypeError is what the original code already
        # skipped (presumably plain text children, which cannot be indexed
        # by attribute name); KeyError covers a tag with no class attribute.
        try:
            tag_class = tag['class']
        except (TypeError, KeyError):
            continue
        if tag_class not in allowed_fields:
            continue

        title_divs = tag.findChildren(
            'div', attrs={'class': 'transactionRowTitle'})
        if title_divs:
            # Put each comma-separated item of the title on its own
            # arrow-prefixed line.
            title = '\t-->' + title_divs[0].renderContents()
            sale_dict['title'] = '\n\t -->'.join(title.split(',')) + '\n'
        elif tag_class == 'transactionRowDate':
            sale_dict['date'] = tag.renderContents()
        elif tag_class == 'transactionRowPrice':
            sale_dict['price'] = tag.renderContents()
        else:
            # NOTE(review): the original indentation was lost; this `else`
            # is attached to the nearest if/elif chain, i.e. it fires for an
            # allowed field that matched none of the branches -- confirm.
            print('bah')
    sales_list.append(sale_dict)
# NOTE(review): assumed to run once after the loop (original indentation was
# lost) -- confirm.
print('success!')

# Printing nicely: pad the date and price columns to their widest entry.
# In this version it's not really needed anymore, as the columns are now
# split up a bit.  `or [0]` keeps max() valid when no sales were found.
maxD = max([len(entry['date']) for entry in sales_list] or [0])
maxP = max([len(entry['price']) for entry in sales_list] or [0])
for sale in sales_list:
    print("Date: %-*s\t Price: %*s\n %s"
          % (maxD, sale['date'], maxP, sale['price'], sale['title']))

# Add up everything that parses as a dollar amount; anything else (including
# a missing price) is counted as a probable free game.
costs = []
count = 0
for sale in sales_list:
    try:
        # Trim surrounding whitespace first so the '$' is really at the edge
        # of the string when it gets stripped.
        costs.append(float(sale['price'].strip().strip('$')))
    except ValueError:
        count += 1
print('\nTotal costs from your pocket: %s with probably %s free games'
      % (sum(costs), count))