|
|
# coding=utf-8 import sys reload(sys) sys.setdefaultencoding('utf8')
import numpy as np import pandas as pd import matplotlib.pyplot as plt
import mpld3 from mpld3 import plugins, utils
from datetime import datetime, date, timedelta
products = pd.read_csv('datasets/fairMarket-products.csv')
#print products.tail(1)
#generate the full set of days from the first date to the last in the dataset months = [] days = [] dInit = date(2015, 5, 04) dEnd = date(2017, 8, 31) delta = dEnd - dInit for i in range(delta.days+1): day = dInit + timedelta(days=i) dayString = day.strftime("%d/%m/%y") dayDatetime = datetime.strptime(dayString, '%d/%m/%y') days.append(dayDatetime)
#add the dates of products creation to the days array for productDate in products['Creado en']: if isinstance(productDate, basestring): productDay = str.split(productDate)[0] productDayDatetime = datetime.strptime(productDay, '%d/%m/%y') days.append(productDayDatetime)
#count days frequency in days array unique, counts = np.unique(days, return_counts=True) countDays = dict(zip(unique, counts)) realCounts = [] for count in counts: realCounts.append(count-1)
#count the total acumulation of products created in each days totalCount = 0 globalCount = [] for k in realCounts: totalCount = totalCount + k globalCount.append(totalCount)
dates = countDays.values() counts = countDays.values()
#plot the data plt.title("New products published each day") plt.plot(unique, realCounts) plt.show()
plt.title("Total products in FairMarket each day") plt.plot(unique, globalCount) plt.show()
plt.title("New products and total products each day") plt.plot(unique, realCounts, label="new products offered each day") plt.plot(unique, globalCount, label="total products in FairMarket each day") plt.legend(loc='upper left') plt.show()
# now, product categories analytics categories = [] for category in products["Categoría pública/Display Name"]: if isinstance(category, basestring): categories.append(category)
categoriesNames, categoriesCount = np.unique(categories, return_counts=True) plt.title("Products categories") plt.pie(categoriesCount, labels=categoriesNames, autopct='%1.1f%%', shadow=True, startangle=90) plt.axis('equal') mpld3.show()
|