You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

83 lines
2.2 KiB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
  1. # coding=utf-8
  2. import sys
  3. reload(sys)
  4. sys.setdefaultencoding('utf8')
  5. import numpy as np
  6. import pandas as pd
  7. import matplotlib.pyplot as plt
  8. import mpld3
  9. from mpld3 import plugins, utils
  10. from datetime import datetime, date, timedelta
  11. products = pd.read_csv('datasets/fairMarket-products.csv')
  12. #print products.tail(1)
  13. #generate the full set of days from the first date to the last in the dataset
  14. months = []
  15. days = []
  16. dInit = date(2015, 5, 04)
  17. dEnd = date(2017, 8, 31)
  18. delta = dEnd - dInit
  19. for i in range(delta.days+1):
  20. day = dInit + timedelta(days=i)
  21. dayString = day.strftime("%d/%m/%y")
  22. dayDatetime = datetime.strptime(dayString, '%d/%m/%y')
  23. days.append(dayDatetime)
  24. #add the dates of products creation to the days array
  25. for productDate in products['Creado en']:
  26. if isinstance(productDate, basestring):
  27. productDay = str.split(productDate)[0]
  28. productDayDatetime = datetime.strptime(productDay, '%d/%m/%y')
  29. days.append(productDayDatetime)
  30. #count days frequency in days array
  31. unique, counts = np.unique(days, return_counts=True)
  32. countDays = dict(zip(unique, counts))
  33. realCounts = []
  34. for count in counts:
  35. realCounts.append(count-1)
  36. #count the total acumulation of products created in each days
  37. totalCount = 0
  38. globalCount = []
  39. for k in realCounts:
  40. totalCount = totalCount + k
  41. globalCount.append(totalCount)
  42. dates = countDays.values()
  43. counts = countDays.values()
  44. #plot the data
  45. plt.title("New products published each day")
  46. plt.plot(unique, realCounts)
  47. plt.show()
  48. plt.title("Total products in FairMarket each day")
  49. plt.plot(unique, globalCount)
  50. plt.show()
  51. plt.title("New products and total products each day")
  52. plt.plot(unique, realCounts, label="new products offered each day")
  53. plt.plot(unique, globalCount, label="total products in FairMarket each day")
  54. plt.legend(loc='upper left')
  55. plt.show()
  56. # now, product categories analytics
  57. categories = []
  58. for category in products["Categoría pública/Display Name"]:
  59. if isinstance(category, basestring):
  60. categories.append(category)
  61. categoriesNames, categoriesCount = np.unique(categories, return_counts=True)
  62. plt.title("Products categories")
  63. plt.pie(categoriesCount, labels=categoriesNames, autopct='%1.1f%%', shadow=True, startangle=90)
  64. plt.axis('equal')
  65. mpld3.show()