You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
1.6 KiB

6 years ago
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. from datetime import datetime, date, timedelta
  5. shops = pd.read_csv('datasets/fairMarket-shops.csv')
  6. print shops.tail(1)
  7. #generate the full set of days from the first date to the last in the dataset
  8. months = []
  9. days = []
  10. dInit = date(2015, 5, 04)
  11. dEnd = date(2017, 8, 31)
  12. delta = dEnd - dInit
  13. for i in range(delta.days+1):
  14. day = dInit + timedelta(days=i)
  15. dayString = day.strftime("%d/%m/%y")
  16. dayDatetime = datetime.strptime(dayString, '%d/%m/%y')
  17. days.append(dayDatetime)
  18. #add the dates of shops creation to the days array
  19. for shopDate in shops['Created on']:
  20. if isinstance(shopDate, basestring):
  21. shopDay = str.split(shopDate)[0]
  22. shopDayDatetime = datetime.strptime(shopDay, '%d/%m/%y')
  23. days.append(shopDayDatetime)
  24. #count days frequency in days array
  25. unique, counts = np.unique(days, return_counts=True)
  26. countDays = dict(zip(unique, counts))
  27. realCounts = []
  28. for count in counts:
  29. realCounts.append(count-1)
  30. #count the total acumulation of shops created in each days
  31. totalCount = 0
  32. globalCount = []
  33. for k in realCounts:
  34. totalCount = totalCount + k
  35. globalCount.append(totalCount)
  36. dates = countDays.values()
  37. counts = countDays.values()
  38. #plot the data
  39. plt.title("New shops opened each day")
  40. plt.plot(unique, realCounts)
  41. plt.show()
  42. plt.title("Total shops each day")
  43. plt.plot(unique, globalCount)
  44. plt.show()
  45. plt.title("New shops and total shops each day")
  46. plt.plot(unique, realCounts, label="new shops opened each day")
  47. plt.plot(unique, globalCount, label="total shops each day")
  48. plt.legend(loc='upper left')
  49. plt.show()