You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

77 lines
2.0 KiB

# coding=utf-8
import sys
# Make UTF-8 the interpreter-wide default string encoding.
# NOTE(review): the bare `reload` builtin and `sys.setdefaultencoding` are
# Python 2 idioms, so this script appears to target Python 2 — confirm the
# interpreter before porting or removing this pair.
reload(sys)
sys.setdefaultencoding('utf8')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, date, timedelta
# Load the member export; the sections below read its 'Date' and 'Place'
# columns.
datasetPath = 'datasets/BotC.csv'
members = pd.read_csv(datasetPath)
# Print the first row as a quick check of what was parsed.
firstRow = members.head(1)
print(firstRow)
# Build the per-day series of newly registered members.
#
# Names produced for the rest of the script:
#   days       - one datetime per calendar day of the covered span, plus one
#                per member registration
#   unique     - sorted distinct days (x axis of the plots)
#   counts     - occurrences of each day in `days` (calendar entry included)
#   countDays  - {day: occurrences}
#   realCounts - members registered on each day of `unique`

# Parse the registration dates first. Only string cells are parsed; anything
# else (presumably NaN for an empty cell) is skipped.
memberDays = [
    datetime.strptime(memberDate, '%B %d, %Y')
    for memberDate in members['Date']
    if isinstance(memberDate, basestring)
]

# Generate the full set of days from the first date to the last in the
# dataset. The bounds come from the data itself: the previous hard-coded pair
# (2017-06-08 .. 2017-01-09) was reversed, so the range was empty, no calendar
# day was generated, and the "- 1" below made every daily count one too low.
days = []
if memberDays:
    dInit = min(memberDays)
    dEnd = max(memberDays)
    delta = dEnd - dInit
    # The parsed dates carry no time of day, so stepping by whole days from
    # dInit yields values that compare equal to the parsed member dates.
    days = [dInit + timedelta(days=i) for i in range(delta.days + 1)]

# Add the member registration dates to the days array.
days.extend(memberDays)

# Count how often each day occurs in the days array.
unique, counts = np.unique(days, return_counts=True)
countDays = dict(zip(unique, counts))

# Every day in `unique` occurs exactly once through the calendar, so
# subtracting that single entry leaves the number of registrations.
realCounts = [count - 1 for count in counts]
# Running total of members registered up to and including each day.
# globalCount lines up index-for-index with realCounts (and therefore with
# `unique`); totalCount keeps the final grand total.
globalCount = np.cumsum(realCounts).tolist()
totalCount = globalCount[-1] if globalCount else 0

# Parallel day / occurrence lists taken from the same dict, so entry i of
# one belongs to entry i of the other. `dates` previously received
# countDays.values() as well, i.e. the counts instead of the days.
# Note that `counts` is rebound here and no longer holds the np.unique array.
dates = countDays.keys()
counts = countDays.values()
# Plot the data: three figures shown one after another.

# Figures 1 and 2: a single series each, plotted against the distinct days.
for figureTitle, series in (
    ("New members registered each day", realCounts),
    ("Total members each day", globalCount),
):
    plt.title(figureTitle)
    plt.plot(unique, series)
    plt.show()

# Figure 3: both series on the same axes, told apart by the legend.
plt.title("New members and total members each day")
for series, seriesLabel in (
    (realCounts, "new members registered each day"),
    (globalCount, "total members each day"),
):
    plt.plot(unique, series, label=seriesLabel)
plt.legend(loc='upper left')
plt.show()
# Share of members per place, drawn as a pie chart.
# Only string cells of the 'Place' column are counted; other values are
# dropped.
places = [place for place in members["Place"] if isinstance(place, basestring)]
placesNames, placesCount = np.unique(places, return_counts=True)

plt.title("Membership places")
pieOptions = dict(labels=placesNames, autopct='%1.1f%%', shadow=True, startangle=90)
plt.pie(placesCount, **pieOptions)
# Equal axis scaling, as in the original call sequence.
plt.axis('equal')
plt.show()