import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly as py
import cufflinks as cf
# Notebook display setup: allow up to 30 columns when a DataFrame is shown,
# and switch cufflinks to offline mode for the .iplot() calls used below.
pd.options.display.max_columns = 30
cf.go_offline()
The data come from Kevin Zmith on Kaggle, inspired by the dataset Honey Production in the USA, extended to the period 1998-2017. Additionally, data from the USGS Pesticide National Synthesis Project have been aggregated, allowing evaluation of the statistical connections between honey production and the use of neonicotinoid (neonic) pesticides.
# Load the honey / neonicotinoid dataset and take a first look at it.
data = pd.read_csv("./data/vHoneyNeonic_v03.csv")
data.head()
data.shape
data.columns

# Conversion factors from pounds to metric units.
# NOTE(review): the source columns are presumably expressed in pounds (the
# price column is named "priceperlb") - confirm against the dataset notes.
KG_PER_LB = 0.45359237
TONNES_PER_LB = 0.00045359237

# (position, new column name, values) for every metric column. The values
# are all derived from original columns, which the inserts never modify, so
# they can be computed up front; only the insertion order matters because
# each position is relative to the frame as it stands at that moment.
metric_columns = [
    (3, 'yieldpercol_kg', data["yieldpercol"] * KG_PER_LB),
    (5, 'totalprod_kg', data["totalprod"] * KG_PER_LB),
    (6, 'totalprod_to', data["totalprod"] * TONNES_PER_LB),
    (8, 'stocks_to', data["stocks"] * TONNES_PER_LB),
    # A price per pound is divided by the factor to get a price per kg/tonne.
    (10, 'priceperkg', data["priceperlb"] / KG_PER_LB),
    (11, 'pricepertonne', data["priceperlb"] / TONNES_PER_LB),
]
for position, column_name, values in metric_columns:
    data.insert(loc=position, column=column_name, value=values)
data.head()
# Drop the leading "n" from the neonicotinoid column names.
neonic_renames = {
    "nCLOTHIANIDIN": "CLOTHIANIDIN",
    "nIMIDACLOPRID": "IMIDACLOPRID",
    "nTHIAMETHOXAM": "THIAMETHOXAM",
    "nACETAMIPRID": "ACETAMIPRID",
    "nTHIACLOPRID": "THIACLOPRID",
    "nAllNeonic": "AllNeonic",
}
data = data.rename(columns=neonic_renames)

# Save the enriched frame as a new version of the dataset.
# NOTE(review): to_csv writes the row index as an extra first column by
# default - keep that in mind when reading this file back.
data.to_csv('./data/vHoneyNeonic_v04.csv')
# Count the missing values per column, then keep only the complete rows.
data.isna().sum()
data = data.dropna()
data.shape

# Ten states with the largest cumulated production (tonnes), followed by
# the cumulated production (kg) of every region, both in descending order.
production_by_state = data.groupby("StateName")['totalprod_to'].sum()
production_by_state.sort_values(ascending=False).head(10)
production_by_region = data.groupby("Region")['totalprod_kg'].sum()
production_by_region.sort_values(ascending=False)
# Mean price per kilogram over all rows of each year, drawn as a line chart.
evo_price = data.groupby("year")['priceperkg'].mean().reset_index()
evo_price.iplot(
    kind='line',
    x='year',
    color='orange',
    title='Evolution of the price of honey',
    xTitle='Year',
    yTitle='Price of honey (dollars)',
)
The price of honey has increased five-fold in 12 years!
# Mean production (tonnes) over all rows of each year, drawn as a bar chart.
prod_by_year = data.groupby("year")['totalprod_to'].mean().reset_index()
prod_by_year.iplot(
    kind='bar',
    x='year',
    color='red',
    title='Evolution of the production of honey',
    xTitle='Year',
    yTitle='Production of honey (Tonne)',
)

# Correlation coefficient between the price per kg and the production (kg).
price_per_kg = data['priceperkg']
production_kg = data['totalprod_kg']
price_per_kg.corr(production_kg)
The correlation between production and the price of honey is weak (a coefficient of about 0.23 in magnitude), so production alone does not explain the price! Other factors should be taken into account... The market?
# Cumulated neonicotinoid use of every state, largest first.
neonic_by_state = data.groupby("StateName")['AllNeonic'].sum()
neonic_by_state.sort_values(ascending=False)

# Mean neonicotinoid use over all rows of each year, drawn as a bar chart.
evo_neonic = data.groupby("year")['AllNeonic'].mean().reset_index()
evo_neonic.iplot(
    kind='bar',
    x='year',
    color='green',
    title='Evolution of the use of Neonic pesticides',
    xTitle='Year',
    yTitle='Use of Neonic pesticides (kg)',
)
There is a 4460% increase in the use of neonic pesticides between 1995 and 2014!
# Correlation coefficient between honey production (kg) and the total
# neonicotinoid use.
neonic_use = data['AllNeonic']
data['totalprod_kg'].corr(neonic_use)
The correlation between the production of honey and the use of neonic pesticides is low (a coefficient of about 0.11 in magnitude).
# Total number of colonies per year, summed over every row of that year.
# 'sum' is required here: 'count' would only return how many rows carry a
# numcol value for each year, which is not the number of colonies that the
# chart title and y-axis label announce.
evo_col = data.groupby("year", as_index=False).agg({'numcol': 'sum'})
evo_col.iplot(x='year', xTitle='Year', color='purple',
              yTitle='Number of colonies', title='Evolution of the number of colonies')

# Correlation coefficient between the number of colonies and the total
# neonicotinoid use, computed row by row.
data['numcol'].corr(data['AllNeonic'])
The correlation between the number of colonies and the use of neonic pesticides is low (a coefficient of about 0.19 in magnitude).