import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import seaborn as sns
sns.set(style="ticks", color_codes=True)
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn import preprocessing
from scipy.cluster.hierarchy import dendrogram
# "display_circles" draws the correlation circle of a PCA:
def display_circles(pcs, n_comp, pca, axis_ranks, labels=None, label_rotation=0, lims=None):
    """Draw one correlation circle per requested factorial plane.

    Parameters
    ----------
    pcs : 2-D array
        Indexed as ``pcs[axis, variable]``; the number of columns decides
        how the variables are drawn (arrows below 30, faint segments
        otherwise).  Presumably ``pca.components_`` -- confirm with callers.
    n_comp : int
        Planes whose second axis index is not below ``n_comp`` are skipped.
    pca : object
        Must expose ``explained_variance_ratio_`` (used in the axis labels).
    axis_ranks : iterable of (int, int)
        Zero-based ``(d1, d2)`` axis pairs; one figure is opened per pair.
    labels : sequence, optional
        One label per column of ``pcs``; only labels falling inside the
        plot limits are written.
    label_rotation : float, optional
        Rotation passed to the label text.
    lims : tuple, optional
        Explicit ``(xmin, xmax, ymin, ymax)`` limits.
    """
    axis_label = 'F{} ({}%)'.format

    for d1, d2 in axis_ranks:
        if d2 >= n_comp:
            continue

        fig, ax = plt.subplots(figsize=(10, 10))

        n_vars = pcs.shape[1]
        few_vars = n_vars < 30

        # Plot limits: explicit > unit square (few variables) > data extent.
        if lims is not None:
            xmin, xmax, ymin, ymax = lims
        elif few_vars:
            xmin, xmax, ymin, ymax = -1, 1, -1, 1
        else:
            xmin, xmax = min(pcs[d1, :]), max(pcs[d1, :])
            ymin, ymax = min(pcs[d2, :]), max(pcs[d2, :])

        # Arrows are readable for a handful of variables; beyond that,
        # plain translucent segments from the origin are drawn instead.
        if few_vars:
            origin = np.zeros(n_vars)
            plt.quiver(origin, np.zeros(n_vars),
                       pcs[d1, :], pcs[d2, :],
                       angles='xy', scale_units='xy', scale=1, color="grey")
        else:
            segments = [[[0, 0], [px, py]] for px, py in pcs[[d1, d2]].T]
            ax.add_collection(LineCollection(segments, axes=ax, alpha=.1, color='black'))

        # Variable names, only where they fall inside the visible window.
        if labels is not None:
            for idx, (px, py) in enumerate(pcs[[d1, d2]].T):
                if xmin <= px <= xmax and ymin <= py <= ymax:
                    plt.text(px, py, labels[idx], fontsize='14', ha='center', va='center',
                             rotation=label_rotation, color="sienna", alpha=0.5)

        # Unit circle.
        unit_circle = plt.Circle((0, 0), 1, facecolor='none', edgecolor='b')
        ax.add_artist(unit_circle)

        plt.xlim(xmin, xmax)
        plt.ylim(ymin, ymax)

        # Horizontal and vertical dashed axes through the origin.
        plt.plot([-1, 1], [0, 0], color='grey', ls='--')
        plt.plot([0, 0], [-1, 1], color='grey', ls='--')

        # Axis names with the percentage of explained variance.
        ratios = pca.explained_variance_ratio_
        plt.xlabel(axis_label(d1 + 1, round(100 * ratios[d1], 1)))
        plt.ylabel(axis_label(d2 + 1, round(100 * ratios[d2], 1)))

        plt.title("Cercle des corrélations (F{} et F{})".format(d1 + 1, d2 + 1), fontsize=20)
        plt.show(block=False)
# "display_factorial_planes" projects the individuals onto the factorial planes:
def display_factorial_planes(X_projected, n_comp, pca, axis_ranks, labels=None, alpha=1, illustrative_var=None):
    """Scatter the projected individuals on each requested factorial plane.

    Parameters
    ----------
    X_projected : 2-D array
        Indexed as ``X_projected[individual, axis]``.
    n_comp : int
        Planes whose second axis index is not below ``n_comp`` are skipped.
    pca : object
        Must expose ``explained_variance_ratio_`` (used in the axis labels).
    axis_ranks : iterable of (int, int)
        Zero-based ``(d1, d2)`` axis pairs; one figure is opened per pair.
    labels : sequence, optional
        One text label per row of ``X_projected``, written at the point.
    alpha : float, optional
        Opacity of the scatter markers.
    illustrative_var : array-like, optional
        One value per individual; each distinct value gets its own scatter
        series and legend entry.
    """
    # The conversion does not depend on the plane, so do it once.
    if illustrative_var is not None:
        illustrative_var = np.array(illustrative_var)

    for d1, d2 in axis_ranks:
        if d2 >= n_comp:
            continue

        plt.figure(figsize=(7, 6))

        if illustrative_var is None:
            plt.scatter(X_projected[:, d1], X_projected[:, d2], alpha=alpha)
        else:
            for value in np.unique(illustrative_var):
                mask = illustrative_var == value
                plt.scatter(X_projected[mask, d1], X_projected[mask, d2],
                            alpha=alpha, label=value)
            plt.legend()

        if labels is not None:
            for i, (x, y) in enumerate(X_projected[:, [d1, d2]]):
                plt.text(x, y, labels[i],
                         fontsize='14', ha='center', va='center')

        # Symmetric square window, 10% larger than the furthest coordinate.
        boundary = np.max(np.abs(X_projected[:, [d1, d2]])) * 1.1
        plt.xlim([-boundary, boundary])
        plt.ylim([-boundary, boundary])

        # Zero axes spanning the whole window.  The previous hard-coded
        # +/-100 segments stopped short whenever coordinates exceeded 100.
        plt.axhline(y=0, color='grey', ls='--')
        plt.axvline(x=0, color='grey', ls='--')

        # Axis names with the percentage of explained variance.
        plt.xlabel('F{} ({}%)'.format(d1 + 1, round(100 * pca.explained_variance_ratio_[d1], 1)))
        plt.ylabel('F{} ({}%)'.format(d2 + 1, round(100 * pca.explained_variance_ratio_[d2], 1)))

        plt.title("Projection des individus (sur F{} et F{})".format(d1 + 1, d2 + 1))
        plt.show(block=False)
# "display_scree_plot" draws the scree plot of the eigenvalues:
def display_scree_plot(pca):
    """Draw the scree plot of a fitted PCA.

    Bars show ``pca.explained_variance_ratio_`` as percentages, one per
    axis rank (starting at 1); the red line shows their cumulative sum.
    """
    inertia_pct = pca.explained_variance_ratio_ * 100
    ranks = np.arange(1, len(inertia_pct) + 1)

    plt.bar(ranks, inertia_pct)
    plt.plot(ranks, inertia_pct.cumsum(), c="red", marker='o')

    plt.xlabel("rang de l'axe d'inertie")
    plt.ylabel("pourcentage d'inertie")
    plt.title("Eboulis des valeurs propres")
    plt.show(block=False)
def binaire(b):
    """Return 0 when ``b`` compares equal to ``False``, otherwise 1.

    The test is an equality test, not a truthiness test: ``0`` and ``0.0``
    map to 0, while ``None``, ``""`` or ``[]`` (falsy but not equal to
    ``False``) map to 1.
    """
    # Equality with False is intentional here; `not b` would behave
    # differently for None and empty containers.
    return 0 if b == False else 1  # noqa: E712
def plot_dendrogram(Z, names, color_threshold=10):
    """Plot a hierarchical-clustering dendrogram with the root at the top.

    Parameters
    ----------
    Z : linkage matrix
        Passed straight to ``scipy.cluster.hierarchy.dendrogram``.
    names : sequence
        Leaf labels, passed as ``labels``.
    color_threshold : float, optional
        Forwarded to ``dendrogram``; defaults to 10, the value that was
        previously hard-coded.
    """
    plt.figure(figsize=(20, 5))
    plt.title("Hierarchical Clustering Dendrogram", fontsize=20)
    # With orientation="top" the links descend from the root, so the merge
    # distance is read on the y axis (the x axis carries the leaf labels).
    plt.ylabel("distance")
    dendrogram(
        Z,
        labels=names,
        orientation="top",
        color_threshold=color_threshold,
    )
    plt.yticks(fontsize=12)
    plt.show()
def plot_sortie_acf(y_acf, y_len, pacf=False):
    """Plot (partial) autocorrelation values as a bar chart.

    Parameters
    ----------
    y_acf : sequence of float
        Correlation values, one per lag, starting at lag 0.
    y_len : int
        Used for the dashed bounds drawn at ``+/-1.96 / sqrt(y_len)``
        (presumably the length of the series -- confirm with callers).
    pacf : bool, optional
        When true, the first value is dropped and the plot is labelled
        "PACF" instead of "ACF".
    """
    if pacf:
        # The first value (lag 0) is dropped, so the remaining bars belong
        # to lags 1..n.  They used to be drawn from x=0, one lag too low.
        y_acf = y_acf[1:]
        lags = range(1, len(y_acf) + 1)
    else:
        lags = range(len(y_acf))

    bound = 1.96 / np.sqrt(y_len)

    plt.figure(figsize=(14, 6))
    plt.bar(lags, y_acf, color="mediumvioletred", width=0.2)
    plt.xlabel("lag")
    # Label the axis after what is actually plotted.
    plt.ylabel("PACF" if pacf else "ACF", fontsize=20)
    plt.axhline(y=0, color="black")
    plt.axhline(y=-bound, color="goldenrod", linestyle="--", linewidth=2)
    plt.axhline(y=bound, color="goldenrod", linestyle="--", linewidth=2)
    plt.ylim(-1, 1)
    plt.show()