python k-means一堆乱七八糟的程序

python k-means一堆乱七八糟的程序,第1张

概述:python k-means F:\PythonProject\K-Means  import pandas as pd; import numpy as np; from sklearn.preprocessing import StandardScaler; from sklearn.model_selection import train_test_split; import matplotlib.pyplot as plt; from sklearn.cluster import KMe…

python k-means

 

F:\PythonProject\K-Means

 

 

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Cumulative explained-variance ratio that the leading principal components
# must exceed; main() reports the index of the first component that crosses it.
threshold_value = 0.85

# Only the first SAMPLE_LIMIT rows of the feature file are analysed.
SAMPLE_LIMIT = 20
# Number of trailing rows (of the analysed rows) held out from PCA fitting.
HOLDOUT_SIZE = 5
# Number of k-means clusters; the plotting styles below cover three clusters.
N_CLUSTERS = 3


def _sorted_eigen(cov_matrix):
    """Return (eigenvalues, eigenvectors) of a symmetric matrix, largest first.

    ``np.linalg.eigh`` is used instead of ``np.linalg.eig`` because a
    covariance matrix is symmetric: ``eigh`` always returns real values,
    whereas ``eig`` may return complex ones for rank-deficient input, which
    cannot be sorted.
    """
    eigen_val, eigen_vec = np.linalg.eigh(cov_matrix)
    order = np.argsort(eigen_val)[::-1]
    return eigen_val[order], eigen_vec[:, order]


def _first_index_above(cumulative, threshold):
    """Return the first index whose value exceeds *threshold*, or -1 if none."""
    hits = np.flatnonzero(np.asarray(cumulative) > threshold)
    return int(hits[0]) if hits.size else -1


def main():
    """Run a PCA projection and a k-means clustering on local CSV data.

    Reads ``d_1.txt`` (columns 0-1: a two-part key, column 1 also used as the
    class label, columns 2+: features) and ``f_1.txt`` (columns 0-1: the same
    two-part key, column 2: a name).  The first ``SAMPLE_LIMIT`` rows are
    used.  The function prints intermediate results, shows a scatter plot of
    the training rows projected on the first two principal components, then
    clusters the raw features with k-means, prints the names that ``f_1.txt``
    associates with each cluster member, and shows a scatter plot of the
    clusters over the first two feature columns.
    """
    # Load data from local files.
    df_wine = pd.read_csv('d_1.txt', header=None)
    df_wine2 = pd.read_csv('f_1.txt', header=None)

    x = df_wine.iloc[:, 2:].values[:SAMPLE_LIMIT]
    y = df_wine.iloc[:, 1].values[:SAMPLE_LIMIT]
    z_frame = df_wine.iloc[:, 0:2].values[:SAMPLE_LIMIT]
    z_frame_f = df_wine2.iloc[:, 0:2].values
    label_name_f = df_wine2.iloc[:, 2].values

    print("{0}    {1}".format(len(x), len(y)))

    # Hold out the LAST rows so the test rows never overlap the training rows.
    x_train = x[0:len(x) - HOLDOUT_SIZE]
    y_train = y[0:len(y) - HOLDOUT_SIZE]
    x_test = x[len(x) - HOLDOUT_SIZE:]
    print(len(x_train))
    print(x_train)
    print("----------------------------------------")

    # Standardize features to zero mean / unit variance.  The scaler is fitted
    # on the training rows only; the held-out rows reuse those statistics.
    sc = StandardScaler()
    x_train_std = sc.fit_transform(x_train)
    x_test_std = sc.transform(x_test)  # kept for later evaluation; not used below
    print(len(x_train_std))

    # Build the covariance matrix and get its eigenvalues and eigenvectors.
    cov_matrix = np.cov(x_train_std.T)
    eigen_val, eigen_vec = _sorted_eigen(cov_matrix)
    print(len(eigen_val))
    print(len(eigen_vec))

    # Explained-variance ratio per component (descending) and its running sum.
    var_exp = eigen_val / eigen_val.sum()
    cum_var_exp = np.cumsum(var_exp)
    print(cum_var_exp)

    index_x0 = _first_index_above(cum_var_exp, threshold_value)
    print("PCA:", index_x0)
    print("====================================")

    X = np.array(x)
    print("---------m----------------")

    # Projection matrix W: the two eigenvectors with the largest eigenvalues.
    w = eigen_vec[:, :2]
    x_train_pca = x_train_std.dot(w)
    print("-------------------------")

    color = ['r', 'g', 'b']
    marker = ['s', 'x', 'o']
    for i, c, m in zip(np.unique(y_train), color, marker):
        plt.scatter(x_train_pca[y_train == i, 0],
                    x_train_pca[y_train == i, 1],
                    c=c, label=i, marker=m)

    plt.title('Result')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.legend(loc='lower left')
    plt.show()

    # Cluster the raw (unscaled) features of all analysed rows.
    estimator = KMeans(n_clusters=N_CLUSTERS)
    estimator.fit(X)
    label_pred = estimator.labels_
    center_p = estimator.cluster_centers_
    print("============聚类中心================")
    print(center_p)
    print("============================")
    print(label_pred)

    # For every clustered row, collect the names whose two-part key in
    # f_1.txt matches the row's key in d_1.txt, grouped by cluster.
    names_by_cluster = [[] for _ in range(N_CLUSTERS)]
    for row, cluster in enumerate(label_pred):
        key = z_frame[row]
        for candidate, name in zip(z_frame_f, label_name_f):
            if candidate[0] == key[0] and key[1] == candidate[1]:
                print("{0}   {1}  {2}  {3}".format(
                    cluster + 1, candidate[0], key[1], name))
                names_by_cluster[cluster].append(name)

    print("=========================================")
    for cluster, names in enumerate(names_by_cluster):
        prefix = "\n" if cluster == 0 else ""
        print("{0}===={1}===".format(prefix, cluster + 1))
        print(names)

    print("\n\n\n\n\n============================")
    print(label_pred)
    print(y_train)
    print("============================")

    # Plot the clusters over the first two feature columns.  Boolean-mask
    # selection keeps a 2-D shape even when a cluster has no members.
    cluster_styles = [("red", 'o'), ("green", '*'), ("blue", '+')]
    for cluster, (cluster_color, cluster_marker) in enumerate(cluster_styles):
        members = X[label_pred == cluster]
        plt.scatter(members[:, 0], members[:, 1],
                    c=cluster_color, marker=cluster_marker,
                    label='label{0}'.format(cluster))
    plt.legend(loc=2)
    plt.show()


if __name__ == '__main__':
    main()

 

 

 

 

########################33

总结

以上是内存溢出为你收集整理的python k-means 一堆乱七八糟的程序全部内容,希望文章能够帮你解决python k-means 一堆乱七八糟的程序所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

欢迎分享,转载请注明来源:内存溢出

原文地址:https://54852.com/langs/1186182.html

(0)
打赏 微信扫一扫微信扫一扫 支付宝扫一扫支付宝扫一扫
上一篇 2022-06-03
下一篇2022-06-03

发表评论

登录后才能评论

评论列表(0条)

    保存