
测试代码为:
[plain]
view
plaincopy
% Demo: classify test samples with a 3-nearest-neighbour rule.
% Trailing semicolons keep the loaded matrices from being echoed in full
% to the command window.
train_data  = load('sample_feature.txt');   % training features
train_label = load('train_label.txt');      % class label of each training sample
test_data   = load('features.txt');         % features of the samples to classify

% Arguments: k = 3 neighbours, cosine distance, ties between classes broken
% at random.
% NOTE(review): knnclassify belongs to the Bioinformatics Toolbox and has
% been removed from recent MATLAB releases in favour of fitcknn -- confirm
% against the release you are running.
% Left unsuppressed on purpose so the predicted labels are displayed.
k = knnclassify(test_data, train_data, train_label, 3, 'cosine', 'random')
train_data保存的是训练样本特征,要求是最能代表本类别的,不一定多,当然不能太少;
train_label保存的是样本标号,如0,1,2等等,随便设置,只要能区分就行,具体格式可以为:
[plain]
view
plaincopy
1
1
2
2
3
3
test_data测试文件保存的是测试数据的特征;
function [ccr,pgroupt]=knnt(x,group,K,dist,xt,groupt)
%# KNNT  Classify test-set or unknown objects with the K Nearest Neighbour rule.
%#
%# function [ccr,pgroupt]=knnt(x,group,K,dist,xt,groupt)
%#
%# AIM: to classify test set objects or unknown objects with the
%# K Nearest Neighbour method
%#
%# PRINCIPLE: KNN is a supervised, deterministic, non-parametric
%# classification method. It uses the majority rule to
%# assign new objects to a class.
%# It is assumed that the number of objects in each class
%# is similar.
%# There are no assumptions about the data distribution and
%# the variance-covariance matrices of each class.
%# There is no limitation of the number of variables when
%# the Euclidean distance is used.
%# However, when the correlation coefficient is used, the
%# number of variables must be larger than 1.
%# Ref: Massart D. L., Vandeginste B. G. M., Deming S. N.,
%# Michotte Y. and Kaufman L., Chemometrics: a textbook,
%# Chapter 23, 395-397, Elsevier Science Publishers B. V.,
%# Amsterdam 1988.
%#
%# INPUT: x: (mxn) data matrix with m objects and n variables,
%# containing samples of several classes (training set)
%# group: (mx1) vector labelling the m objects from the
%# training set (row or column orientation accepted)
%# K: integer in 1..m, number of nearest neighbours
%# dist: integer,
%# = 1, (squared) Euclidean distance
%# = 2, 1 - r^2 with r the correlation coefficient
%# (No. of variables >1); any value other than 1
%# selects this measure
%# xt: (mtxn) data matrix with mt objects and n variables
%# (test set or unknowns)
%# groupt: (mtx1) vector labelling the mt objects from
%# the test set (row or column orientation accepted)
%# -->if the new objects are unknown, input [] or omit.
%#
%# OUTPUT: ccr: scalar, correct classification rate;
%# [] when groupt is empty (unknown objects)
%# pgroupt: row vector, predicted class label for the test set
%# 0 means that the object is not classified to any
%# class (tie between classes)
%#
%# SUBROUTINES: none (class counting uses the built-ins unique/accumarray)
%#
%# AUTHOR: Wen Wu
%# Copyright(c) 1997 for ChemoAc
%# FABI, Vrije Universiteit Brussel
%# Laarbeeklaan 103 1090 Jette
%#
%# VERSION: 1.1 (28/02/1998)
%#
%# TEST: Andrea Candolfi
%#

if nargin < 6, groupt = []; end     % unknown objects: no reference labels

useEuclidean = (dist == 1);         % every other value selects the correlation measure

% Force both label vectors into row orientation independently, so a row
% 'group' combined with a column 'groupt' (or vice versa) still works.
group  = group(:)';
groupt = groupt(:)';

[m, n]   = size(x);                 % size of the training set
[mt, nt] = size(xt);                % size of the test set

% corrcoef needs at least two variables per object
if ~useEuclidean && n < 2
    error('Number of variables must >1');
end
if mt > 0 && nt ~= n
    error('knnt: xt must have the same number of variables (columns) as x');
end
if numel(group) ~= m
    error('knnt: group must contain one label per row of x');
end
if ~isempty(groupt) && numel(groupt) ~= mt
    error('knnt: groupt must contain one label per row of xt');
end
if ~isscalar(K) || K < 1 || K > m || K ~= fix(K)
    error('knnt: K must be an integer between 1 and the number of training objects');
end

% Distance between each test object (rows) and each training object (columns)
dis = zeros(mt, m);
for i = 1:mt
    for j = 1:m
        if useEuclidean
            delta = xt(i,:) - x(j,:);
            dis(i,j) = delta * delta';      % squared Euclidean distance (same ranking)
        else
            r = corrcoef(xt(i,:)', x(j,:)');  % 2x2 correlation matrix
            dis(i,j) = 1 - r(1,2)^2;          % 1 - squared correlation coefficient
        end
    end
end

% Majority vote among the nearest neighbours
lab = zeros(1, mt);                 % 0 = not classified
for i = 1:mt
    [sortedDist, order] = sort(dis(i,:));
    % Keep every training object at least as close as the K-th one, so
    % objects tied with the K-th neighbour also vote.
    neighbours = group(order(sortedDist <= sortedDist(K)));
    if isempty(neighbours)
        continue                    % e.g. K-th distance is NaN: leave unclassified
    end
    [classLabels, ~, classIdx] = unique(neighbours);
    votes  = accumarray(classIdx(:), 1);    % number of neighbours per class
    winner = find(votes == max(votes));
    if length(winner) == 1
        lab(i) = classLabels(winner);       % a single class holds the majority
    end                                     % otherwise tie: label stays 0
end

% Success rate; always assign ccr so requesting it never fails for unknowns
if isempty(groupt)
    ccr = [];
else
    ccr = sum(groupt == lab) / mt;
end

pgroupt = lab;                      % the output vector
欢迎分享,转载请注明来源:内存溢出
微信扫一扫
支付宝扫一扫
评论列表(0条)