'''
k:k值
testdata:測試數據集
traindata:訓練數據集
labels:分類標簽
'''

def knn(k, testdata, traindata, labels):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``testdata`` and every row of
    ``traindata``.

    Args:
        k: Number of nearest training samples that vote. Values larger than
            the number of training rows are clamped to that number.
        testdata: 1-D feature vector of the sample to classify.
        traindata: 2-D array with one training sample per row.
        labels: Sequence where ``labels[i]`` is the label of ``traindata[i]``.

    Returns:
        The label with the most votes. On a tie, the label that was
        encountered first (i.e. belonging to the nearer neighbour) wins.

    Raises:
        IndexError: If no neighbour can vote (``k < 1`` or no training rows).
    """
    import numpy as np
    from collections import Counter

    # Broadcasting subtracts the sample from every training row at once, so
    # no explicit tiling of the sample is needed.
    dif = np.asarray(traindata) - np.asarray(testdata)
    distance = np.sqrt((dif ** 2).sum(axis=1))
    nearest = distance.argsort()  # row indices, closest first

    # Clamp k so that asking for more neighbours than exist does not index
    # past the end; a non-positive k yields no voters at all.
    voters = nearest[:max(0, min(k, len(nearest)))]
    votes = Counter(labels[idx] for idx in voters)
    return votes.most_common(1)[0][0]

二、準備數據

用最笨的方法，手寫了一批png格式的數字圖片：

圖片的尺寸都是統一的：32*32像素
圖片的命名也是統一的：數字標簽+"_"+第n張圖+".png"

1、將圖片轉換成數組矩陣

訓練數據集與測試數據集都是標準化后的數組矩陣，而我們的試驗對象是手寫體數字圖片，首先需要將圖片進行一下數據化處理。

def img2Model(originDataPath, modelpath):
    """Convert every image below ``originDataPath`` into a 32x32 0/1 text matrix.

    ``originDataPath`` is expected to contain sub-directories, each holding
    image files. Every image is decoded as grayscale, resized to 32x32 and
    binarised: pixels brighter than 127 become 0, all others become 1. The
    result is written to ``modelpath`` as ``<image name without extension>.txt``
    with space-separated integers.

    Args:
        originDataPath: Directory whose sub-directories contain the images.
        modelpath: Directory that receives the generated ``.txt`` files.
    """
    for child in os.listdir(originDataPath):
        child_dir = os.path.join(originDataPath, child)
        for filename in os.listdir(child_dir):
            filepath = os.path.join(child_dir, filename)
            # Read the raw bytes and decode in memory; flag 0 requests a
            # single-channel grayscale image.
            # NOTE(review): presumably done instead of cv2.imread to cope
            # with non-ASCII paths - confirm.
            img = cv2.imdecode(np.fromfile(filepath, dtype=np.uint8), 0)
            if img is None:
                # Not a decodable image; skip it rather than crash in resize.
                continue
            img = cv2.resize(img, (32, 32))
            # Bright pixels (> 127) -> 0, dark pixels -> 1.
            binary = np.where(img > 127, 0, 1)
            dstFileName = os.path.join(modelpath, filename.split('.')[0] + '.txt')
            np.savetxt(dstFileName, binary, fmt='%d', delimiter=' ')

三、處理數據：訓練集與測試集

1、區分訓練集和測試集

# 隨機分揀出測試集，其他文件為訓練集
def shutildata(modelpath, trainpath, testpath):
    """Randomly split the files in ``modelpath`` into a test and a training set.

    File names are expected to look like ``<label>_<sequence>.<ext>``. Ten
    files are drawn at random (with replacement); every file whose sequence
    part matches one of the drawn files is copied to ``testpath``, all other
    well-formed files are copied to ``trainpath``. Files without an
    underscore-separated sequence part are ignored.

    Args:
        modelpath: Directory containing the files to split.
        trainpath: Destination directory for the training files.
        testpath: Destination directory for the test files.
    """
    txtlist = os.listdir(modelpath)
    if not txtlist:
        return  # nothing to split

    def sequence_id(filename):
        # "<label>_<sequence>.<ext>" -> "<sequence>"; None when malformed.
        parts = filename.split(".")[0].split("_")
        return parts[1] if len(parts) > 1 else None

    # random.choice never indexes past the end, unlike randint(0, len(...)),
    # whose upper bound is inclusive.
    test_ids = {sequence_id(random.choice(txtlist)) for _ in range(10)}
    test_ids.discard(None)

    for name in txtlist:
        seq = sequence_id(name)
        if seq is None:
            continue
        destination = testpath if seq in test_ids else trainpath
        try:
            shutil.copy(os.path.join(modelpath, name), destination)
        except OSError as err:
            # Keep the split best-effort, but do not hide the failure.
            print("skip {}: {}".format(name, err))

2、加載數據

# 加載數據

def load_data(dataFilePath):
    """Load a whitespace-delimited integer matrix and return it flattened.

    Args:
        dataFilePath: Path of the text file to read.

    Returns:
        A 1-D integer numpy array containing the file's values in row order.
    """
    # The builtin int replaces np.int, an alias that newer NumPy releases
    # no longer provide.
    return np.loadtxt(dataFilePath, dtype=int).flatten()

3、建立訓練數據

# 建立訓練數據集
def makeTrainData(trainpath):
    """Build the training matrix and label list from the files in ``trainpath``.

    The label of a file is the integer before the first ``_`` in its name
    (extension removed). Files whose label part is empty are skipped entirely
    so that row ``i`` of the matrix always corresponds to ``labels[i]``.

    Args:
        trainpath: Directory containing the training ``.txt`` files.

    Returns:
        A tuple ``(trainarr, labels)`` where ``trainarr`` has shape
        ``(number of labelled files, 1024)`` and ``labels`` is the list of
        integer labels in the same order.

    Raises:
        ValueError: If a non-empty label part is not an integer.
    """
    labelled = []
    for filename in os.listdir(trainpath):
        label_text = filename.split(".")[0].split("_")[0]
        if label_text:
            labelled.append((int(label_text), filename))

    # Size the matrix from the labelled files only; otherwise an unlabelled
    # file would occupy a row without a matching label.
    trainarr = np.zeros((len(labelled), 1024))
    labels = []
    for row, (label, filename) in enumerate(labelled):
        trainarr[row, :] = load_data(os.path.join(trainpath, filename))
        labels.append(label)
    return trainarr, labels

四、測試數據

# 驗證
def validate(testpath, trainpath, k):
    """Classify every file in ``testpath`` with KNN and print the accuracy.

    The training set is built from ``trainpath``. The expected label of a
    test file is the part of its name before the first ``_``. Both expected
    and predicted labels are translated through the mapping stored in
    ``num_char.json`` (read from the current working directory) before they
    are printed and compared.

    Args:
        testpath: Directory containing the test ``.txt`` files.
        trainpath: Directory containing the training ``.txt`` files.
        k: Number of neighbours passed to ``knn``.
    """
    trainarr, labels = makeTrainData(trainpath)
    testfiles = os.listdir(testpath)

    # Lookup table from numeric label (as string) to the displayed character.
    with open('num_char.json', 'r', encoding='utf-8') as f:
        num_to_char = json.load(f)

    count = 0
    for filename in testfiles:
        testarr = load_data(os.path.join(testpath, filename))
        result = knn(k, testarr, trainarr, labels)

        testpicname = num_to_char[str(filename.split("_")[0])]
        result = num_to_char[str(result)]

        print("真正字母:"+filename +" ?" + testpicname + " ?" + "測試結(jié)果為:{}".format(result))
        if str(testpicname) == str(result):
            count += 1
    print("-----------------------------")
    # The second placeholder must be a plain "{}"; a named field here would
    # make str.format raise KeyError.
    print("測試集為:{}個(gè),其中正確了{}個(gè)".format(len(testfiles),count))
    if testfiles:
        # Skip the ratio for an empty test set to avoid dividing by zero.
        print("正確率為{}".format(count / len(testfiles)))
    print()

到此這篇關于基于Python手寫拼音識別的文章就介紹到這了,更多相關Python手寫拼音識別內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章希望大家以后多多支持腳本之家！

欧美bbbwbbbw肥妇,免费乱码人妻系列日韩,一级黄片

軟件下載

源碼下載

軟件編程

網絡編程

在線工具

數據庫

CMS

常用工具

基于Python手寫拼音識別

目錄

一、算法構造

1.簡單介紹一下knn算法

2.Python實現KNN

二、準備數據

1、將圖片轉換成數組矩陣

三、處理數據：訓練集與測試集

1、區分訓練集和測試集

2、加載數據

3、建立訓練數據

四、測試數據

相關文章

最新評論

大家感興趣的內容

最近更新的內容

常用在線小工具

基于Python手寫拼音識別

目錄

一、算法構(gòu)造

1.簡單介紹一下knn算法

2.Python實(shí)現(xiàn)KNN

二、準(zhǔn)備數(shù)據(jù)

1、將圖片轉(zhuǎn)換成數(shù)組矩陣

三、處理數(shù)據(jù)：訓(xùn)練集與測試集

1、區(qū)分訓(xùn)練集和測試集

2、加載數(shù)據(jù)

3、建立訓(xùn)練數(shù)據(jù)

四、測試數(shù)據(jù)

相關(guān)文章

最新評論

大家感興趣的內(nèi)容

最近更新的內(nèi)容

常用在線小工具

一、算法構造

二、準備數據

1、將圖片轉換成數組矩陣

三、處理數據：訓練集與測試集

2、加載數據

3、建立訓練數據

四、測試數據