pytorch geometric的GNN、GCN的節點分類方式
更新時間:2022年12月17日 09:49:06 作者:zhangztSky
這篇文章主要介紹了pytorch geometric的GNN、GCN的節點分類方式,具有很好的參考價值,希望對大家有所幫助。如有錯誤或未考慮完全的地方,望不吝賜教
pytorch geometric的GNN、GCN節點分類
# -*- coding: utf-8 -*-
"""Node classification on a Planetoid graph (Cora) with PyTorch Geometric.

Defines two small graph networks (``GraphCNN`` and ``OwnGCN``), a dataset
loader, a dataset summary printer, and two entry points that report train/test
accuracy of a model on the single graph held by the dataset.
"""
import os
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.datasets import Planetoid
import torch_geometric.nn as pyg_nn
import torch_geometric.transforms as T

# Checkpoint read by ``main`` (default mode) and ``test_main``.  Nothing in this
# script writes it; it must already exist on disk.
CHECKPOINT_PATH = "node_classify/best.pth"


# load dataset
def get_data(folder="node_classify/cora", data_name="cora"):
    """Return the Planetoid dataset ``data_name`` stored under ``folder``.

    Node features are passed through ``T.NormalizeFeatures()`` every time a
    graph is read from the dataset.
    """
    # dataset = Planetoid(root=folder, name=data_name)
    dataset = Planetoid(root=folder, name=data_name,
                        transform=T.NormalizeFeatures())
    return dataset


# create the graph cnn model
class GraphCNN(nn.Module):
    """Two ``GCNConv`` layers with a ReLU in between and a log-softmax output."""

    def __init__(self, in_c, hid_c, out_c):
        """Build the layers.

        Args:
            in_c: number of input feature channels.
            hid_c: number of hidden channels.
            out_c: number of output channels (classes).
        """
        super(GraphCNN, self).__init__()
        self.conv1 = pyg_nn.GCNConv(in_channels=in_c, out_channels=hid_c)
        self.conv2 = pyg_nn.GCNConv(in_channels=hid_c, out_channels=out_c)

    def forward(self, data):
        """Return per-node log-probabilities for the graph in ``data``.

        Only ``data.x`` and ``data.edge_index`` are read.
        """
        x = data.x  # [N, C]
        edge_index = data.edge_index  # [2 ,E]
        hid = self.conv1(x=x, edge_index=edge_index)  # [N, D]
        hid = F.relu(hid)
        out = self.conv2(x=hid, edge_index=edge_index)  # [N, out_c]
        out = F.log_softmax(out, dim=1)  # [N, out_c]
        return out


class OwnGCN(nn.Module):
    """``SGConv`` -> ``APPNP`` -> ``APPNP`` -> ``SGConv`` with light dropout."""

    def __init__(self, in_c, hid_c, out_c):
        """Build the layers.

        Args:
            in_c: number of input feature channels.
            hid_c: number of hidden channels.
            out_c: number of output channels (classes).
        """
        super(OwnGCN, self).__init__()
        self.in_ = pyg_nn.SGConv(in_c, hid_c, K=2)
        self.conv1 = pyg_nn.APPNP(K=2, alpha=0.1)
        self.conv2 = pyg_nn.APPNP(K=2, alpha=0.1)
        self.out_ = pyg_nn.SGConv(hid_c, out_c, K=2)

    def forward(self, data):
        """Return per-node log-probabilities for the graph in ``data``.

        Dropout (p=0.1) is active only while the module is in training mode.
        """
        x, edge_index = data.x, data.edge_index
        x = self.in_(x, edge_index)
        x = F.dropout(x, p=0.1, training=self.training)
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.1, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.out_(x, edge_index)
        return F.log_softmax(x, dim=1)


# todo list
class YourOwnGCN(nn.Module):
    """Placeholder for a user-defined model; intentionally empty."""
    pass


def analysis_data(dataset):
    """Print summary statistics of ``dataset`` and of its first graph."""
    # Indexing the dataset re-applies its transform, so fetch the graph once.
    graph = dataset[0]
    print("Basic Info: ", graph)
    print("# Nodes: ", graph.num_nodes)
    print("# Features: ", graph.num_features)
    print("# Edges: ", graph.num_edges)
    print("# Classes: ", dataset.num_classes)
    print("# Train samples: ", graph.train_mask.sum().item())
    print("# Valid samples: ", graph.val_mask.sum().item())
    print("# Test samples: ", graph.test_mask.sum().item())
    print("Undirected: ", graph.is_undirected())


def _load_model(device, path=CHECKPOINT_PATH):
    """Load the pickled model stored at ``path`` onto ``device``.

    ``map_location`` makes a checkpoint that was saved from a CUDA device
    loadable on a CPU-only machine, matching the CPU fallback used when
    ``device`` is chosen.
    """
    # NOTE(review): torch.load unpickles the file, so only load checkpoints you
    # trust.  Recent PyTorch releases default to weights_only=True, which is
    # expected to reject a fully pickled module -- confirm against the
    # installed version and pass weights_only=False if required.
    return torch.load(path, map_location=device)


def _masked_accuracy(prediction, target, mask):
    """Return the fraction of nodes selected by ``mask`` predicted correctly."""
    correct = prediction[mask].eq(target[mask]).sum().item()
    return correct / mask.sum().item()


@torch.no_grad()
def _evaluate(model, data):
    """Print the accuracy of ``model`` on the train and test nodes of ``data``."""
    model.eval()
    _, prediction = model(data).max(dim=1)
    target = data.y
    train_acc = _masked_accuracy(prediction, target, data.train_mask)
    test_acc = _masked_accuracy(prediction, target, data.test_mask)

    print("==" * 20)
    print("Accuracy of Train Samples: {:.04f}".format(train_acc))
    print("Accuracy of Test Samples: {:.04f}".format(test_acc))


def _train(model, data, optimizer, epochs=500):
    """Run ``epochs`` full-graph training steps, printing loss and val accuracy.

    Validation accuracy is computed from the same training-mode forward pass
    that produced the loss (dropout still active).
    """
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        _, prediction = output.max(dim=1)
        valid_acc = _masked_accuracy(prediction, data.y, data.val_mask)
        print("Epoch: {:03d}".format(epoch + 1),
              "Loss: {:.04f}".format(loss.item()),
              "Valid Accuracy:: {:.4f}".format(valid_acc))


def main(train=False):
    """Report train/test accuracy on Cora.

    Args:
        train: when False (default) the model is read from
            ``CHECKPOINT_PATH``; when True a fresh ``OwnGCN`` is trained for
            500 epochs and that trained model is evaluated instead.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    cora_dataset = get_data()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = cora_dataset[0].to(device)

    if train:
        # todo list
        # my_net = GraphCNN(in_c=cora_dataset.num_features, hid_c=150, out_c=cora_dataset.num_classes)
        my_net = OwnGCN(in_c=cora_dataset.num_features, hid_c=300,
                        out_c=cora_dataset.num_classes)
        my_net = my_net.to(device)
        optimizer = torch.optim.Adam(my_net.parameters(), lr=1e-2,
                                     weight_decay=1e-3)
        # model train
        _train(my_net, data, optimizer)
    else:
        my_net = _load_model(device)

    # model test
    _evaluate(my_net, data)


def test_main():
    """Load the checkpoint and print its train/test accuracy on Cora."""
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cora_dataset = get_data()
    data = cora_dataset[0].to(device)
    _evaluate(_load_model(device), data)


if __name__ == '__main__':
    test_main()
    # main()
    # dataset = get_data()
    # analysis_data(dataset)
pytorch下GCN代碼解讀
def main():
    """Print a greeting; unrelated to the GCN example that follows."""
    print("hello world")


main()

import os.path as osp
import argparse

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, ChebConv  # noqa

# A GCN extracts features from the graph, which are then used to classify nodes.

# ---- Dataset initialisation ----
parser = argparse.ArgumentParser()
parser.add_argument('--use_gdc', action='store_true',
                    help='Use GDC preprocessing.')
args = parser.parse_args()  # whether to apply GDC preprocessing

dataset = 'CiteSeer'  # name of the dataset used for training
# Directory in which the dataset is stored.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
# Build the dataset object; features are normalised when a graph is read.
dataset = Planetoid(root=path, name=dataset, transform=T.NormalizeFeatures())
data = dataset[0]  # the dataset's single graph is the training data
# print(data)

# ---- Preprocessing and model definition ----
if args.use_gdc:
    gdc = T.GDC(
        self_loop_weight=1,
        normalization_in='sym',
        normalization_out='col',
        diffusion_kwargs=dict(method='ppr', alpha=0.05),
        sparsification_kwargs=dict(method='topk', k=128, dim=0),
        exact=True,
    )
    data = gdc(data)  # graph diffusion convolution as a preprocessing step


class Net(torch.nn.Module):
    """Two-layer GCN operating on the module-level ``data`` graph."""

    def __init__(self):
        """Create the two convolution layers (the learnable parameters)."""
        super().__init__()
        # When GDC has been applied, the layers skip their own normalisation.
        use_norm = not args.use_gdc
        # First layer: num_features inputs -> 16 hidden channels.
        self.conv1 = GCNConv(dataset.num_features, 16, cached=True,
                             normalize=use_norm)
        # Second layer: 16 hidden channels -> num_classes outputs.
        self.conv2 = GCNConv(16, dataset.num_classes, cached=True,
                             normalize=use_norm)
        # self.conv1 = ChebConv(data.num_features, 16, K=2)
        # self.conv2 = ChebConv(16, data.num_features, K=2)

    def forward(self):
        """Return per-node log-probabilities.

        Computes Z = log_softmax(conv2(dropout(relu(conv1(X))))) using the
        features, edge index and edge attributes of the module-level ``data``.
        """
        features = data.x
        edges = data.edge_index
        weights = data.edge_attr
        hidden = self.conv1(features, edges, weights)
        hidden = F.relu(hidden)  # non-linearity after the first layer
        # Dropout is only active in training mode.
        hidden = F.dropout(hidden, training=self.training)
        scores = self.conv2(hidden, edges, weights)
        # Softmax over dim=1, followed by log.
        return F.log_softmax(scores, dim=1)


# Pick the device and move both the model and the graph onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)

# ---- Optimiser ----
# Only perform weight-decay on first convolution.
optimizer = torch.optim.Adam(
    [
        {'params': model.conv1.parameters(), 'weight_decay': 5e-4},
        {'params': model.conv2.parameters(), 'weight_decay': 0},
    ],
    lr=0.01,  # learning rate
)


def train():
    """Run one optimisation step on the training nodes."""
    model.train()  # training mode
    optimizer.zero_grad()  # clear gradients from the previous step
    log_probs = model()
    loss = F.nll_loss(log_probs[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()  # apply the parameter update


@torch.no_grad()  # no gradients are tracked during evaluation
def test():
    """Return ``[train_acc, val_acc, test_acc]`` for the current model."""
    model.eval()  # evaluation mode
    logits = model()
    accs = []
    # Each mask selects the nodes that belong to one split.
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        _, pred = logits[mask].max(1)  # index of the largest log-probability
        n_correct = pred.eq(data.y[mask]).sum().item()
        n_total = mask.sum().item()
        accs.append(n_correct / n_total)
    return accs


# ---- Run ----
best_val_acc = 0
test_acc = 0
# Field types and number of decimals for the per-epoch report.
log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
for epoch in range(1, 201):
    train()
    # Accuracy on the training, validation and test splits.
    train_acc, val_acc, tmp_test_acc = test()
    # Report the test accuracy from the epoch with the best validation score.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    print(log.format(epoch, train_acc, best_val_acc, test_acc))
總結
以上為個人經驗,希望能給大家一個參考,也希望大家多多支持腳本之家。
相關文章
39條Python語句實現數字華容道
這篇文章主要為大家詳細介紹了39條Python語句實現數字華容道,文中示例代碼介紹的非常詳細,具有一定的參考價值,感興趣的小伙伴們可以參考一下
2021-04-04
親測解決tensorflow和keras版本不匹配的問題
這篇文章主要介紹了親測解決tensorflow和keras版本不匹配問題,完美解決:ImportError: No module named 'tensorflow.python.eager'問題,具有很好的參考價值,希望對大家有所幫助,如有錯誤或未考慮完全的地方,望不吝賜教
2024-03-03
python對一個數向上取整的實例方法
在本篇文章中小編給大家整理了關于python對一個數向上取整的實例方法,需要的朋友們可以跟著學習下。
2020-06-06
使用pandas模塊讀取csv文件和excel表格,并用matplotlib畫圖的方法
今天小編就為大家分享一篇使用pandas模塊讀取csv文件和excel表格,并用matplotlib畫圖的方法,具有很好的參考價值,希望對大家有所幫助。一起跟隨小編過來看看吧
2018-06-06
Python實例一個類背后發生了什么
Python實例一個類背后發生了什么,本文為大家一一列出,感興趣的朋友可以參考一下
2016-02-02