基于Pytorch Geometric在昇腾上实现GAT、GraphSAGE、GCN图神经网络
全文给出三种模型的代码示例,以GraphSage为例主要讲解
GraphSage
因为昇腾暂时还不支持PyG的torch_scatter、torch_sparse等加速库,所以还不能使用mini-batch方式训练GNN,此处都是in-memory的全图训练方式。
- 首先构建一个GraphSAGE网络
以下代码是一个两层的GraphSAGE网络,利用PyG提供的SAGEConv进行构建。
class GraphSAGE_NET(torch.nn.Module):
    """Two-layer GraphSAGE network built from PyG's SAGEConv layers."""

    def __init__(self, feature, hidden, classes):
        super(GraphSAGE_NET, self).__init__()
        self.sage1 = SAGEConv(feature, hidden)   # input features -> hidden
        self.sage2 = SAGEConv(hidden, classes)   # hidden -> class scores

    def forward(self, data):
        # Unpack node features and graph connectivity from the PyG Data object.
        x, edge_index = data.x, data.edge_index
        x = self.sage1(x, edge_index)
        # ReLU + dropout (dropout only active while self.training is True).
        x = F.dropout(F.relu(x), training=self.training)
        return F.log_softmax(self.sage2(x, edge_index), dim=1)
- 下载数据集,此处国内可能超时无法下载,可以修改url或者手动下载
# Fetch the CiteSeer citation dataset; Planetoid downloads it into `root`
# on the first run (may time out in some regions — see the note above).
print("===== begin Download Dadasat=====\n")
dataset = Planetoid(root='/root/data', name='CiteSeer')
print("===== Download Dadasat finished=====\n")
- 设置设备为npu,将数据迁移到设备上
# Select the Ascend NPU and move both the model and the single full graph
# onto it (full-graph, in-memory training — no mini-batching).
device = 'npu'
model = GraphSAGE_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
data = dataset[0].to(device)
- 进行模型训练
# Full-graph training: 200 epochs of Adam on the train-mask nodes only.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for _ in range(200):  # the epoch counter itself is not used
    optimizer.zero_grad()
    log_probs = model(data)
    nll = F.nll_loss(log_probs[data.train_mask], data.y[data.train_mask])
    nll.backward()
    optimizer.step()
- 进行测试
# Evaluate on the held-out test mask and report accuracy.
model.eval()
pred = model(data).argmax(dim=1)
test_mask = data.test_mask
correct = int((pred[test_mask] == data.y[test_mask]).sum().item())
acc = correct / int(test_mask.sum())
print('GraphSAGE Accuracy: {:.4f}'.format(acc))
代码
import torch
import torch.nn.functional as F
# 导入GraphSAGE层
from torch_geometric.nn import SAGEConv
from torch_geometric.datasets import Planetoid
#导入Npu相关库
import torch_npu
from torch_npu.contrib import transfer_to_npu
class GraphSAGE_NET(torch.nn.Module):
    """Two-layer GraphSAGE for full-graph node classification.

    Args:
        feature: dimensionality of the input node features.
        hidden: width of the hidden layer.
        classes: number of output classes.
    """

    def __init__(self, feature, hidden, classes):
        super(GraphSAGE_NET, self).__init__()
        # First aggregation layer: input features -> hidden representation.
        self.sage1 = SAGEConv(feature, hidden)
        # Second aggregation layer: hidden representation -> class scores.
        self.sage2 = SAGEConv(hidden, classes)

    def forward(self, data):
        """Return per-node log class probabilities."""
        node_feats = data.x
        adjacency = data.edge_index
        hidden_repr = F.relu(self.sage1(node_feats, adjacency))
        # Dropout is only applied while the module is in training mode.
        hidden_repr = F.dropout(hidden_repr, training=self.training)
        logits = self.sage2(hidden_repr, adjacency)
        return F.log_softmax(logits, dim=1)
# --- Data: CiteSeer citation graph (downloaded on first run) ---------------
print("===== begin Download Dadasat=====\n")
dataset = Planetoid(root='/root/data', name='CiteSeer')
print("===== Download Dadasat finished=====\n")

# --- Device and model setup ------------------------------------------------
# transfer_to_npu (imported above) redirects CUDA-style calls to the NPU.
device = 'npu'
model = GraphSAGE_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
data = dataset[0].to(device)
print(data)

# --- Training: full-graph, 200 epochs of Adam ------------------------------
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for _ in range(200):
    optimizer.zero_grad()
    log_probs = model(data)
    loss = F.nll_loss(log_probs[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# --- Evaluation on the test mask -------------------------------------------
model.eval()
pred = model(data).argmax(dim=1)
correct = int((pred[data.test_mask] == data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GraphSAGE Accuracy: {:.4f}'.format(acc))
GCN
代码
# 导入torch及相关库,便于后续搭建网络调用基础算子模块
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import GCNConv
class GCN_NET(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        features: dimensionality of the input node features.
        hidden: width of the hidden layer.
        classes: number of output classes.
    """

    def __init__(self, features, hidden, classes):
        super(GCN_NET, self).__init__()
        # Graph convolution: input feature dim -> hidden dim.
        self.conv1 = GCNConv(features, hidden)
        # Graph convolution: hidden dim -> number of node classes.
        self.conv2 = GCNConv(hidden, classes)

    def forward(self, data):
        """Return log-probabilities over classes for every node."""
        # Node features and adjacency (edge index) from the Data object.
        feats, topology = data.x, data.edge_index
        h = F.relu(self.conv1(feats, topology))
        # Dropout reduces overfitting; inactive in eval mode.
        h = F.dropout(h, training=self.training)
        return F.log_softmax(self.conv2(h, topology), dim=1)
# --- Data: Cora citation graph (downloaded on first run; see note below
# about manual download if the fetch times out) ------------------------------
from torch_geometric.datasets import Planetoid
import numpy as np

print("===== begin Download Dadasat=====\n")
dataset = Planetoid(root='/home/wzq/pyg_test/data', name='Cora')
print("===== Download Dadasat finished=====\n")

# To run on an Ascend NPU instead of CUDA, use:
#   import torch_npu
#   from torch_npu.contrib import transfer_to_npu
#   device = 'npu'
device = 'cuda:0'
print("device is: ", device)

# Model with a 16-unit hidden layer, moved to the selected device along
# with the single full graph.
model = GCN_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
data = dataset[0].to(device)

# --- Training: Adam with weight decay, 200 full-graph epochs ----------------
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for _ in range(200):
    optimizer.zero_grad()
    log_probs = model(data)
    loss = F.nll_loss(log_probs[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# --- Evaluation: accuracy on the held-out test mask -------------------------
model.eval()
pred = model(data).argmax(dim=1)
correct = int((pred[data.test_mask] == data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GCN Accuracy: {:.4f}'.format(acc))
GAT
代码
import torch
import torch.nn.functional as F
# 导入GATConv层
from torch_geometric.nn import GATConv
from torch_geometric.datasets import Planetoid
#导入Npu相关库
import torch_npu
from torch_npu.contrib import transfer_to_npu
class GAT_NET(torch.nn.Module):
    """Two-layer graph attention network (GAT) for node classification.

    Args:
        features: dimensionality of the input node features.
        hidden: per-head width of the first attention layer.
        classes: number of output classes.
        heads: number of attention heads in the first layer (default 4).
    """

    def __init__(self, features, hidden, classes, heads=4):
        super(GAT_NET, self).__init__()
        # BUG FIX: the original hard-coded heads=4 here, silently ignoring
        # the `heads` argument. Pass it through so the first layer's output
        # width stays consistent with the hidden*heads input of the second
        # layer for any caller-supplied head count (default unchanged).
        self.gat1 = GATConv(features, hidden, heads=heads)
        # Multi-head attention concatenates the per-head outputs, so the
        # second layer consumes hidden * heads features and emits one score
        # per class with a single (default) head.
        self.gat2 = GATConv(hidden * heads, classes)

    def forward(self, data):
        """Return per-node log class probabilities."""
        # Node features and edge list from the PyG Data object.
        x, edge_index = data.x, data.edge_index
        # First attention layer followed by non-linearity and dropout
        # (dropout only active in training mode) to curb overfitting.
        x = self.gat1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # Second attention layer produces the class logits.
        x = self.gat2(x, edge_index)
        return F.log_softmax(x, dim=1)
# --- Data: PubMed citation graph (downloaded on first run) ------------------
print("===== begin Download Dadasat=====\n")
dataset = Planetoid(root='/root/data', name='PubMed')
print("===== Download Dadasat finished=====\n")

device = 'cpu'
print(device)

# Build the GAT model (16 hidden units per head) on the chosen device,
# together with the single full graph.
model = GAT_NET(dataset.num_node_features, 16, dataset.num_classes).to(device)
data = dataset[0].to(device)

# --- Training: 200 full-graph epochs of Adam --------------------------------
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for _ in range(200):
    optimizer.zero_grad()
    log_probs = model(data)
    loss = F.nll_loss(log_probs[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# --- Evaluation: accuracy on the held-out test mask -------------------------
model.eval()
pred = model(data).argmax(dim=1)
correct = int((pred[data.test_mask] == data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GAT Accuracy: {:.4f}'.format(acc))
- 随机文章
- 热门文章
- 热评文章
- 深入解析电脑性能测试工具:从基础知识到实际应用电脑性能测试工具箱
- 探索自我:通过心理测试了解你的内心世界心理测试小问题怎么解决
- 智力测试:科学方法与应用智力测试最权威的方法
- 深入解析:生辰八字姓名测试打分免费服务
- 测你情绪易怒性有多少
- 四双眼睛选一 测测你善良还是伪善
- Java 网络编程性能优化:高吞吐量的实现方法
- 鸿蒙系统升级了,开发者该如何“见招拆招”?——适配挑战与应对策略【华为根技术】
- 自动化测试赋能鸿蒙开发:效率与质量齐飞的未来【华为根技术】