PyTorch教程-6.5. 自定义层

深度学习成功背后的一个因素是有种类丰富的层可供使用，这些层可以以创造性的方式组合，以设计适合各种任务的架构。例如，研究人员发明了专门用于处理图像、文本、循环处理序列数据以及执行动态规划的层。迟早，您会遇到或需要发明深度学习框架中尚不存在的层。在这些情况下，您必须构建自定义层。在本节中，我们将向您展示如何操作。
import torchfrom torch import nnfrom torch.nn import functional as ffrom d2l import torch as d2l
from mxnet import np, npxfrom mxnet.gluon import nnfrom d2l import mxnet as d2lnpx.set_np()
import jaxfrom flax import linen as nnfrom jax import numpy as jnpfrom d2l import jax as d2l
No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
import tensorflow as tffrom d2l import tensorflow as d2l
6.5.1. 没有参数的层
首先，我们构建一个自定义层，它自己没有任何参数。如果您还记得我们在第 6.1 节中对模块的介绍，这应该看起来很熟悉。以下 centeredlayer类只是从其输入中减去平均值。要构建它，我们只需要继承基础层类并实现前向传播功能。
class centeredlayer(nn.Module):
    """Parameter-free PyTorch layer that subtracts the mean from its input."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` shifted so that its overall mean is zero.

        ``x.mean()`` is called without arguments, so the mean is taken over
        all elements of ``x``.
        """
        return x - x.mean()
class centeredlayer(nn.Block):
    """Parameter-free MXNet Gluon block that subtracts the mean from its input."""

    def __init__(self, **kwargs):
        # Forward any keyword arguments to the base block unchanged.
        super().__init__(**kwargs)

    def forward(self, x):
        """Return ``x`` minus the mean of all its elements."""
        return x - x.mean()
class centeredlayer(nn.Module):
    """Parameter-free Flax module that subtracts the mean from its input."""

    def __call__(self, x):
        """Return ``x`` minus the mean of all its elements."""
        return x - x.mean()
class centeredlayer(tf.keras.Model):
    """Parameter-free Keras model that subtracts the mean from its input."""

    def __init__(self):
        super().__init__()

    def call(self, x):
        """Return ``x`` minus ``tf.reduce_mean(x)`` (mean over all elements)."""
        return x - tf.reduce_mean(x)
让我们通过提供一些数据来验证我们的层是否按预期工作。
# Feed a small tensor through the layer to check that it centers the data.
layer = centeredlayer()
layer(torch.tensor([1.0, 2, 3, 4, 5]))
tensor([-2., -1., 0., 1., 2.])
# Feed a small array through the layer to check that it centers the data.
layer = centeredlayer()
layer(np.array([1.0, 2, 3, 4, 5]))
array([-2., -1., 0., 1., 2.])
# Feed a small array through the layer to check that it centers the data.
layer = centeredlayer()
layer(jnp.array([1.0, 2, 3, 4, 5]))
array([-2., -1., 0., 1., 2.], dtype=float32)
# Feed a small tensor through the layer to check that it centers the data.
layer = centeredlayer()
layer(tf.constant([1.0, 2, 3, 4, 5]))
我们现在可以将我们的层合并为构建更复杂模型的组件。
# Use the custom layer as one component of a larger model:
# a lazily-shaped linear layer with 128 outputs followed by centering.
net = nn.Sequential(nn.LazyLinear(128), centeredlayer())
# Use the custom layer as one component of a larger model:
# a dense layer with 128 outputs followed by centering.
net = nn.Sequential()
net.add(nn.Dense(128), centeredlayer())
net.initialize()
# Use the custom layer as one component of a larger model:
# a dense layer with 128 outputs followed by centering.
net = nn.Sequential([nn.Dense(128), centeredlayer()])
# Use the custom layer as one component of a larger model:
# a dense layer with 128 outputs followed by centering.
net = tf.keras.Sequential([tf.keras.layers.Dense(128), centeredlayer()])
作为额外的健全性检查，我们可以通过网络发送随机数据并检查均值实际上是否为 0。因为我们处理的是浮点数，由于量化，我们可能仍然会看到非常小的非零数。
# Send random data through the network and inspect the mean of the output.
y = net(torch.rand(4, 8))
y.mean()
tensor(0., grad_fn=<MeanBackward0>)
# Send random data through the network and inspect the mean of the output.
y = net(np.random.rand(4, 8))
y.mean()
array(3.783498e-10)
Here we utilize the init_with_output method, which returns both the output of the network and the parameters. In this case we only focus on the output.
# init_with_output returns a pair; only the first element (the network
# output) is kept here, the second is discarded.
y, _ = net.init_with_output(d2l.get_key(),
                            jax.random.uniform(d2l.get_key(), (4, 8)))
y.mean()
array(5.5879354e-09, dtype=float32)
# Send random data through the network and inspect the mean of the output.
y = net(tf.random.uniform((4, 8)))
tf.reduce_mean(y)
6.5.2. 带参数的层
现在我们知道如何定义简单的层，让我们继续定义具有可通过训练调整的参数的层。我们可以使用内置函数来创建参数，这些参数提供了一些基本的内务处理功能。特别是，它们管理访问、初始化、共享、保存和加载模型参数。这样，除了其他好处之外，我们将不需要为每个自定义层编写自定义序列化例程。
现在让我们实现我们自己的全连接层版本。回想一下，该层需要两个参数，一个代表权重，另一个代表偏置。在此实现中，我们默认内置了 ReLU 激活函数。该层需要两个输入参数：in_units 和 units，分别表示输入和输出的数量。
class mylinear(nn.Module):
    """Fully connected PyTorch layer with a built-in ReLU activation.

    Args:
        in_units: Number of input features.
        units: Number of output features.
    """

    def __init__(self, in_units, units):
        super().__init__()
        # Both parameters are initialized from a standard normal distribution.
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, x):
        """Return ``relu(x @ weight + bias)``."""
        # Use the parameters themselves rather than their ``.data`` so that
        # autograd tracks the computation and gradients reach weight and bias.
        linear = torch.matmul(x, self.weight) + self.bias
        return f.relu(linear)
接下来，我们实例化该类mylinear并访问其模型参数。
# Instantiate the layer (5 inputs, 3 outputs) and look at its weight parameter.
linear = mylinear(5, 3)
linear.weight
Parameter containing:
tensor([[-1.2894e+00,  6.5869e-01, -1.3933e+00],
        [ 7.2590e-01,  7.1593e-01,  1.8115e-03],
        [-1.5900e+00,  4.1654e-01, -1.3358e+00],
        [ 2.2732e-02, -2.1329e+00,  1.8811e+00],
        [-1.0993e+00,  2.9763e-01, -1.4413e+00]], requires_grad=True)
class mydense(nn.Block):
    """Fully connected MXNet Gluon block with a built-in ReLU activation.

    Args:
        units: Number of output features.
        in_units: Number of input features.
        **kwargs: Forwarded unchanged to the base block constructor.
    """

    def __init__(self, units, in_units, **kwargs):
        super().__init__(**kwargs)
        # Register the parameters through the block's parameter dictionary.
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        """Return ``relu(dot(x, weight) + bias)``."""
        # Read each parameter's value on the same context (device) as x.
        linear = np.dot(x, self.weight.data(ctx=x.ctx)) + self.bias.data(
            ctx=x.ctx)
        return npx.relu(linear)
Next, we instantiate the mydense class and access its model parameters.
# Instantiate the block (5 inputs, 3 outputs) and look at its parameters.
dense = mydense(units=3, in_units=5)
dense.params
mydense0_ (
  Parameter mydense0_weight (shape=(5, 3), dtype=<class 'numpy.float32'>)
  Parameter mydense0_bias (shape=(3,), dtype=<class 'numpy.float32'>)
)
class mydense(nn.Module):
    """Fully connected Flax module with a built-in ReLU activation."""

    # Number of input features.
    in_units: int
    # Number of output features.
    units: int

    def setup(self):
        # Weight uses a normal initializer with stddev=1; bias starts at zero.
        self.weight = self.param('weight', nn.initializers.normal(stddev=1),
                                 (self.in_units, self.units))
        self.bias = self.param('bias', nn.initializers.zeros, self.units)

    def __call__(self, x):
        """Return ``relu(x @ weight + bias)``."""
        linear = jnp.matmul(x, self.weight) + self.bias
        return nn.relu(linear)
Next, we instantiate the mydense class and access its model parameters.
# Instantiate the module (5 inputs, 3 outputs) and create its parameters
# by initializing with a dummy input of shape (3, 5).
dense = mydense(5, 3)
params = dense.init(d2l.get_key(), jnp.zeros((3, 5)))
params
frozendict({ params: { weight: array([[-0.02040312, 1.0439496 , -2.3386796 ], [ 1.1002127 , -1.780812 , -0.32284564], [-0.6944499 , -1.8438653 , -0.5338283 ], [ 1.3954164 , 1.5816483 , 0.0469989 ], [-0.12351853, 1.2818031 , 0.7964193 ]], dtype=float32), bias: array([0., 0., 0.], dtype=float32), },})
class mydense(tf.keras.Model):
    """Fully connected Keras model with a built-in ReLU activation.

    Args:
        units: Number of output features.
    """

    def __init__(self, units):
        super().__init__()
        self.units = units

    def build(self, x_shape):
        """Create weight and bias; the weight's first dimension is the last
        entry of ``x_shape``."""
        self.weight = self.add_weight(
            name='weight',
            shape=[x_shape[-1], self.units],
            initializer=tf.random_normal_initializer())
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer=tf.zeros_initializer())

    def call(self, x):
        """Return ``relu(x @ weight + bias)``."""
        linear = tf.matmul(x, self.weight) + self.bias
        return tf.nn.relu(linear)
Next, we instantiate the mydense class and access its model parameters.
# Instantiate the model with 3 outputs, call it once on a (2, 5) input,
# then read back its weights.
dense = mydense(3)
dense(tf.random.uniform((2, 5)))
dense.get_weights()
[array([[-0.08860754, -0.04000078, -0.03810905], [-0.0543257 , -0.01143957, -0.06748273], [-0.05273567, -0.01696461, -0.00552523], [ 0.00193098, 0.0662979 , -0.05486313], [-0.08595717, 0.08563109, 0.04592342]], dtype=float32), array([0., 0., 0.], dtype=float32)]
我们可以直接使用自定义层进行前向传播计算。
# Forward pass: apply the custom layer to a random input of shape (2, 5).
linear(torch.rand(2, 5))
tensor([[0.0000, 1.7772, 0.0000], [0.0000, 1.0303, 0.0000]])
# Initialize the block's parameters, then run a forward pass on a random
# input of shape (2, 5).
dense.initialize()
dense(np.random.uniform(size=(2, 5)))
array([[0. , 0.01633355, 0. ], [0. , 0.01581812, 0. ]])
# Forward pass: apply the module with the given params to a random input
# of shape (2, 5).
dense.apply(params, jax.random.uniform(d2l.get_key(), (2, 5)))
array([[0.05256309, 0. , 0. ], [0.3500959 , 0. , 0.30999148]], dtype=float32)
# Forward pass: apply the custom layer to a random input of shape (2, 5).
dense(tf.random.uniform((2, 5)))
我们还可以使用自定义层构建模型。一旦我们有了它，我们就可以像使用内置的全连接层一样使用它。
# Stack two custom layers (64 -> 8 -> 1) and run a random (2, 64) input through them.
net = nn.Sequential(mylinear(64, 8), mylinear(8, 1))
net(torch.rand(2, 64))
tensor([[0.], [0.]])
# Stack two custom blocks (64 -> 8 -> 1), initialize them, and run a
# random (2, 64) input through them.
net = nn.Sequential()
net.add(mydense(8, in_units=64), mydense(1, in_units=8))
net.initialize()
net(np.random.uniform(size=(2, 64)))
array([[0.06508517], [0.0615553 ]])
# Stack two custom modules (64 -> 8 -> 1); init_with_output returns a pair,
# of which only the first element (the network output) is kept.
net = nn.Sequential([mydense(64, 8), mydense(8, 1)])
y, _ = net.init_with_output(d2l.get_key(),
                            jax.random.uniform(d2l.get_key(), (2, 64)))
y
array([[8.348445 ], [2.0591817]], dtype=float32)
# Stack two custom layers (8 outputs, then 1) and run a random (2, 64)
# input through them.
net = tf.keras.models.Sequential([mydense(8), mydense(1)])
net(tf.random.uniform((2, 64)))
6.5.3. 小结
我们可以通过基本层类来设计自定义层。这使我们能够定义灵活的新层，这些层的行为不同于库中的任何现有层。一旦定义好，自定义层就可以在任意上下文和架构中被调用。层可以有局部参数，可以通过内置函数创建。
6.5.4. 练习
设计一个接受输入并计算张量缩减（tensor reduction）的层，即它返回 $y_k = \sum_{i, j} W_{ijk} x_i x_j$。
设计一个返回数据傅里叶系数前半部分的层。

5G时代最新MIMO技术介绍与测试方案（一）
DSOX4022A示波器了解一下
智能监控提高配电网可靠性
超强三防！ToughPad FZ-A2松下平板电脑
DRAM价格走势将持平，DDR3短期仍为主流
PyTorch教程-6.5. 自定义图层
新唐科技Smart Amp简介英文版
业界判断，中兴事件将随着中美斡旋而出现转机
Libra缺乏加密密钥安全的哪一部分
从几个方面来看物联网技术在日常生活中的应用
做联盟链可以一帆风顺吗
手机充电器语音芯片—WTN6的特点是怎样的
GM8775C型DSI转双通道LVDS发送器
Nvidia/ARM交易，能怎样为下一个计算机时代创造主导的生态系统
回流焊的特性_影响回流焊产品性能的因素
全球机器人行业面临新的转折点
2021年三星半导体部门设备投资额有望再创历史新高
WAN服务类型
物联网的数据洪流强化如何来实现
关于联想电脑防水的秘密介绍