//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Flatbuffer schema for Neurosis (FFNN with embeddings) parameters.
//
// Contains the same information as an EmbeddingNetworkProto.

namespace libtextclassifier3.saft_fbs;

// NS stands for NeurosiS. The next two digits are meant to identify
// incompatible versions. Ideally, we'll never have to go beyond 00.
file_identifier "NS00";

// Should be kept in sync with the C++ enum nlp_saft::QuantizationType.
enum QuantizationType : byte {
  NONE = 0,
  UINT8 = 1,
  UINT4 = 2,
  FLOAT16 = 3,
}

table Matrix {
  // Number of rows of this matrix.
  rows:int;

  // Number of columns of this matrix.
  cols:int;

  // Type of quantization used for the values of this matrix.
  //
  // If this is QuantizationType_NONE, then the unquantized values should be
  // stored in |values| below. Otherwise, the bytes of the quantized values
  // should be stored in |quantized_values| and the float16 quantization scales
  // should be stored in |scales|.
  quantization_type:QuantizationType = NONE;

  // Non-quantized matrix elements, in row-major order. See comments for
  // |quantization_type|.
  values:[float];

  // Quantized matrix elements, in row-major order. See comments for
  // |quantization_type|.
  quantized_values:[ubyte];

  // Quantization factors (float16), one per matrix row. There is no float16
  // primitive type for flatbuffers; we just use another 16-bit type. See
  // comments for |quantization_type|.
  scales:[ushort];
}

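// For illustration only, a C++-style sketch of how a consumer might
// dequantize one element under UINT8 quantization. This is an assumption
// about the decoding convention, which actually lives in the C++ code that
// reads this schema; Float16To32 is a hypothetical helper that decodes the
// raw ushort scale bits:
//
//   float DequantizedValue(const Matrix &m, int row, int col) {
//     // One float16 scale per row, stored as raw ushort bits.
//     const float scale = Float16To32(m.scales()->Get(row));
//     // Quantized elements are in row-major order.
//     const uint8_t q = m.quantized_values()->Get(row * m.cols() + col);
//     return scale * static_cast<float>(q);
//   }
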
// The input layer for a Neurosis network is composed of several parts (named
// "chunks" below, "embedding spaces" in some other parts, etc). For each
// chunk, we have |num_features| features that extract feature values for that
// chunk. All values extracted by one feature get projected via the embedding
// matrix |embedding| and summed together, producing a vector of
// |embedding.cols| elements. The resulting vector gets concatenated with the
// similar vectors for the other features of the same chunk, producing a
// "chunk" of |num_features * embedding.cols| elements. This chunk gets
// concatenated with the other chunks. (A small worked example follows the
// table definition below.)
//
// Note: the specification that indicates what those |num_features| features
// are is stored elsewhere (usually in a ModelParameter, see model.fbs). But
// we need to know |num_features| here, in order to specify the geometry of
// the Neurosis network.
table InputChunk {
  embedding:Matrix;
  num_features:int;
}

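// For example (illustrative numbers only): with two chunks, where chunk 0
// has num_features = 3 and an embedding matrix with cols = 16, and chunk 1
// has num_features = 2 and cols = 8, chunk 0 yields 3 * 16 = 48 elements,
// chunk 1 yields 2 * 8 = 16 elements, and the full input layer has
// 48 + 16 = 64 elements.
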
// One layer of neurons from the Neurosis network. This table can represent a
// hidden layer or the final (output / softmax) layer.
//
// Our formalism is a bit different from, but equivalent to, the usual
// description from the literature:
//
// Technically, in Neurosis, each layer takes an input (a vector of floats);
// if this is not the first layer, we apply a nonlinear function (ReLU); for
// the first layer, we skip ReLU. Next, we multiply by |weights| and add
// |bias|, obtaining the input for the next layer, and so on. The output from
// the last layer is generally used for softmax classification. That's why we
// say that the last layer is the "softmax layer". (A forward-pass sketch
// follows the table definition below.)
table NeuralLayer {
  // Weight matrix for this layer. Geometry: num_inputs x num_neurons, where
  // num_inputs is the number of values produced by the previous layer (which
  // can be the input layer, or another hidden layer) and num_neurons is the
  // number of neurons of this layer.
  weights:Matrix;

  // Bias vector for this layer.
  //
  // NOTE: right now, we accept both 1 x num_neurons and num_neurons x 1
  // geometries: the layout of the elements is the same in both cases.
  bias:Matrix;
}

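// For illustration only, the recurrence described above as a C++-style
// sketch (Relu, MatVec, and Add are hypothetical helpers on plain float
// vectors; the real implementation lives in the C++ code that reads this
// schema):
//
//   std::vector<float> h = input_layer;  // concatenation of all chunks
//   for (int i = 0; i < num_layers; ++i) {
//     if (i > 0) h = Relu(h);  // ReLU is skipped for the first layer.
//     h = Add(MatVec(h, layers[i].weights), layers[i].bias);
//   }
//   // h now holds the activations of the last ("softmax") layer.
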
table EmbeddingNetwork {
  // Specification of the chunks that compose the input layer.
  input_chunks:[InputChunk];

  // Hidden layers, followed by the final (softmax) layer.
  layers:[NeuralLayer];
}

root_type EmbeddingNetwork;
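
// For illustration only, a minimal C++ sketch of reading a serialized buffer
// (the generated header name and |buffer_data| are assumptions; flatc
// generates the GetEmbeddingNetwork() root accessor from the root_type
// above):
//
//   #include "embedding_network_generated.h"  // hypothetical header name
//
//   using namespace libtextclassifier3::saft_fbs;
//   const EmbeddingNetwork *network = GetEmbeddingNetwork(buffer_data);
//   const int num_chunks = network->input_chunks()->size();
//   const int num_layers = network->layers()->size();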