//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Flatbuffer schema for Neurosis (FFNN with embeddings) parameters.
//
// Contains the same information as an EmbeddingNetworkProto.

namespace libtextclassifier3.saft_fbs;

// NS stands for NeurosiS.  The next two digits are meant to identify
// incompatible versions.  Ideally, we'll never have to go beyond 00.
file_identifier "NS00";

// Should be kept in sync with the C++ enum nlp_saft::QuantizationType.
enum QuantizationType : byte {
  NONE = 0,
  UINT8 = 1,
  UINT4 = 2,
  FLOAT16 = 3,
}

table Matrix {
  // Number of rows of this matrix.
  rows:int;

  // Number of columns of this matrix.
  cols:int;

  // Type of quantization used for the values from this matrix.
  //
  // If this is QuantizationType_NONE, then the unquantized values should be
  // stored in |values| below.  Otherwise, the bytes of the quantized values
  // should be stored in |quantized_values| and the float16 quantization scales
  // should be stored in |scales|.
  quantization_type:QuantizationType = NONE;

  // Non-quantized matrix elements, in row-major order.  See comments for
  // |quantization_type|.
  values:[float];

  // Quantized matrix elements, in row-major order.  See comments for
  // |quantization_type|.
  quantized_values:[ubyte];

  // Quantization factors (float16), one per matrix row.  There is no float16
  // primitive type for flatbuffers, we just use another 16 bit type.  See
  // comments for |quantization_type|.
  scales:[ushort];
}

// The input layer for a Neurosis network is composed of several parts (named
// "chunks" below, "embedding spaces" in some other parts, etc).  For each
// chunk, we have |num_features| features that extract feature values in that
// chunk.  All values extracted by a feature get projected via the embedding
// matrix |embedding| and summed together, producing a vector of
// |embedding.cols| elements.  The resulting vector gets concatenated with the
// similar vectors for other |num_features| features, producing a "chunk" of
// |num_features * embedding.cols| elements.  This chunk gets concatenated with
// the other chunks.
//
// Note: the specification that indicates what those |num_features| features are
// is stored elsewhere (usually in a ModelParameter, see model.fbs).  But we
// need to know |num_features| here, in order to specify the geometry of the
// Neurosis network.
table InputChunk {
  embedding:Matrix;
  num_features:int;
}

// One layer of neurons from the Neurosis network.  This table can represent a
// hidden layer or the final (output / softmax) layer.
//
// Our formalism is a bit different, but equivalent to the usual description
// from the literature:
//
// Technically, in Neurosis, each layer takes an input (a vector of floats); if
// this is not the first layer, we apply a nonlinear function (ReLU); for the
// first layer, we skip ReLU.  Next, we multiply by |weights| and add |bias|,
// get the input for the next level and so on.  The output from the last layer
// is generally used for softmax classification.  That's why we say that the
// last layer is the "softmax layer".
table NeuralLayer {
  // Weight matrix for this layer.  Geometry: num_inputs x num_neurons, where
  // num_inputs is the number of values produced by previous layer (which can be
  // the input layer, or another hidden layer) and num_neurons is the number of
  // neurons from this layer.
  weights:Matrix;

  // Bias vector for this layer.
  //
  // NOTE: right now, we accept both 1 x num_neurons and num_neurons x 1
  // geometries: the layout of the elements is the same in both cases.
  bias:Matrix;
}

table EmbeddingNetwork {
  // Specification of the chunks that compose the input layer.
  input_chunks:[InputChunk];

  // Hidden layers, followed by the final (softmax) layer.
  layers:[NeuralLayer];
}

root_type EmbeddingNetwork;