563 lines
24 KiB
C++
563 lines
24 KiB
C++
/*
|
|
* Copyright (C) 2018 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "annotator/pod_ner/pod-ner-impl.h"
|
|
|
|
#include <iostream>
|
|
#include <memory>
|
|
#include <thread> // NOLINT(build/c++11)
|
|
|
|
#include "annotator/model_generated.h"
|
|
#include "annotator/types.h"
|
|
#include "utils/jvm-test-utils.h"
|
|
#include "utils/test-data-test-utils.h"
|
|
#include "utils/tokenizer-utils.h"
|
|
#include "utils/utf8/unicodetext.h"
|
|
#include "utils/utf8/unilib.h"
|
|
#include "gmock/gmock.h"
|
|
#include "gtest/gtest.h"
|
|
|
|
namespace libtextclassifier3 {
|
|
namespace {
|
|
|
|
using ::testing::IsEmpty;
|
|
using ::testing::Not;
|
|
|
|
using PodNerModel_::Label_::BoiseType;
|
|
using PodNerModel_::Label_::BoiseType_BEGIN;
|
|
using PodNerModel_::Label_::BoiseType_END;
|
|
using PodNerModel_::Label_::BoiseType_INTERMEDIATE;
|
|
using PodNerModel_::Label_::BoiseType_O;
|
|
using PodNerModel_::Label_::BoiseType_SINGLE;
|
|
using PodNerModel_::Label_::MentionType;
|
|
using PodNerModel_::Label_::MentionType_NAM;
|
|
using PodNerModel_::Label_::MentionType_NOM;
|
|
using PodNerModel_::Label_::MentionType_UNDEFINED;
|
|
|
|
constexpr int kMinNumberOfTokens = 1;
|
|
constexpr int kMinNumberOfWordpieces = 1;
|
|
constexpr float kDefaultPriorityScore = 0.5;
|
|
|
|
class PodNerTest : public testing::Test {
|
|
protected:
|
|
PodNerTest() {
|
|
PodNerModelT model;
|
|
|
|
model.min_number_of_tokens = kMinNumberOfTokens;
|
|
model.min_number_of_wordpieces = kMinNumberOfWordpieces;
|
|
model.priority_score = kDefaultPriorityScore;
|
|
|
|
const std::string tflite_model_buffer =
|
|
GetTestFileContent("annotator/pod_ner/test_data/tflite_model.tflite");
|
|
model.tflite_model = std::vector<uint8_t>(tflite_model_buffer.begin(),
|
|
tflite_model_buffer.end());
|
|
const std::string word_piece_vocab_buffer =
|
|
GetTestFileContent("annotator/pod_ner/test_data/vocab.txt");
|
|
model.word_piece_vocab = std::vector<uint8_t>(
|
|
word_piece_vocab_buffer.begin(), word_piece_vocab_buffer.end());
|
|
|
|
flatbuffers::FlatBufferBuilder builder;
|
|
builder.Finish(PodNerModel::Pack(builder, &model));
|
|
|
|
model_buffer_ =
|
|
std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
|
|
builder.GetSize());
|
|
model_ = static_cast<const PodNerModel*>(
|
|
flatbuffers::GetRoot<PodNerModel>(model_buffer_.data()));
|
|
|
|
model.append_final_period = true;
|
|
flatbuffers::FlatBufferBuilder builder_append_final_period;
|
|
builder_append_final_period.Finish(
|
|
PodNerModel::Pack(builder_append_final_period, &model));
|
|
|
|
model_buffer_append_final_period_ =
|
|
std::string(reinterpret_cast<const char*>(
|
|
builder_append_final_period.GetBufferPointer()),
|
|
builder_append_final_period.GetSize());
|
|
model_append_final_period_ =
|
|
static_cast<const PodNerModel*>(flatbuffers::GetRoot<PodNerModel>(
|
|
model_buffer_append_final_period_.data()));
|
|
|
|
unilib_ = CreateUniLibForTesting();
|
|
}
|
|
|
|
std::string model_buffer_;
|
|
const PodNerModel* model_;
|
|
std::string model_buffer_append_final_period_;
|
|
const PodNerModel* model_append_final_period_;
|
|
std::unique_ptr<UniLib> unilib_;
|
|
};
|
|
|
|
TEST_F(PodNerTest, AnnotateSmokeTest) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(
|
|
UTF8ToUnicodeText("Google New York , in New York"), &annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
}
|
|
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(
|
|
UTF8ToUnicodeText("Jamie I'm in the first picture and Cameron and Zach "
|
|
"are in the second "
|
|
"picture."),
|
|
&annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
}
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateEmptyInput) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(""), &annotations));
|
|
EXPECT_THAT(annotations, IsEmpty());
|
|
}
|
|
}
|
|
|
|
void FillCollections(
|
|
const std::vector<std::string>& collection_names,
|
|
const std::vector<float>& single_token_priority_scores,
|
|
const std::vector<float>& multi_token_priority_scores,
|
|
std::vector<std::unique_ptr<PodNerModel_::CollectionT>>* collections) {
|
|
ASSERT_TRUE(collection_names.size() == single_token_priority_scores.size() &&
|
|
collection_names.size() == multi_token_priority_scores.size());
|
|
collections->clear();
|
|
for (int i = 0; i < collection_names.size(); ++i) {
|
|
collections->push_back(std::make_unique<PodNerModel_::CollectionT>());
|
|
collections->back()->name = collection_names[i];
|
|
collections->back()->single_token_priority_score =
|
|
single_token_priority_scores[i];
|
|
collections->back()->multi_token_priority_score =
|
|
multi_token_priority_scores[i];
|
|
}
|
|
}
|
|
|
|
void EmplaceToLabelVector(
|
|
BoiseType boise_type, MentionType mention_type, int collection_id,
|
|
std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
|
|
labels->push_back(std::make_unique<PodNerModel_::LabelT>());
|
|
labels->back()->boise_type = boise_type;
|
|
labels->back()->mention_type = mention_type;
|
|
labels->back()->collection_id = collection_id;
|
|
}
|
|
|
|
void FillLabels(int num_collections,
|
|
std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
|
|
labels->clear();
|
|
for (auto boise_type :
|
|
{BoiseType_BEGIN, BoiseType_END, BoiseType_INTERMEDIATE}) {
|
|
for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
|
|
for (int i = 0; i < num_collections - 1; ++i) { // skip undefined
|
|
EmplaceToLabelVector(boise_type, mention_type, i, labels);
|
|
}
|
|
}
|
|
}
|
|
EmplaceToLabelVector(BoiseType_O, MentionType_UNDEFINED, num_collections - 1,
|
|
labels);
|
|
for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
|
|
for (int i = 0; i < num_collections - 1; ++i) { // skip undefined
|
|
EmplaceToLabelVector(BoiseType_SINGLE, mention_type, i, labels);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateDefaultCollections) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
std::string multi_word_location = "I live in New York";
|
|
std::string single_word_location = "I live in Zurich";
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
|
|
&annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
EXPECT_EQ(annotations[0].classification[0].collection, "location");
|
|
EXPECT_EQ(annotations[0].classification[0].priority_score,
|
|
kDefaultPriorityScore);
|
|
|
|
annotations.clear();
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
|
|
&annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
EXPECT_EQ(annotations[0].classification[0].collection, "location");
|
|
EXPECT_EQ(annotations[0].classification[0].priority_score,
|
|
kDefaultPriorityScore);
|
|
}
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateConfigurableCollections) {
|
|
std::unique_ptr<PodNerModelT> unpacked_model(model_->UnPack());
|
|
ASSERT_TRUE(unpacked_model != nullptr);
|
|
|
|
float xxx_single_token_priority = 0.9;
|
|
float xxx_multi_token_priority = 1.7;
|
|
const std::vector<std::string> collection_names = {
|
|
"art", "consumer_good", "event", "xxx",
|
|
"organization", "ner_entity", "person", "undefined"};
|
|
FillCollections(collection_names,
|
|
/*single_token_priority_scores=*/
|
|
{0., 0., 0., xxx_single_token_priority, 0., 0., 0., 0.},
|
|
/*multi_token_priority_scores=*/
|
|
{0., 0., 0., xxx_multi_token_priority, 0., 0., 0., 0.},
|
|
&(unpacked_model->collections));
|
|
FillLabels(collection_names.size(), &(unpacked_model->labels));
|
|
flatbuffers::FlatBufferBuilder builder;
|
|
builder.Finish(PodNerModel::Pack(builder, unpacked_model.get()));
|
|
std::string model_buffer =
|
|
std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
|
|
builder.GetSize());
|
|
std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
|
|
static_cast<const PodNerModel*>(
|
|
flatbuffers::GetRoot<PodNerModel>(model_buffer.data())),
|
|
*unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
std::string multi_word_location = "I live in New York";
|
|
std::string single_word_location = "I live in Zurich";
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
|
|
&annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
|
|
EXPECT_EQ(annotations[0].classification[0].priority_score,
|
|
xxx_multi_token_priority);
|
|
|
|
annotations.clear();
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
|
|
&annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
|
|
EXPECT_EQ(annotations[0].classification[0].priority_score,
|
|
xxx_single_token_priority);
|
|
}
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateMinNumTokens) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
std::string text = "in New York";
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
}
|
|
|
|
std::unique_ptr<PodNerModelT> unpacked_model(model_->UnPack());
|
|
ASSERT_TRUE(unpacked_model != nullptr);
|
|
|
|
unpacked_model->min_number_of_tokens = 4;
|
|
flatbuffers::FlatBufferBuilder builder;
|
|
builder.Finish(PodNerModel::Pack(builder, unpacked_model.get()));
|
|
|
|
std::string model_buffer =
|
|
std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
|
|
builder.GetSize());
|
|
annotator = PodNerAnnotator::Create(
|
|
static_cast<const PodNerModel*>(
|
|
flatbuffers::GetRoot<PodNerModel>(model_buffer.data())),
|
|
*unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &annotations));
|
|
EXPECT_THAT(annotations, IsEmpty());
|
|
}
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateMinNumWordpieces) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
std::string text = "in New York";
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
}
|
|
|
|
std::unique_ptr<PodNerModelT> unpacked_model(model_->UnPack());
|
|
ASSERT_TRUE(unpacked_model != nullptr);
|
|
|
|
unpacked_model->min_number_of_wordpieces = 10;
|
|
flatbuffers::FlatBufferBuilder builder;
|
|
builder.Finish(PodNerModel::Pack(builder, unpacked_model.get()));
|
|
|
|
std::string model_buffer =
|
|
std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
|
|
builder.GetSize());
|
|
annotator = PodNerAnnotator::Create(
|
|
static_cast<const PodNerModel*>(
|
|
flatbuffers::GetRoot<PodNerModel>(model_buffer.data())),
|
|
*unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
{
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &annotations));
|
|
EXPECT_THAT(annotations, IsEmpty());
|
|
}
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateNonstandardText) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
const std::string nonstandard_text =
|
|
"abcNxCDU1RWNvbXByLXI4NS8xNzcwLzE3NzA4NDY2L3J1Ymluby1raWRzLXJlY2xpbmVyLXd"
|
|
"pdGgtY3VwLWhvbGRlci5qcGc=/"
|
|
"UnViaW5vIEtpZHMgUmVjbGluZXIgd2l0aCBDdXAgSG9sZGVyIGJ5IEhhcnJpZXQgQmVl."
|
|
"html>";
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(
|
|
annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
|
|
EXPECT_THAT(annotations, IsEmpty());
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateTextWithLinefeed) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
std::string nonstandard_text = "My name is Kuba\x09";
|
|
nonstandard_text += "and this is a test.";
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(
|
|
annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));
|
|
|
|
nonstandard_text = "My name is Kuba\x09 and this is a test.";
|
|
ASSERT_TRUE(
|
|
annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));
|
|
}
|
|
|
|
TEST_F(PodNerTest, AnnotateWithUnknownWordpieces) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
const std::string long_text =
|
|
"It is easy to spend a fun and exciting day in Seattle without a car. "
|
|
"There are lots of ways to modify this itinerary. Add a ferry ride "
|
|
"from the waterfront. Spending the day at the Seattle Center or at the "
|
|
"aquarium could easily extend this from one to several days. Take the "
|
|
"Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
|
|
"Museum which is fun and free. In the summer months you can ride the "
|
|
"passenger-only Water Taxi from the waterfront to West Seattle and "
|
|
"Alki Beach. Here's a sample one day itinerary: Start at the Space "
|
|
"Needle by taking the Seattle Monorail from downtown. Look around the "
|
|
"Seattle Center or go to the Space Needle.";
|
|
const std::string text_with_unknown_wordpieces = "před chvílí";
|
|
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(
|
|
annotator->Annotate(UTF8ToUnicodeText("Google New York , in New York. " +
|
|
text_with_unknown_wordpieces),
|
|
&annotations));
|
|
EXPECT_THAT(annotations, IsEmpty());
|
|
ASSERT_TRUE(annotator->Annotate(
|
|
UTF8ToUnicodeText(long_text + " " + text_with_unknown_wordpieces),
|
|
&annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
}
|
|
|
|
class PodNerTestWithOrWithoutFinalPeriod
|
|
: public PodNerTest,
|
|
public testing::WithParamInterface<bool> {};
|
|
|
|
INSTANTIATE_TEST_SUITE_P(TestAnnotateLongText,
|
|
PodNerTestWithOrWithoutFinalPeriod,
|
|
testing::Values(true, false));
|
|
|
|
TEST_P(PodNerTestWithOrWithoutFinalPeriod, AnnotateLongText) {
|
|
std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
|
|
GetParam() ? model_append_final_period_ : model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
const std::string long_text =
|
|
"It is easy to spend a fun and exciting day in Seattle without a car. "
|
|
"There are lots of ways to modify this itinerary. Add a ferry ride "
|
|
"from the waterfront. Spending the day at the Seattle Center or at the "
|
|
"aquarium could easily extend this from one to several days. Take the "
|
|
"Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
|
|
"Museum which is fun and free. In the summer months you can ride the "
|
|
"passenger-only Water Taxi from the waterfront to West Seattle and "
|
|
"Alki Beach. Here's a sample one day itinerary: Start at the Space "
|
|
"Needle by taking the Seattle Monorail from downtown. Look around the "
|
|
"Seattle Center or go to the Space Needle. If you're interested in "
|
|
"music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
|
|
"is located at the foot of the Space Needle. It has a lot of rock'n "
|
|
"roll memorabilia that you may find interesting. The Chihuly Garden "
|
|
"and Glass musuem is near the Space Needle and you can get a "
|
|
"combination ticket for both. It gets really good reviews. If you're "
|
|
"interested, then the Bill & Melinda Gates Foundation is across from "
|
|
"the EMP and has a visitors center that is free. Come see how Bill "
|
|
"Gates is giving away his millions. Take the Monorail back downtown. "
|
|
"You will be at 5th and Pine (Westlake Center). Head west to the Pike "
|
|
"Place Market. Look around then head for the Pike Place hill climb "
|
|
"which is a series of steps that walk down to the waterfront. You will "
|
|
"end up across the street from the Seattle Aquarium. Plenty of things "
|
|
"to do on the waterfront, boat cruises, seafood restaurants, the "
|
|
"Aquarium, or your typical tourist activities. You can walk or take "
|
|
"the waterfront trolley bus. Note that waterfront construction has "
|
|
"relocated the trolley Metro bus route 99 that will take you from "
|
|
"Pioneer Square all the way to the end of the waterfront where you can "
|
|
"visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
|
|
"Pier 70. The route goes thru Chinatown/International District, "
|
|
"through Pioneer Square, up 1st ave past the Pike Place Market and to "
|
|
"1st and Cedar which is walking distance to the Space Needle. It then "
|
|
"goes down Broad Street toward the Olympic Sculpture Garden. It runs "
|
|
"approximately every 30 minutes during the day and early evening.";
|
|
std::vector<AnnotatedSpan> annotations;
|
|
ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(long_text), &annotations));
|
|
EXPECT_THAT(annotations, Not(IsEmpty()));
|
|
|
|
const std::string location_from_beginning = "Seattle";
|
|
int start_span_location_from_beginning =
|
|
long_text.find(location_from_beginning);
|
|
EXPECT_EQ(annotations[0].span,
|
|
CodepointSpan(start_span_location_from_beginning,
|
|
start_span_location_from_beginning +
|
|
location_from_beginning.length()));
|
|
|
|
const std::string location_from_end = "Olympic Sculpture Garden";
|
|
int start_span_location_from_end = long_text.find(location_from_end);
|
|
const AnnotatedSpan& last_annotation = *annotations.rbegin();
|
|
EXPECT_EQ(
|
|
last_annotation.span,
|
|
CodepointSpan(start_span_location_from_end,
|
|
start_span_location_from_end + location_from_end.length()));
|
|
}
|
|
|
|
TEST_F(PodNerTest, SuggestSelectionLongText) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
const std::string long_text =
|
|
"It is easy to spend a fun and exciting day in Seattle without a car. "
|
|
"There are lots of ways to modify this itinerary. Add a ferry ride "
|
|
"from the waterfront. Spending the day at the Seattle Center or at the "
|
|
"aquarium could easily extend this from one to several days. Take the "
|
|
"Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
|
|
"Museum which is fun and free. In the summer months you can ride the "
|
|
"passenger-only Water Taxi from the waterfront to West Seattle and "
|
|
"Alki Beach. Here's a sample one day itinerary: Start at the Space "
|
|
"Needle by taking the Seattle Monorail from downtown. Look around the "
|
|
"Seattle Center or go to the Space Needle. If you're interested in "
|
|
"music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
|
|
"is located at the foot of the Space Needle. It has a lot of rock'n "
|
|
"roll memorabilia that you may find interesting. The Chihuly Garden "
|
|
"and Glass musuem is near the Space Needle and you can get a "
|
|
"combination ticket for both. It gets really good reviews. If you're "
|
|
"interested, then the Bill & Melinda Gates Foundation is across from "
|
|
"the EMP and has a visitors center that is free. Come see how Bill "
|
|
"Gates is giving away his millions. Take the Monorail back downtown. "
|
|
"You will be at 5th and Pine (Westlake Center). Head west to the Pike "
|
|
"Place Market. Look around then head for the Pike Place hill climb "
|
|
"which is a series of steps that walk down to the waterfront. You will "
|
|
"end up across the street from the Seattle Aquarium. Plenty of things "
|
|
"to do on the waterfront, boat cruises, seafood restaurants, the "
|
|
"Aquarium, or your typical tourist activities. You can walk or take "
|
|
"the waterfront trolley bus. Note that waterfront construction has "
|
|
"relocated the trolley Metro bus route 99 that will take you from "
|
|
"Pioneer Square all the way to the end of the waterfront where you can "
|
|
"visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
|
|
"Pier 70. The route goes thru Chinatown/International District, "
|
|
"through Pioneer Square, up 1st ave past the Pike Place Market and to "
|
|
"1st and Cedar which is walking distance to the Space Needle. It then "
|
|
"goes down Broad Street toward the Olympic Sculpture Garden. It runs "
|
|
"approximately every 30 minutes during the day and early evening.";
|
|
const std::string klondike = "Klondike Gold Rush Museum";
|
|
int klondike_start = long_text.find(klondike);
|
|
|
|
AnnotatedSpan suggested_span;
|
|
EXPECT_TRUE(annotator->SuggestSelection(UTF8ToUnicodeText(long_text),
|
|
{klondike_start, klondike_start + 8},
|
|
&suggested_span));
|
|
EXPECT_EQ(suggested_span.span,
|
|
CodepointSpan(klondike_start, klondike_start + klondike.length()));
|
|
}
|
|
|
|
TEST_F(PodNerTest, SuggestSelectionTest) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
AnnotatedSpan suggested_span;
|
|
EXPECT_TRUE(annotator->SuggestSelection(
|
|
UTF8ToUnicodeText("Google New York, in New York"), {7, 10},
|
|
&suggested_span));
|
|
EXPECT_EQ(suggested_span.span, CodepointSpan(7, 15));
|
|
EXPECT_FALSE(annotator->SuggestSelection(
|
|
UTF8ToUnicodeText("Google New York, in New York"), {17, 19},
|
|
&suggested_span));
|
|
EXPECT_EQ(suggested_span.span, CodepointSpan(kInvalidIndex, kInvalidIndex));
|
|
}
|
|
|
|
TEST_F(PodNerTest, ClassifyTextTest) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
ClassificationResult result;
|
|
ASSERT_TRUE(annotator->ClassifyText(UTF8ToUnicodeText("We met in New York"),
|
|
{10, 18}, &result));
|
|
EXPECT_EQ(result.collection, "location");
|
|
}
|
|
|
|
TEST_F(PodNerTest, ThreadSafety) {
|
|
std::unique_ptr<PodNerAnnotator> annotator =
|
|
PodNerAnnotator::Create(model_, *unilib_);
|
|
ASSERT_TRUE(annotator != nullptr);
|
|
|
|
// Do inference in 20 threads. When run with --config=tsan, this should fire
|
|
// if there's a problem.
|
|
std::vector<std::thread> thread_pool(20);
|
|
for (std::thread& thread : thread_pool) {
|
|
thread = std::thread([&annotator]() {
|
|
AnnotatedSpan suggested_span;
|
|
EXPECT_TRUE(annotator->SuggestSelection(
|
|
UTF8ToUnicodeText("Google New York, in New York"), {7, 10},
|
|
&suggested_span));
|
|
EXPECT_EQ(suggested_span.span, CodepointSpan(7, 15));
|
|
});
|
|
}
|
|
for (std::thread& thread : thread_pool) {
|
|
thread.join();
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace libtextclassifier3
|