tetengo 1.8.1
A multipurpose library set
Loading...
Searching...
No Matches
tetengo::lattice Namespace Reference

A lattice library. More...

Classes

class  cap
 A cap. More...
 
class  connection
 A connection. More...
 
class  constraint
 A constraint. More...
 
class  constraint_element
 A constraint element. More...
 
class  entry
 An entry. More...
 
class  entry_view
 An entry view. More...
 
class  input
 An input. More...
 
class  lattice
 A lattice. More...
 
class  n_best_iterator
 An N-best lattice path iterator. More...
 
class  node
 A node. More...
 
class  node_constraint_element
 A node constraint element. More...
 
class  path
 A path. More...
 
class  string_input
 A string input. More...
 
class  unordered_map_vocabulary
 An unordered map vocabulary. More...
 
class  vocabulary
 A vocabulary. More...
 
class  wildcard_constraint_element
 A wildcard constraint element. More...
 

Detailed Description

A lattice library.

Usage

Viterbi Search

C++

#include <algorithm>
#include <any>
#include <cassert>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <boost/stl_interfaces/iterator_interface.hpp> // IWYU pragma: keep
namespace usage_tetengo::lattice
{
std::unique_ptr<tetengo::lattice::vocabulary> build_vocabulary();
std::string to_string(const tetengo::lattice::path& path_);
void viterbi()
{
/*
Makes the following lattice and searches it.
/-----[ab:AwaBizan]-----\
/ (7) (9) (1) \
/ \
/ (2) (4) (7) \
[BOS]-----[a:Alpha]---[b:Bravo]-----[EOS]
\ (3) \ /(1) (2) /
\‍(1) X /(6)
\ / \‍(5) /
`-[a:Alice]---[b:Bob]---'
(1) (9) (8)
Path Cost
[BOS]-[Alice]-[Bravo]-[EOS] 1+1+1+7+2=12
[BOS]---[AwaBizan]----[EOS] 7+9+1 =17
[BOS]-[Alpha]-[Bravo]-[EOS] 3+2+4+7+2=18
[BOS]-[Alpha]-[Bob]---[EOS] 3+2+5+8+6=24
[BOS]-[Alice]-[Bob]---[EOS] 1+1+9+8+6=25
*/
// Builds a vocabulary.
const auto p_vocabulary = build_vocabulary();
// Creates an object for a lattice.
tetengo::lattice::lattice lattice_{ *p_vocabulary };
// Enters key characters to construct the lattice.
lattice_.push_back(std::make_unique<tetengo::lattice::string_input>("a"));
lattice_.push_back(std::make_unique<tetengo::lattice::string_input>("b"));
// Finishes the lattice construction.
const auto eos_and_preceding_costs = lattice_.settle();
// Creates an iterator to enumerate the paths in the lattice.
eos_and_preceding_costs.first,
std::make_unique<tetengo::lattice::constraint>() };
// Enumerates the paths.
std::vector<std::string> paths{};
std::for_each(
std::move(first), tetengo::lattice::n_best_iterator{}, [&paths](const tetengo::lattice::path& path_) {
paths.push_back(to_string(path_));
});
static const std::vector<std::string> expected{
// clang-format off
"[BOS]-[Alice]-[Bravo]-[EOS] (12)",
"[BOS]-[AwaBizan]-[EOS] (17)",
"[BOS]-[Alpha]-[Bravo]-[EOS] (18)",
"[BOS]-[Alpha]-[Bob]-[EOS] (24)",
"[BOS]-[Alice]-[Bob]-[EOS] (25)",
// clang-format on
};
assert(paths == expected);
}
std::string value_of(const tetengo::lattice::entry_view& entry)
{
// The value is stored in the std::any object.
return entry.value()->has_value() ? std::any_cast<std::string>(*entry.value()) : std::string{};
}
std::unique_ptr<tetengo::lattice::vocabulary> build_vocabulary()
{
// The contents of the vocabulary.
static const std::vector<tetengo::lattice::entry> entries{
// clang-format off
{ std::make_unique<tetengo::lattice::string_input>("a"), std::string{ "Alpha" }, 2 },
{ std::make_unique<tetengo::lattice::string_input>("b"), std::string{ "Bravo" }, 7 },
{ std::make_unique<tetengo::lattice::string_input>("a"), std::string{ "Alice" }, 1 },
{ std::make_unique<tetengo::lattice::string_input>("b"), std::string{ "Bob" }, 8 },
{ std::make_unique<tetengo::lattice::string_input>("ab"), std::string{ "AwaBizan" }, 9 },
// clang-format on
};
std::vector<std::pair<std::string, std::vector<tetengo::lattice::entry>>> entry_mappings{
// clang-format off
{ "a", { entries[0], entries[2] } },
{ "b", { entries[1], entries[3] } },
{ "ab", { entries[4] }},
// clang-format on
};
std::vector<std::pair<std::pair<tetengo::lattice::entry, tetengo::lattice::entry>, int>> connections{
// clang-format off
{ { tetengo::lattice::entry::bos_eos(), entries[0] }, 3 },
{ { tetengo::lattice::entry::bos_eos(), entries[2] }, 1 },
{ { entries[0], entries[1] }, 4 },
{ { entries[2], entries[1] }, 1 },
{ { entries[0], entries[3] }, 5 },
{ { entries[2], entries[3] }, 9 },
{ { entries[1], tetengo::lattice::entry::bos_eos() }, 2 },
{ { entries[3], tetengo::lattice::entry::bos_eos() }, 6 },
{ { tetengo::lattice::entry::bos_eos(), entries[4] }, 7 },
{ { entries[4], tetengo::lattice::entry::bos_eos() }, 1 },
// clang-format on
};
// Returns a vocabulary implemented with hash tables.
return std::make_unique<tetengo::lattice::unordered_map_vocabulary>(
std::move(entry_mappings),
std::move(connections),
[](const tetengo::lattice::entry_view& entry) {
return (entry.p_key() ? entry.p_key()->hash_value() : 0) ^ std::hash<std::string>{}(value_of(entry));
},
return ((!entry1.p_key() && !entry2.p_key()) ||
(entry1.p_key() && entry2.p_key() && *entry1.p_key() == *entry2.p_key())) &&
(value_of(entry1) == value_of(entry2));
});
}
std::string value_of(const tetengo::lattice::node& node_, const bool first)
{
if (node_.value().has_value())
{
// The value is stored in the std::any object.
return std::any_cast<std::string>(node_.value());
}
else if (first)
{
return "BOS";
}
else
{
return "EOS";
}
}
std::string to_string(const tetengo::lattice::path& path_)
{
// Each path object holds the nodes that make up itself, and the whole cost.
std::string result{};
for (const auto& node: path_.nodes())
{
if (!std::empty(result))
{
result += "-";
}
result += "[" + value_of(node, std::empty(result)) + "]";
}
result += " (" + std::to_string(path_.cost()) + ")";
return result;
}
}
An entry view.
Definition entry.hpp:110
constexpr const std::any * value() const
Returns the value.
Definition entry.hpp:162
constexpr const input * p_key() const
Returns the pointer to the key.
Definition entry.hpp:152
static const entry & bos_eos()
Returns the BOS/EOS (Beginning/End of Sequence) entry.
std::size_t hash_value() const
Returns the hash value.
A lattice.
Definition lattice.hpp:29
void push_back(std::unique_ptr< input > &&p_input)
Pushes back an input.
An N-best lattice path iterator.
Definition n_best_iterator.hpp:99
A node.
Definition node.hpp:27
constexpr const std::any & value() const
Returns the value.
Definition node.hpp:153
A path.
Definition path.hpp:21
int cost() const
Returns the cost.
A constraint.
An entry.
An input.
A lattice.
An N-best lattice path iterator.
A node.
A path.
A string input.
An unordered map vocabulary.
A vocabulary.

C

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
tetengo_lattice_vocabulary_t* build_vocabulary();
const char* to_string(const tetengo_lattice_path_t* p_path);
void usage_tetengo_lattice_viterbi()
{
/*
Makes the following lattice and searches it.
/-----[ab:AwaBizan]-----\
/ (7) (9) (1) \
/ \
/ (2) (4) (7) \
[BOS]-----[a:Alpha]---[b:Bravo]-----[EOS]
\ (3) \ /(1) (2) /
\‍(1) X /(6)
\ / \‍(5) /
`-[a:Alice]---[b:Bob]---'
(1) (9) (8)
Path Cost
[BOS]-[Alice]-[Bravo]-[EOS] 1+1+1+7+2=12
[BOS]---[AwaBizan]----[EOS] 7+9+1 =17
[BOS]-[Alpha]-[Bravo]-[EOS] 3+2+4+7+2=18
[BOS]-[Alpha]-[Bob]---[EOS] 3+2+5+8+6=24
[BOS]-[Alice]-[Bob]---[EOS] 1+1+9+8+6=25
*/
// Builds a vocabulary.
const tetengo_lattice_vocabulary_t* const p_vocabulary = build_vocabulary();
// Creates an object for a lattice.
tetengo_lattice_lattice_t* const p_lattice = tetengo_lattice_lattice_create(p_vocabulary);
// Enters key characters to construct the lattice.
tetengo_lattice_input_t* const p_input_a = tetengo_lattice_input_createStringInput("a");
tetengo_lattice_input_t* const p_input_b = tetengo_lattice_input_createStringInput("b");
tetengo_lattice_lattice_pushBack(p_lattice, p_input_a);
tetengo_lattice_lattice_pushBack(p_lattice, p_input_b);
// Finishes the lattice construction.
const size_t eos_preceding_count = tetengo_lattice_lattice_settle(p_lattice, NULL, NULL);
if (eos_preceding_count == 0)
{
return;
}
int* const p_eos_preceding_costs = (int*)malloc(eos_preceding_count * sizeof(int));
tetengo_lattice_lattice_settle(p_lattice, &eos_node, p_eos_preceding_costs);
// Creates an iterator to enumerate the paths in the lattice.
tetengo_lattice_nBestIterator_t* const p_iterator =
// Enumerates the paths.
char paths[256] = { 0 };
{
// Obtains the current path.
tetengo_lattice_path_t* const p_path = tetengo_lattice_nBestIterator_createPath(p_iterator);
strcat(paths, to_string(p_path));
// Moves to the next path.
}
// clang-format off
static const char* const expected =
"[BOS]-[Alice]-[Bravo]-[EOS] (12)\n"
"[BOS]-[AwaBizan]-[EOS] (17)\n"
"[BOS]-[Alpha]-[Bravo]-[EOS] (18)\n"
"[BOS]-[Alpha]-[Bob]-[EOS] (24)\n"
"[BOS]-[Alice]-[Bob]-[EOS] (25)\n";
// clang-format on
assert(strcmp(paths, expected) == 0);
free((void*)p_eos_preceding_costs);
}
static size_t hash(const char* const string, const size_t length)
{
size_t value = 0;
for (size_t i = 0; i < length; ++i)
{
value ^= string[i];
}
return value;
}
static size_t entry_hash(const tetengo_lattice_entryView_t* const p_entry)
{
const tetengo_lattice_input_t* const p_entry_key = tetengo_lattice_entryView_createKeyOf(p_entry->key_handle);
const size_t key_hash = p_entry_key ? tetengo_lattice_input_hashValue(p_entry_key) : 0;
const char* const value = (const char*)tetengo_lattice_entryView_valueOf(p_entry->value_handle);
const size_t value_hash = value ? hash(value, strlen(value)) : 0;
return key_hash ^ value_hash;
}
static int
entry_equal_to(const tetengo_lattice_entryView_t* const p_entry1, const tetengo_lattice_entryView_t* const p_entry2)
{
const tetengo_lattice_input_t* const p_entry_key1 = tetengo_lattice_entryView_createKeyOf(p_entry1->key_handle);
const tetengo_lattice_input_t* const p_entry_key2 = tetengo_lattice_entryView_createKeyOf(p_entry2->key_handle);
const char* const value1 = (const char*)tetengo_lattice_entryView_valueOf(p_entry1->value_handle);
const char* const value2 = (const char*)tetengo_lattice_entryView_valueOf(p_entry2->value_handle);
int equality = 1;
equality = equality && ((!p_entry_key1 && !p_entry_key2) ||
(p_entry_key1 && p_entry_key2 && tetengo_lattice_input_equal(p_entry_key1, p_entry_key2)));
equality = equality && ((!value1 && !value2) || (value1 && value2 && strcmp(value1, value2) == 0));
return equality;
}
tetengo_lattice_vocabulary_t* build_vocabulary()
{
// The contents of the vocabulary.
const tetengo_lattice_input_t* const p_entry_key_a = tetengo_lattice_input_createStringInput("a");
const tetengo_lattice_input_t* const p_entry_key_b = tetengo_lattice_input_createStringInput("b");
const tetengo_lattice_input_t* const p_entry_key_ab = tetengo_lattice_input_createStringInput("ab");
const tetengo_lattice_entry_keyHandle_t entry_key_handle_a = tetengo_lattice_entry_toKeyHandle(p_entry_key_a);
const tetengo_lattice_entry_keyHandle_t entry_key_handle_b = tetengo_lattice_entry_toKeyHandle(p_entry_key_b);
const tetengo_lattice_entry_keyHandle_t entry_key_handle_ab = tetengo_lattice_entry_toKeyHandle(p_entry_key_ab);
tetengo_lattice_entry_t entries[5] = { { 0, 0, 0 } };
const tetengo_lattice_input_t* const p_bos_eos_key =
entries[0].key_handle = entry_key_handle_a;
entries[0].p_value = "Alpha";
entries[0].cost = 2;
entries[1].key_handle = entry_key_handle_a;
entries[1].p_value = "Alice";
entries[1].cost = 1;
entries[2].key_handle = entry_key_handle_b;
entries[2].p_value = "Bravo";
entries[2].cost = 7;
entries[3].key_handle = entry_key_handle_b;
entries[3].p_value = "Bob";
entries[3].cost = 8;
entries[4].key_handle = entry_key_handle_ab;
entries[4].p_value = "AwaBizan";
entries[4].cost = 9;
entry_mappings[0].key.p_head = "a";
entry_mappings[0].key.length = 1;
entry_mappings[0].p_entries = &entries[0];
entry_mappings[0].entry_count = 2;
entry_mappings[1].key.p_head = "b";
entry_mappings[1].key.length = 1;
entry_mappings[1].p_entries = &entries[2];
entry_mappings[1].entry_count = 2;
entry_mappings[2].key.p_head = "ab";
entry_mappings[2].key.length = 2;
entry_mappings[2].p_entries = &entries[4];
entry_mappings[2].entry_count = 1;
connections[0].p_from = &bos_eos;
connections[0].p_to = &entries[0];
connections[0].cost = 3;
connections[1].p_from = &bos_eos;
connections[1].p_to = &entries[1];
connections[1].cost = 1;
connections[2].p_from = &entries[0];
connections[2].p_to = &entries[2];
connections[2].cost = 4;
connections[3].p_from = &entries[1];
connections[3].p_to = &entries[2];
connections[3].cost = 1;
connections[4].p_from = &entries[0];
connections[4].p_to = &entries[3];
connections[4].cost = 5;
connections[5].p_from = &entries[1];
connections[5].p_to = &entries[3];
connections[5].cost = 9;
connections[6].p_from = &entries[2];
connections[6].p_to = &bos_eos;
connections[6].cost = 2;
connections[7].p_from = &entries[3];
connections[7].p_to = &bos_eos;
connections[7].cost = 6;
connections[8].p_from = &bos_eos;
connections[8].p_to = &entries[4];
connections[8].cost = 7;
connections[9].p_from = &entries[4];
connections[9].p_to = &bos_eos;
connections[9].cost = 1;
tetengo_lattice_vocabulary_t* const p_vocabulary = tetengo_lattice_vocabulary_createUnorderedMapVocabulary(
entry_mappings,
sizeof(entry_mappings) / sizeof(tetengo_lattice_keyEntriesPair_t),
connections,
sizeof(connections) / sizeof(tetengo_lattice_entriesConnectionCostPair_t),
entry_hash,
entry_equal_to);
// Returns a vocabulary implemented with hash tables.
return p_vocabulary;
}
static const char* value_of(const tetengo_lattice_node_t* const p_node, const bool first)
{
// The value is accessible by the handle.
const char* const value = (const char*)tetengo_lattice_entryView_valueOf(p_node->value_handle);
if (value)
{
return value;
}
else if (first)
{
return "BOS";
}
else
{
return "EOS";
}
}
const char* to_string(const tetengo_lattice_path_t* const p_path)
{
static char result[48] = { 0 };
result[0] = '\0';
{
// Each path object holds the nodes that make up itself, and the whole cost.
const size_t node_count = tetengo_lattice_path_pNodes(p_path, NULL);
tetengo_lattice_node_t* const p_nodes =
(tetengo_lattice_node_t*)malloc(node_count * sizeof(tetengo_lattice_node_t));
if (!p_nodes)
{
return result;
}
tetengo_lattice_path_pNodes(p_path, p_nodes);
for (size_t i = 0; i < node_count; ++i)
{
if (i > 0)
{
strncat(result, "-", sizeof(result) - strlen(result) - 1);
}
strncat(result, "[", sizeof(result) - strlen(result) - 1);
strncat(result, value_of(&p_nodes[i], i == 0), sizeof(result) - strlen(result) - 1);
strncat(result, "]", sizeof(result) - strlen(result) - 1);
}
strncat(result, " (", sizeof(result) - strlen(result) - 1);
snprintf(result + strlen(result), sizeof(result) - strlen(result), "%d", tetengo_lattice_path_cost(p_path));
strncat(result, ")", sizeof(result) - strlen(result) - 1);
free(p_nodes);
}
strncat(result, "\n", sizeof(result) - strlen(result) - 1);
return result;
}
A connection.
A constraint.
tetengo_lattice_constraint_t * tetengo_lattice_constraint_createEmpty()
Creates an empty constraint.
An entry.
const tetengo_lattice_entryView_t * tetengo_lattice_entryView_bosEos()
Returns the pointer to the BOS/EOS (Beginning/End of Sequence) entry.
tetengo_lattice_entry_keyHandle_t tetengo_lattice_entry_toKeyHandle(const tetengo_lattice_input_t *p_content)
Returns the entry key handle.
const void * tetengo_lattice_entryView_valueOf(tetengo_lattice_entryView_valueHandle_t handle)
Returns the entry view value by a handle.
const struct tetengo_lattice_entry_keyHandle_tag * tetengo_lattice_entry_keyHandle_t
An entry key handle.
Definition entry.h:23
const tetengo_lattice_input_t * tetengo_lattice_entryView_createKeyOf(tetengo_lattice_entryView_keyHandle_t handle)
Creates an entry view key by a handle.
An input.
size_t tetengo_lattice_input_hashValue(const tetengo_lattice_input_t *p_input)
Returns the hash value.
void tetengo_lattice_input_destroy(const tetengo_lattice_input_t *p_input)
Destroys an input.
tetengo_lattice_input_t * tetengo_lattice_input_createStringInput(const char *value)
Creates a string input.
bool tetengo_lattice_input_equal(const tetengo_lattice_input_t *p_one, const tetengo_lattice_input_t *p_another)
Returns true if one input is equal to another.
A lattice.
bool tetengo_lattice_lattice_pushBack(tetengo_lattice_lattice_t *p_lattice, tetengo_lattice_input_t *p_input)
Pushes back an input.
size_t tetengo_lattice_lattice_settle(tetengo_lattice_lattice_t *p_lattice, tetengo_lattice_node_t *p_eos_node, int *p_preceding_edge_costs)
Settles this lattice.
tetengo_lattice_lattice_t * tetengo_lattice_lattice_create(const tetengo_lattice_vocabulary_t *p_vocabulary)
Creates a lattice.
void tetengo_lattice_lattice_destroy(const tetengo_lattice_lattice_t *p_lattice)
Destroys a lattice.
An N-best lattice path iterator.
tetengo_lattice_nBestIterator_t * tetengo_lattice_nBestIterator_create(const tetengo_lattice_lattice_t *p_lattice, const tetengo_lattice_node_t *p_eos_node, tetengo_lattice_constraint_t *p_constraint)
Creates an iterator.
void tetengo_lattice_nBestIterator_next(tetengo_lattice_nBestIterator_t *p_iterator)
Increments the iterator.
bool tetengo_lattice_nBestIterator_hasNext(const tetengo_lattice_nBestIterator_t *p_iterator)
Returns true when the iterator will return more elements.
void tetengo_lattice_nBestIterator_destroy(const tetengo_lattice_nBestIterator_t *p_iterator)
Destroys an iterator.
tetengo_lattice_path_t * tetengo_lattice_nBestIterator_createPath(const tetengo_lattice_nBestIterator_t *p_iterator)
Dereferences the iterator.
A node.
A path.
int tetengo_lattice_path_cost(const tetengo_lattice_path_t *p_path)
Returns the cost.
size_t tetengo_lattice_path_pNodes(const tetengo_lattice_path_t *p_path, tetengo_lattice_node_t *p_nodes)
Returns the nodes.
void tetengo_lattice_path_destroy(const tetengo_lattice_path_t *p_path)
Destroys a path.
A string view.
A pair of entries and a connection cost.
Definition connection.h:33
int cost
Definition connection.h:41
const tetengo_lattice_entry_t * p_to
Definition connection.h:38
const tetengo_lattice_entry_t * p_from
Definition connection.h:35
An entry view.
Definition entry.h:57
int cost
Definition entry.h:65
tetengo_lattice_entryView_keyHandle_t key_handle
Definition entry.h:59
tetengo_lattice_entryView_valueHandle_t value_handle
Definition entry.h:62
An entry.
Definition entry.h:40
int cost
Definition entry.h:48
const void * p_value
Definition entry.h:45
tetengo_lattice_entry_keyHandle_t key_handle
Definition entry.h:42
A pair of a key and entries.
Definition entry.h:74
tetengo_lattice_stringView_t key
Definition entry.h:76
const tetengo_lattice_entry_t * p_entries
Definition entry.h:79
size_t entry_count
Definition entry.h:82
A node.
Definition node.h:25
tetengo_lattice_entryView_valueHandle_t value_handle
Definition node.h:30
size_t length
Definition stringView.h:26
const char * p_head
Definition stringView.h:23
A vocabulary.
tetengo_lattice_vocabulary_t * tetengo_lattice_vocabulary_createUnorderedMapVocabulary(const tetengo_lattice_keyEntriesPair_t *p_entries, size_t entry_count, const tetengo_lattice_entriesConnectionCostPair_t *p_connections, size_t connection_count, size_t(*p_entry_hash)(const tetengo_lattice_entryView_t *), int(*p_entry_equal_to)(const tetengo_lattice_entryView_t *, const tetengo_lattice_entryView_t *))
Creates an unordered map vocabulary.
void tetengo_lattice_vocabulary_destroy(const tetengo_lattice_vocabulary_t *p_vocabulary)
Destroys a vocabulary.