Go to the documentation of this file.
8 #include "special_tokens.h"
9 #include "WordStream.h"
12 #include <unordered_map>
// Lookup table from a word to its index. Indices are stored as strings
// (new indices are produced with std::to_string when a word is inserted).
25 std::unordered_map<std::string, std::string> word_to_ind_;
// Reverse lookup table from an index (as a string) back to its word.
27 std::unordered_map<std::string, std::string> ind_to_word_;
32 void insert_special_tokens() {
33 word_to_ind_[BOS_TOK] = BOS_IND;
34 ind_to_word_[BOS_IND] = BOS_TOK;
35 word_to_ind_[EOS_TOK] = EOS_IND;
36 ind_to_word_[EOS_IND] = EOS_TOK;
39 ind_to_word_[UNK_IND] = UNK_TOK;
64 return word_to_ind_.find(
word) != word_to_ind_.end();
71 std::string
index = std::to_string(++V_);
80 auto it = ind_to_word_.find(
index);
81 if (it != ind_to_word_.end())
return it->second;
89 auto it = word_to_ind_.find(
word);
90 if (it != word_to_ind_.end())
return it->second;
// Return the number of entries in ind_to_word_ minus 3, i.e. the dictionary
// size not counting the special tokens BOS, EOS and UNK.
// NOTE(review): the subtraction is on an unsigned size; this presumably relies
// on the three special-token entries always being present in ind_to_word_
// (otherwise the result would wrap around) - confirm against the constructors.
97 size_t length ()
const {
return ind_to_word_.size() - 3; }
112 std::pair<size_t, std::string>
kgram_code (std::string kgram)
const
114 std::pair<size_t, std::string> res{0,
""};
116 std::string
word, ind;
117 for (; ; res.first++) {
118 word = stream.pop_word();
122 res.second += ind +
" ";
125 res.second.pop_back();
130 #endif // DICTIONARY_H
size_t length() const
Return size of the dictionary, excluding the special tokens (BOS, EOS, UNK).
Definition: Dictionary.h:97
std::string index(std::string word) const
Return the index corresponding to a given word.
Definition: Dictionary.h:88
void insert(std::string word)
Insert a word in the Dictionary.
Definition: Dictionary.h:69
Dictionary(const std::vector< std::string > &dict)
Initialize Dictionary from list of words.
Definition: Dictionary.h:56
std::pair< size_t, std::string > kgram_code(std::string kgram) const
Extract k-gram code from a string.
Definition: Dictionary.h:112
bool contains(std::string word) const
Check if a word is contained in the Dictionary.
Definition: Dictionary.h:63
std::string word(std::string index) const
Return the word corresponding to a given word index.
Definition: Dictionary.h:79
Word dictionary for language models.
Definition: Dictionary.h:22
size_t size() const
Return size of the dictionary, excluding the special tokens (BOS, EOS, UNK).
Definition: Dictionary.h:102
Definition: WordStream.h:6
Dictionary()
Default constructor.
Definition: Dictionary.h:48