Go to the documentation of this file.
10 #include <unordered_map>
13 #include "WordStream.h"
14 #include "CircularBuffer.h"
15 #include "special_tokens.h"
23 using FrequencyTable = std::unordered_map<std::string, size_t>;
36 std::vector<FrequencyTable> freqs_;
51 void process_sentence (
const std::string &,
53 bool fixed_dictionary =
false
63 : N_(
N), freqs_(
N + 1) {}
71 : N_(
N), freqs_(
N + 1), dict_(dict) {}
78 : N_(
N), freqs_(
N + 1), dict_(dict) {}
82 bool fixed_dictionary =
false
87 double query (std::string)
const;
99 size_t N()
const {
return N_; }
112 #endif // KGRAM_FREQS_H
size_t N() const
Maximum order of k-grams.
Definition: kgramFreqs.h:99
size_t length() const
Return size of the dictionary, excluding the special tokens (BOS, EOS, UNK).
Definition: Dictionary.h:97
const Dictionary & dictionary() const
Return constant reference to Dictionary.
Definition: kgramFreqs.h:109
kgramFreqs(size_t N, const Dictionary &dict)
Constructor with a predefined dictionary, given as a Dictionary object.
Definition: kgramFreqs.h:77
Definition: CircularBuffer.h:5
kgramFreqs(size_t N)
Constructor with empty dictionary.
Definition: kgramFreqs.h:62
size_t V() const
Dictionary size.
Definition: kgramFreqs.h:105
bool contains(std::string word) const
Check if a word is contained in the Dictionary.
Definition: Dictionary.h:63
Word dictionary for language models.
Definition: Dictionary.h:22
kgramFreqs(size_t N, const std::vector< std::string > &dict)
Constructor with a predefined dictionary, given as a vector of strings.
Definition: kgramFreqs.h:70
bool dict_contains(std::string word) const
Check if a word is found in the dictionary.
Definition: kgramFreqs.h:93
void process_sentences(const std::vector< std::string > &, bool fixed_dictionary=false)
Store k-gram counts from a list of sentences.
Definition: kgramFreqs.cpp:13
Definition of Dictionary class.
double query(std::string) const
Retrieve counts for a given k-gram.
Definition: kgramFreqs.cpp:82
Store k-gram frequency counts in hash tables.
Definition: kgramFreqs.h:20