kgrams  0.1.0
kgramFreqs.h
Go to the documentation of this file.
1 
5 #ifndef KGRAM_FREQS_H
6 #define KGRAM_FREQS_H
7 
8 #include <string>
9 #include <vector>
10 #include <unordered_map>
11 #include <utility>
12 #include "Dictionary.h"
13 #include "WordStream.h"
14 #include "CircularBuffer.h"
15 #include "special_tokens.h"
16 
19 
20 class kgramFreqs {
21  //--------Aliases--------//
23  using FrequencyTable = std::unordered_map<std::string, size_t>;
24 
25  //--------Private variables--------//
26  size_t N_;
27 
36  std::vector<FrequencyTable> freqs_;
37 
42 
43  Dictionary dict_;
44 
45  //--------Private methods--------//
46 
50  // <BOS> <BOS> ... <BOS> at the start of each iteration (sentence)
51  void process_sentence (const std::string &,
53  bool fixed_dictionary = false
54  ); // kgramFreqs.cpp
55 public:
56  //--------Constructors--------//
57 
62  kgramFreqs(size_t N)
63  : N_(N), freqs_(N + 1) {}
64 
70  kgramFreqs(size_t N, const std::vector<std::string> & dict)
71  : N_(N), freqs_(N + 1), dict_(dict) {}
72 
77  kgramFreqs(size_t N, const Dictionary & dict)
78  : N_(N), freqs_(N + 1), dict_(dict) {}
79 
80  //--------Process k-gram counts--------//
81  void process_sentences (const std::vector<std::string> &,
82  bool fixed_dictionary = false
83  ); // kgramFreqs.cpp
84 
85  //--------Query k-grams and words--------//
86  // Get k-gram counts
87  double query (std::string) const; // kgramFreqs.cpp
88 
89 
93  bool dict_contains (std::string word) const
94  { return dict_.contains(word); }
95 
99  size_t N() const { return N_; }
100 
105  size_t V() const { return dict_.length(); }
106 
107 
109  const Dictionary & dictionary() const { return dict_; };
110 }; // kgramFreqs
111 
112 #endif // KGRAM_FREQS_H
kgramFreqs::N
size_t N() const
Maximum order of k-grams.
Definition: kgramFreqs.h:99
Dictionary::length
size_t length() const
Return size of the dictionary, excluding the special tokens (BOS, EOS, UNK).
Definition: Dictionary.h:97
kgramFreqs::dictionary
const Dictionary & dictionary() const
Return constant reference to Dictionary.
Definition: kgramFreqs.h:109
kgramFreqs::kgramFreqs
kgramFreqs(size_t N, const Dictionary &dict)
Constructor with predefined dictionary.
Definition: kgramFreqs.h:77
CircularBuffer
Definition: CircularBuffer.h:5
kgramFreqs::kgramFreqs
kgramFreqs(size_t N)
Constructor with empty dictionary.
Definition: kgramFreqs.h:62
kgramFreqs::V
size_t V() const
Dictionary size.
Definition: kgramFreqs.h:105
Dictionary::contains
bool contains(std::string word) const
Check if a word is contained in the Dictionary.
Definition: Dictionary.h:63
Dictionary
Word dictionary for language models.
Definition: Dictionary.h:22
kgramFreqs::kgramFreqs
kgramFreqs(size_t N, const std::vector< std::string > &dict)
Constructor with predefined dictionary.
Definition: kgramFreqs.h:70
kgramFreqs::dict_contains
bool dict_contains(std::string word) const
Check if a word is found in the dictionary.
Definition: kgramFreqs.h:93
kgramFreqs::process_sentences
void process_sentences(const std::vector< std::string > &, bool fixed_dictionary=false)
Store k-gram counts from a list of sentences.
Definition: kgramFreqs.cpp:13
Dictionary.h
Definition of Dictionary class.
kgramFreqs::query
double query(std::string) const
Retrieve counts for a given k-gram.
Definition: kgramFreqs.cpp:82
kgramFreqs
Store k-gram frequency counts in hash tables.
Definition: kgramFreqs.h:20