kgrams  0.1.0
Smoothers.h
1 
5 #ifndef SMOOTHERS_H
6 #define SMOOTHERS_H
7 
8 #include "kgramFreqs.h"
9 #include "Sampler.h"
10 #include <Rmath.h>
11 #include <cmath>
12 #include <limits>
13 
16 class SBOSmoother {
17  //--------Private variables--------//
18 
19  kgramFreqs & f_;
20  size_t V_;
21  const double & lambda_;
22 
23  //--------Private methods--------//
24 
26  void backoff (std::string & context) {
27  size_t pos = context.find_first_not_of(" ");
28  pos = context.find_first_of(" ", pos);
29  if (pos == std::string::npos)
30  context.erase();
31  else
32  context = context.substr(pos);
33  }
34 public:
35  //--------Constructor--------//
36 
43  SBOSmoother (kgramFreqs & f, const double & lambda)
44  : f_(f), V_(f.V()), lambda_(lambda) {}
45 
46  //--------Probabilities--------//
47 
56  double operator() (const std::string & word, std::string context)
57  {
58  double kgram_count, penalization = 1.;
59  size_t n_backoffs = 0;
60  while ((kgram_count = f_.query(context + " " + word)) == 0) {
61  backoff(context);
62  penalization *= lambda_;
63  n_backoffs++;
64  if (n_backoffs > f_.N() - 1)
65  return 0;
66  }
67  return penalization * kgram_count / f_.query(context);
68  }
69 
70  friend class Sampler<SBOSmoother>;
71 }; // class SBOSmoother
72 
75 class AddkSmoother {
76  //--------Private variables--------//
77 
78  kgramFreqs & f_;
79  size_t V_;
80  const double & k_;
81 public:
82  //--------Constructor--------//
83 
89  AddkSmoother (kgramFreqs & f, const double & k)
90  : f_(f), V_(f.V()), k_(k) {}
91 
92  //--------Probabilities--------//
93 
102  double operator() (const std::string & word, std::string context)
103  {
104  double num = f_.query(context + " " + word) + k_;
105  double den = f_.query(context) + k_ * (V_ + 2);
106  return num / den;
107  }
108 
109  friend class Sampler<AddkSmoother>;
110 }; // class AddkSmoother
111 
114 class MLSmoother {
115  //--------Private variables--------//
116  kgramFreqs & f_;
117  size_t V_;
118 public:
119  //--------Constructors--------//
120 
125  MLSmoother (kgramFreqs & f) : f_(f), V_(f.V()) {}
126 
127 
128  //--------Probabilities--------//
129 
138  double operator() (const std::string & word, std::string context)
139  {
140  double den = f_.query(context);
141  if (den == 0)
142  return -1;
143  else
144  return f_.query(context + " " + word) / den;
145  }
146 
147  friend class Sampler<MLSmoother>;
148 }; // class MLSmoother
149 
150 #endif //SMOOTHERS_H
kgramFreqs::N
size_t N() const
Maximum order of k-grams.
Definition: kgramFreqs.h:99
MLSmoother
Maximum-Likelihood continuation probability smoother.
Definition: Smoothers.h:114
MLSmoother::MLSmoother
MLSmoother(kgramFreqs &f)
Initialize an MLSmoother from a kgramFreqs object.
Definition: Smoothers.h:125
AddkSmoother::operator()
double operator()(const std::string &word, std::string context)
Return Add-k continuation probability of a word given a context.
Definition: Smoothers.h:102
kgramFreqs.h
Definition of kgramFreqs class.
SBOSmoother::SBOSmoother
SBOSmoother(kgramFreqs &f, const double &lambda)
Initialize a SBOSmoother from a kgramFreqs object with a fixed backoff penalization.
Definition: Smoothers.h:43
AddkSmoother::AddkSmoother
AddkSmoother(kgramFreqs &f, const double &k)
Initialize an AddkSmoother from a kgramFreqs object with a fixed constant 'k'.
Definition: Smoothers.h:89
SBOSmoother::operator()
double operator()(const std::string &word, std::string context)
Return Stupid Backoff continuation score of a word given a context.
Definition: Smoothers.h:56
SBOSmoother
Stupid Backoff continuation probability smoother.
Definition: Smoothers.h:16
MLSmoother::operator()
double operator()(const std::string &word, std::string context)
Return Maximum-Likelihood continuation probability of a word given a context.
Definition: Smoothers.h:138
Sampler
Sample sequences from a k-gram language model.
Definition: Sampler.h:15
kgramFreqs::query
double query(std::string) const
Retrieve counts for a given k-gram.
Definition: kgramFreqs.cpp:82
AddkSmoother
Add-k continuation probability smoother.
Definition: Smoothers.h:75
kgramFreqs
Store k-gram frequency counts in hash tables
Definition: kgramFreqs.h:20