Electroneum
language_base.h
Go to the documentation of this file.
1 // Copyrights(c) 2017-2019, The Electroneum Project
2 // Copyrights(c) 2014-2017, The Monero Project
3 //
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without modification, are
7 // permitted provided that the following conditions are met:
8 //
9 // 1. Redistributions of source code must retain the above copyright notice, this list of
10 // conditions and the following disclaimer.
11 //
12 // 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 // of conditions and the following disclaimer in the documentation and/or other
14 // materials provided with the distribution.
15 //
16 // 3. Neither the name of the copyright holder nor the names of its contributors may be
17 // used to endorse or promote products derived from this software without specific
18 // prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
21 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 // THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
28 // THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
36 #ifndef LANGUAGE_BASE_H
37 #define LANGUAGE_BASE_H
38 
39 #include <vector>
40 #include <unordered_map>
41 #include <string>
42 #include "misc_log_ex.h"
43 
48 namespace Language
49 {
/*!
 * \brief Returns a string made of (at most) the first count characters in s.
 *
 * A "character" is one lead byte followed by any number of UTF-8
 * continuation bytes (bit pattern 10xxxxxx). The input is assumed to be
 * well-formed UTF-8; no validation is performed.
 *
 * \param s     The UTF-8 encoded source string.
 * \param count Maximum number of characters (not bytes) to keep.
 * \return      The leading part of s, at most count characters long.
 */
inline std::string utf8prefix(const std::string &s, size_t count)
{
  const size_t len = s.size();
  size_t pos = 0;
  // Bound the scan by the string's size rather than by a NUL terminator so
  // that an embedded '\0' counts as an ordinary character and does not cut
  // the result short.
  for (; count != 0 && pos < len; --count)
  {
    ++pos; // consume the lead byte of this character
    // then swallow every continuation byte that belongs to it
    while (pos < len && (static_cast<unsigned char>(s[pos]) & 0xc0) == 0x80)
      ++pos;
  }
  return s.substr(0, pos);
}
69 
75  class Base
76  {
77  protected:
78  enum {
81  };
82  const std::vector<std::string> word_list;
83  std::unordered_map<std::string, uint32_t> word_map;
84  std::unordered_map<std::string, uint32_t> trimmed_word_map;
85  std::string language_name;
90  void populate_maps(uint32_t flags = 0)
91  {
92  int ii;
93  std::vector<std::string>::const_iterator it;
94  if (word_list.size () != 1626)
95  throw std::runtime_error("Wrong word list length for " + language_name);
96  for (it = word_list.begin(), ii = 0; it != word_list.end(); it++, ii++)
97  {
98  word_map[*it] = ii;
99  if ((*it).size() < unique_prefix_length)
100  {
101  if (flags & ALLOW_SHORT_WORDS)
102  MWARNING(language_name << " word '" << *it << "' is shorter than its prefix length, " << unique_prefix_length);
103  else
104  throw std::runtime_error("Too short word in " + language_name + " word list: " + *it);
105  }
106  std::string trimmed;
107  if (it->length() > unique_prefix_length)
108  {
109  trimmed = utf8prefix(*it, unique_prefix_length);
110  }
111  else
112  {
113  trimmed = *it;
114  }
115  if (trimmed_word_map.find(trimmed) != trimmed_word_map.end())
116  {
118  MWARNING("Duplicate prefix in " << language_name << " word list: " << trimmed);
119  else
120  throw std::runtime_error("Duplicate prefix in " + language_name + " word list: " + trimmed);
121  }
122  trimmed_word_map[trimmed] = ii;
123  }
124  }
125  public:
126  Base(const char *language_name, const std::vector<std::string> &words, uint32_t prefix_length):
127  word_list(words),
128  unique_prefix_length(prefix_length),
130  {
131  }
132  virtual ~Base()
133  {
134  }
139  const std::vector<std::string>& get_word_list() const
140  {
141  return word_list;
142  }
147  const std::unordered_map<std::string, uint32_t>& get_word_map() const
148  {
149  return word_map;
150  }
155  const std::unordered_map<std::string, uint32_t>& get_trimmed_word_map() const
156  {
157  return trimmed_word_map;
158  }
163  const std::string &get_language_name() const
164  {
165  return language_name;
166  }
171  uint32_t get_unique_prefix_length() const
172  {
173  return unique_prefix_length;
174  }
175  };
176 }
177 
178 #endif
std::string language_name
Definition: language_base.h:85
std::string utf8prefix(const std::string &s, size_t count)
Returns a string made of (at most) the first count characters in s. Assumes well formedness. No check is made for this.
Definition: language_base.h:57
A base language class which all languages have to inherit from for Polymorphism.
Definition: language_base.h:75
Definition: language_base.h:79
flags
Definition: http_parser_merged.h:136
Definition: language_base.h:80
uint32_t get_unique_prefix_length() const
Returns the number of unique starting characters to be used for matching.
Definition: language_base.h:171
Mnemonic language related namespace.
const std::vector< std::string > & get_word_list() const
Returns a const reference to the word list.
Definition: language_base.h:139
virtual ~Base()
Definition: language_base.h:132
std::unordered_map< std::string, uint32_t > word_map
Definition: language_base.h:83
const std::vector< std::string > word_list
Definition: language_base.h:82
void populate_maps(uint32_t flags=0)
Populates the word maps after the list is ready.
Definition: language_base.h:90
const std::unordered_map< std::string, uint32_t > & get_word_map() const
Returns a const reference to the word map.
Definition: language_base.h:147
Base(const char *language_name, const std::vector< std::string > &words, uint32_t prefix_length)
Definition: language_base.h:126
const std::string & get_language_name() const
Returns the name of the language.
Definition: language_base.h:163
std::unordered_map< std::string, uint32_t > trimmed_word_map
Definition: language_base.h:84
const std::unordered_map< std::string, uint32_t > & get_trimmed_word_map() const
Returns a const reference to the trimmed word map.
Definition: language_base.h:155
#define s(x, c)
Definition: aesb.c:46
uint32_t unique_prefix_length
Definition: language_base.h:86